commit 0f504dde3388687d1214182fa519354146947635 (HEAD, refs/remotes/origin/master) Author: Stefan Monnier Date: Mon Apr 1 02:12:51 2024 -0400 (scheme-syntax-propertize-sexp-comment): Handle nested sexp-comments Well, I'm not completely sure this will work right in all cases, because I've been confused about this in the past. It works in my test case, at least. * lisp/progmodes/scheme.el (scheme-syntax-propertize-sexp-comment): Look for nested `#;` and mark them appropriately. diff --git a/etc/NEWS b/etc/NEWS index 1b86a968c5d..903c60ac97e 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1222,6 +1222,7 @@ interactive Python interpreter specified by 'python-interpreter'. Scheme mode now handles regular expression literal #/regexp/ that is available in some Scheme implementations. +Also, it should now handle nested sexp-comments. ** use-package diff --git a/lisp/progmodes/scheme.el b/lisp/progmodes/scheme.el index 8652abeb817..79d076ff145 100644 --- a/lisp/progmodes/scheme.el +++ b/lisp/progmodes/scheme.el @@ -50,6 +50,7 @@ ;;; Code: (require 'lisp-mode) +(eval-when-compile 'subr-x) ;For `named-let'. (defvar scheme-mode-syntax-table (let ((st (make-syntax-table)) @@ -426,18 +427,40 @@ See `run-hooks'." (point) end)) (defun scheme-syntax-propertize-sexp-comment (end) - (let ((state (syntax-ppss))) + (let ((state (syntax-ppss)) + (checked (point))) (when (eq 2 (nth 7 state)) ;; It's a sexp-comment. Tell parse-partial-sexp where it ends. - (condition-case nil - (progn - (goto-char (+ 2 (nth 8 state))) - ;; FIXME: this doesn't handle the case where the sexp - ;; itself contains a #; comment. 
- (forward-sexp 1) - (put-text-property (1- (point)) (point) - 'syntax-table (string-to-syntax "> cn"))) - (scan-error (goto-char end)))))) + (named-let loop ((startpos (+ 2 (nth 8 state)))) + (let ((found nil)) + (while + (progn + (setq found nil) + (condition-case nil + (progn + (goto-char startpos) + (forward-sexp 1) + (setq found (point))) + (scan-error (goto-char end))) + ;; If there's a nested `#;', the syntax-tables will normally + ;; consider the `;' to start a normal comment, so the + ;; (forward-sexp 1) above may have landed at the wrong place. + ;; So look for `#;' in the text over which we jumped, and + ;; mark those we found as nested sexp-comments. + (let ((limit (or found end))) + (when (< checked limit) + (goto-char checked) + (when (re-search-forward "\\(#\\);" limit 'move) + (setq checked (point)) + (put-text-property (match-beginning 1) (match-end 1) + 'syntax-table + (string-to-syntax "< cn")) + (loop (point))) + (< (point) limit))))) + (when found + (goto-char found) + (put-text-property (1- found) found + 'syntax-table (string-to-syntax "> cn")))))))) (defun scheme-syntax-propertize-regexp (end) (let* ((state (syntax-ppss)) commit 9caf5cb55a3889fea019c73d6a3040204d77bf39 Author: Po Lu Date: Mon Apr 1 14:04:56 2024 +0800 Fix androidsu's `make-process' file name handler * lisp/net/tramp-androidsu.el (tramp-androidsu-handle-make-process): Disable exec loader around call to setuid su binary. diff --git a/lisp/net/tramp-androidsu.el b/lisp/net/tramp-androidsu.el index 09bee323f5e..1ec9247cf3c 100644 --- a/lisp/net/tramp-androidsu.el +++ b/lisp/net/tramp-androidsu.el @@ -366,13 +366,19 @@ FUNCTION." ;; suitable options for specifying the mount namespace and ;; suchlike. 
(setq - p (make-process - :name name :buffer buffer - :command (if (tramp-get-connection-property v "remote-namespace") - (append (list "su" "-mm" "-" user "-c") command) - (append (list "su" "-" user "-c") command)) - :coding coding :noquery noquery :connection-type connection-type - :sentinel sentinel :stderr stderr)) + p (let ((android-use-exec-loader nil)) + (make-process + :name name + :buffer buffer + :command + (if (tramp-get-connection-property v "remote-namespace") + (append (list "su" "-mm" "-" user "-c") command) + (append (list "su" "-" user "-c") command)) + :coding coding + :noquery noquery + :connection-type connection-type + :sentinel sentinel + :stderr stderr))) ;; Set filter. Prior Emacs 29.1, it doesn't work reliably ;; to provide it as `make-process' argument when filter is ;; t. See Bug#51177. commit c4e7eec8c096219ddc6b3a981eef03ce421b8877 Author: niceume Date: Sun Mar 24 12:29:56 2024 +0900 (scheme-syntax-propertize-sexp-comment): Remove unused argument * lisp/progmodes/scheme.el (scheme-syntax-propertize-sexp-comment): Remove first arg, unused. (scheme-syntax-propertize): Adjust calls accordingly. diff --git a/lisp/progmodes/scheme.el b/lisp/progmodes/scheme.el index dc46f0fbbb8..8652abeb817 100644 --- a/lisp/progmodes/scheme.el +++ b/lisp/progmodes/scheme.el @@ -409,12 +409,12 @@ See `run-hooks'." (defun scheme-syntax-propertize (beg end) (goto-char beg) - (scheme-syntax-propertize-sexp-comment (point) end) + (scheme-syntax-propertize-sexp-comment end) (scheme-syntax-propertize-regexp end) (funcall (syntax-propertize-rules ("\\(#\\);" (1 (prog1 "< cn" - (scheme-syntax-propertize-sexp-comment (point) end)))) + (scheme-syntax-propertize-sexp-comment end)))) ("\\(#\\)/" (1 (when (null (nth 8 (save-excursion (syntax-ppss (match-beginning 0))))) (put-text-property @@ -425,7 +425,7 @@ See `run-hooks'." 
nil)))) (point) end)) -(defun scheme-syntax-propertize-sexp-comment (_ end) +(defun scheme-syntax-propertize-sexp-comment (end) (let ((state (syntax-ppss))) (when (eq 2 (nth 7 state)) ;; It's a sexp-comment. Tell parse-partial-sexp where it ends. commit 02c2a95a52e53486d034de4cd2831b258a49f9c4 Author: niceume Date: Sun Mar 17 09:12:32 2024 +0900 scheme.el: Enable dealing with regular expression literal * lisp/progmodes/scheme.el (scheme-syntax-propertize-regexp): New function. (scheme-syntax-propertize): Use it. diff --git a/etc/NEWS b/etc/NEWS index 775c8e02a95..1b86a968c5d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1218,6 +1218,11 @@ instead of: This allows the user to specify command line arguments to the non interactive Python interpreter specified by 'python-interpreter'. +** Scheme mode + +Scheme mode now handles regular expression literal #/regexp/ that is +available in some Scheme implementations. + ** use-package +++ diff --git a/lisp/progmodes/scheme.el b/lisp/progmodes/scheme.el index 67abab6913d..dc46f0fbbb8 100644 --- a/lisp/progmodes/scheme.el +++ b/lisp/progmodes/scheme.el @@ -410,10 +410,19 @@ See `run-hooks'." (defun scheme-syntax-propertize (beg end) (goto-char beg) (scheme-syntax-propertize-sexp-comment (point) end) + (scheme-syntax-propertize-regexp end) (funcall (syntax-propertize-rules ("\\(#\\);" (1 (prog1 "< cn" - (scheme-syntax-propertize-sexp-comment (point) end))))) + (scheme-syntax-propertize-sexp-comment (point) end)))) + ("\\(#\\)/" (1 (when (null (nth 8 (save-excursion + (syntax-ppss (match-beginning 0))))) + (put-text-property + (match-beginning 1) + (match-end 1) + 'syntax-table (string-to-syntax "|")) + (scheme-syntax-propertize-regexp end) + nil)))) (point) end)) (defun scheme-syntax-propertize-sexp-comment (_ end) @@ -430,6 +439,22 @@ See `run-hooks'." 
'syntax-table (string-to-syntax "> cn"))) (scan-error (goto-char end)))))) +(defun scheme-syntax-propertize-regexp (end) + (let* ((state (syntax-ppss)) + (within-str (nth 3 state)) + (start-delim-pos (nth 8 state))) + (when (and within-str + (char-equal ?# (char-after start-delim-pos))) + (while (and (re-search-forward "/" end 'move) + (eq -1 + (% (save-excursion + (backward-char) + (skip-chars-backward "\\\\")) + 2)))) + (when (< (point) end) + (put-text-property (match-beginning 0) (match-end 0) + 'syntax-table (string-to-syntax "|")))))) + ;;;###autoload (define-derived-mode dsssl-mode scheme-mode "DSSSL" "Major mode for editing DSSSL code. commit 0cf9b58228580bfa400cdaf35eac04d375fe4785 Author: Po Lu Date: Mon Apr 1 09:26:03 2024 +0800 Fix listing of Android root directory when it is accessible * src/androidvfs.c (android_root_closedir, android_root_dirfd) (android_root_opendir): Allocate an ersatz file descriptor even if directory is non-NULL, so that at-funcs will properly return file status for virtual files in the root directory. diff --git a/src/androidvfs.c b/src/androidvfs.c index e8eb9f2d41c..2e23ed40cf5 100644 --- a/src/androidvfs.c +++ b/src/androidvfs.c @@ -6730,7 +6730,8 @@ android_root_closedir (struct android_vdir *vdir) if (dir->directory) closedir (dir->directory); - else if (root_fd_references--) + + if (root_fd_references--) ; else { @@ -6745,13 +6746,7 @@ android_root_closedir (struct android_vdir *vdir) static int android_root_dirfd (struct android_vdir *vdir) { - struct android_unix_vdir *dir; - - dir = (struct android_unix_vdir *) vdir; - - if (dir->directory) - return dirfd (dir->directory); - + eassert (root_fd != -1); return root_fd; } @@ -6778,13 +6773,13 @@ android_root_opendir (struct android_vnode *vnode) dir->directory = directory; dir->index = 0; - if (!directory) - { - /* Allocate a temporary file descriptor for this ersatz root. 
*/ - if (root_fd < 0) - root_fd = open ("/dev/null", O_RDONLY | O_CLOEXEC); - root_fd_references++; - } + /* Allocate a temporary file descriptor for this ersatz root. This is + required regardless of the value of DIRECTORY, as android_fstatat + and co. will not defer to the VFS layer if a directory file + descriptor is not known to be special. */ + if (root_fd < 0) + root_fd = open ("/dev/null", O_RDONLY | O_CLOEXEC); + root_fd_references++; return &dir->vdir; } commit 09f381d70d8852d90cdd3c8d7e1e2786dbc61f92 Author: Michael Albinus Date: Sun Mar 31 19:59:58 2024 +0200 Fix wildcard signals in dbusbind.c * src/dbusbind.c (xd_read_message_1): Handle registered signals with wildcards. (Bug#69926) diff --git a/src/dbusbind.c b/src/dbusbind.c index 7069e27e3eb..0441b07a3b2 100644 --- a/src/dbusbind.c +++ b/src/dbusbind.c @@ -1689,6 +1689,22 @@ xd_read_message_1 (DBusConnection *connection, Lisp_Object bus) bus, build_string (interface), build_string (member)); value = Fgethash (key, Vdbus_registered_objects_table, Qnil); + /* A signal could be registered with a nil interface or member. */ + if (mtype == DBUS_MESSAGE_TYPE_SIGNAL) + { + key = list4 (QCsignal, bus, Qnil, build_string (member)); + value = CALLN (Fappend, value, + Fgethash (key, Vdbus_registered_objects_table, Qnil)); + + key = list4 (QCsignal, bus, build_string (interface), Qnil); + value = CALLN (Fappend, value, + Fgethash (key, Vdbus_registered_objects_table, Qnil)); + + key = list4 (QCsignal, bus, Qnil, Qnil); + value = CALLN (Fappend, value, + Fgethash (key, Vdbus_registered_objects_table, Qnil)); + } + /* Loop over the registered functions. Construct an event. */ for (; !NILP (value); value = CDR_SAFE (value)) { commit e303992a9faf7432ed8045825db45da6f6c48080 Author: Michael Albinus Date: Sun Mar 31 14:21:58 2024 +0200 Adapt Tramp version integrated in Emacs 29.3 * lisp/net/trampver.el: Adapt Tramp version integrated in Emacs 29.3. 
diff --git a/lisp/net/trampver.el b/lisp/net/trampver.el index c131d39c110..41647d42cc5 100644 --- a/lisp/net/trampver.el +++ b/lisp/net/trampver.el @@ -105,7 +105,7 @@ ("2.3.5.26.3" . "26.3") ("2.4.3.27.1" . "27.1") ("2.4.5.27.2" . "27.2") ("2.5.2.28.1" . "28.1") ("2.5.3.28.2" . "28.2") ("2.5.4" . "28.3") - ("2.6.0.29.1" . "29.1") ("2.6.2.29.2" . "29.2"))) + ("2.6.0.29.1" . "29.1") ("2.6.2.29.2" . "29.2") ("2.6.3-pre" . "29.3"))) (add-hook 'tramp-unload-hook (lambda () commit 7f377407b4b7d6ac9994ed983d7516bc42139885 Author: Po Lu Date: Sun Mar 31 15:33:40 2024 +0800 List special directories when reading root directory on Android * src/androidvfs.c (root_vfs_ops): Substitute android_root_opendir for android_unix_opendir. (struct android_root_vdir): New structure. (root_fd, root_fd_references): New variables. (android_root_readdir, android_root_closedir, android_root_dirfd) (android_root_opendir): New functions. (android_fstatat_1): Test provided fd against root_fd, and if they match, prefix FILENAME with the name of the root directory. * lisp/ls-lisp.el (ls-lisp-insert-directory): If d-f-a-a signals an error while retrieving attributes, compile the alist of directory contents by hand. diff --git a/lisp/ls-lisp.el b/lisp/ls-lisp.el index 89f0238cf74..d09b53b1cc3 100644 --- a/lisp/ls-lisp.el +++ b/lisp/ls-lisp.el @@ -328,11 +328,39 @@ not contain `d', so that a full listing is expected." full-directory-p) (let* ((dir (file-name-as-directory file)) (default-directory dir) ; so that file-attributes works + (id-format (if (memq ?n switches) + 'integer + 'string)) (file-alist - (directory-files-and-attributes dir nil wildcard-regexp t - (if (memq ?n switches) - 'integer - 'string))) + (catch 'new-list + (handler-bind + ((error + (lambda (error) + ;; `directory-files-and-attributes' signals + ;; failure on Unix systems if even a single + ;; file's attributes cannot be accessed. 
+ ;; + ;; Detect errors signaled while retrieving file + ;; attributes and resolve them by creating the + ;; attribute list manually, ignoring the + ;; attributes of files that cannot be accessed + ;; in this sense. + (when (member (cadr error) + '("Getting attributes" + "Reading symbolic link")) + (let ((file-list (directory-files dir nil + wildcard-regexp + t))) + (throw 'new-list + (mapcar (lambda (file) + (cons file + (or (ignore-errors + (file-attributes + file id-format)) + nil))) + file-list))))))) + (directory-files-and-attributes + dir nil wildcard-regexp t id-format)))) (sum 0) (max-uid-len 0) (max-gid-len 0) diff --git a/src/androidvfs.c b/src/androidvfs.c index a9035ae53c6..e8eb9f2d41c 100644 --- a/src/androidvfs.c +++ b/src/androidvfs.c @@ -6525,11 +6525,33 @@ NATIVE_NAME (ftruncate) (JNIEnv *env, jobject object, jint fd) /* Root vnode. This vnode represents the root inode, and is a regular - Unix vnode with modifications to `name' that make it return asset - vnodes. */ + Unix vnode with modifications to `name' so that it returns asset and + content vnodes, and to `opendir', so that asset and content vnodes + are read from the root directory, whether or not Emacs holds rights + to access the underlying filesystem. */ + +struct android_root_vdir +{ + /* The directory function table. */ + struct android_vdir vdir; + + /* The directory stream, or NULL if it could not be opened. */ + DIR *directory; + + /* Index of the next directory to return in `special_vnodes'. */ + int index; +}; + +/* File descriptor for instances of the foregoing structure when the + true root is unavailable. */ +static int root_fd = -1; + +/* Number of open instances referencing this file descriptor. */ +static ptrdiff_t root_fd_references; static struct android_vnode *android_root_name (struct android_vnode *, char *, size_t); +static struct android_vdir *android_root_opendir (struct android_vnode *); /* Vector of VFS operations associated with Unix root filesystem VFS nodes. 
*/ @@ -6548,7 +6570,7 @@ static struct android_vops root_vfs_ops = android_unix_mkdir, android_unix_chmod, android_unix_readlink, - android_unix_opendir, + android_root_opendir, }; /* Array of special named vnodes. */ @@ -6676,6 +6698,97 @@ android_root_name (struct android_vnode *vnode, char *name, return android_unix_name (vnode, name, length); } +static struct dirent * +android_root_readdir (struct android_vdir *vdir) +{ + struct android_root_vdir *dir; + static struct dirent dirent, *p; + + dir = (struct android_root_vdir *) vdir; + p = dir->directory ? readdir (dir->directory) : NULL; + + if (p || dir->index >= ARRAYELTS (special_vnodes)) + return p; + + dirent.d_ino = 0; + dirent.d_off = 0; + dirent.d_reclen = sizeof dirent; + dirent.d_type = DT_DIR; + + /* No element in special_vnode must overflow dirent.d_name. */ + strcpy ((char *) &dirent.d_name, + special_vnodes[dir->index++].name); + return &dirent; +} + +static void +android_root_closedir (struct android_vdir *vdir) +{ + struct android_root_vdir *dir; + + dir = (struct android_root_vdir *) vdir; + + if (dir->directory) + closedir (dir->directory); + else if (root_fd_references--) + ; + else + { + /* Close root_fd, for which no references remain. */ + close (root_fd); + root_fd = -1; + } + + xfree (vdir); +} + +static int +android_root_dirfd (struct android_vdir *vdir) +{ + struct android_unix_vdir *dir; + + dir = (struct android_unix_vdir *) vdir; + + if (dir->directory) + return dirfd (dir->directory); + + return root_fd; +} + +static struct android_vdir * +android_root_opendir (struct android_vnode *vnode) +{ + struct android_unix_vnode *vp; + struct android_root_vdir *dir; + DIR *directory; + + /* Try to opendir the vnode. */ + vp = (struct android_unix_vnode *) vnode; + + directory = opendir (vp->name); + + /* Proceed with the remaining code if directory is nil, in which event + directory functions will simply forgo listing files inside the real + root directory. 
*/ + + dir = xmalloc (sizeof *dir); + dir->vdir.readdir = android_root_readdir; + dir->vdir.closedir = android_root_closedir; + dir->vdir.dirfd = android_root_dirfd; + dir->directory = directory; + dir->index = 0; + + if (!directory) + { + /* Allocate a temporary file descriptor for this ersatz root. */ + if (root_fd < 0) + root_fd = open ("/dev/null", O_RDONLY | O_CLOEXEC); + root_fd_references++; + } + + return &dir->vdir; +} + /* File system lookup. */ @@ -7223,6 +7336,14 @@ android_fstatat_1 (int dirfd, const char *filename, return 0; } + /* /foo... */ + + if (root_fd >= 0 && dirfd == root_fd) + { + snprintf (buffer, size, "/%s", filename); + return 0; + } + return 1; } commit de8cae30bcf8283e4c3b069ccb8b75224659ac5c Author: Eshel Yaron Date: Tue Mar 26 22:34:51 2024 +0100 Add global minor mode 'global-completion-preview-mode' This is a global variant of 'completion-preview-mode'. * lisp/completion-preview.el (global-completion-preview-mode): New global minor mode. * doc/emacs/programs.texi (Symbol Completion): Document it. * etc/NEWS: Announce it. (Bug#70010) diff --git a/doc/emacs/programs.texi b/doc/emacs/programs.texi index 1627e7e6cb7..de28a9f1dd4 100644 --- a/doc/emacs/programs.texi +++ b/doc/emacs/programs.texi @@ -1706,11 +1706,14 @@ based on the spell-checker's dictionary. @xref{Spelling}. @cindex suggestion preview @cindex Completion Preview mode @findex completion-preview-mode +@findex global-completion-preview-mode Completion Preview mode is a minor mode that shows completion -suggestions as you type. When you enable this mode (with @kbd{M-x -completion-preview-mode}), Emacs automatically displays the -suggested completion for text around point as an in-line preview -right after point; type @key{TAB} to accept the suggestion. +suggestions as you type. You can enable it for the current buffer with +@kbd{M-x completion-preview-mode}, or globally with @w{@kbd{M-x +global-completion-preview-mode}}. 
When Completion Preview mode is on, +Emacs automatically displays the suggested completion for text around +point as an in-line preview right after point; type @key{TAB} to accept +the suggestion. @node MixedCase Words @section MixedCase Words diff --git a/etc/NEWS b/etc/NEWS index 1204f58c5ca..775c8e02a95 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1559,6 +1559,8 @@ sexp navigation more intuitive. This minor mode shows you symbol completion suggestions as you type, using an inline preview. New user options in the 'completion-preview' customization group control exactly when Emacs displays this preview. +'completion-preview-mode' is buffer-local; to enable it globally, use +'global-completion-preview-mode'. --- ** The highly accessible Modus themes collection has eight items. diff --git a/lisp/completion-preview.el b/lisp/completion-preview.el index e827da43a08..6abdfed51a3 100644 --- a/lisp/completion-preview.el +++ b/lisp/completion-preview.el @@ -415,5 +415,9 @@ cycles backward." (remove-hook 'post-command-hook #'completion-preview--post-command t) (completion-preview-active-mode -1))) +(define-globalized-minor-mode global-completion-preview-mode + completion-preview-mode completion-preview-mode + :predicate '((not minibuffer-mode special-mode) t)) + (provide 'completion-preview) ;;; completion-preview.el ends here commit 994bcc125b66397b455c8a7b70fb454b483df052 Author: Eli Zaretskii Date: Sun Mar 31 10:29:34 2024 +0300 Fix the new PEG library * doc/lispref/peg.texi (Parsing Expression Grammars) (PEX Definitions, Parsing Actions, Writing PEG Rules): Fix markup, indexing, and wording. * etc/NEWS: Fix wording of PEG entry. * test/lisp/progmodes/peg-tests.el: Move from test/lisp/, to match the directory of peg.el. 
diff --git a/doc/lispref/peg.texi b/doc/lispref/peg.texi index ef4dfa7653e..fbf57852ee0 100644 --- a/doc/lispref/peg.texi +++ b/doc/lispref/peg.texi @@ -7,29 +7,34 @@ @chapter Parsing Expression Grammars @cindex text parsing @cindex parsing expression grammar +@cindex PEG Emacs Lisp provides several tools for parsing and matching text, from regular expressions (@pxref{Regular Expressions}) to full -@acronym{LL} grammar parsers (@pxref{Top,, Bovine parser -development,bovine}). @dfn{Parsing Expression Grammars} +left-to-right (a.k.a.@: @acronym{LL}) grammar parsers (@pxref{Top,, +Bovine parser development,bovine}). @dfn{Parsing Expression Grammars} (@acronym{PEG}) are another approach to text parsing that offer more structure and composibility than regular expressions, but less complexity than context-free grammars. -A @acronym{PEG} parser is defined as a list of named rules, each of -which matches text patterns, and/or contains references to other +A Parsing Expression Grammar (@acronym{PEG}) describes a formal language +in terms of a set of rules for recognizing strings in the language. In +Emacs, a @acronym{PEG} parser is defined as a list of named rules, each +of which matches text patterns and/or contains references to other rules. Parsing is initiated with the function @code{peg-run} or the macro @code{peg-parse} (see below), and parses text after point in the current buffer, using a given set of rules. @cindex parsing expression -The definition of each rule is referred to as a @dfn{parsing -expression} (@acronym{PEX}), and can consist of a literal string, a -regexp-like character range or set, a peg-specific construct -resembling an elisp function call, a reference to another rule, or a -combination of any of these. A grammar is expressed as a tree of -rules in which one rule is typically treated as a ``root'' or -``entry-point'' rule. 
For instance: +@cindex root, of parsing expression grammar +@cindex entry-point, of parsing expression grammar +Each rule in a @acronym{PEG} is referred to as a @dfn{parsing +expression} (@acronym{PEX}), and can be specified as a literal string, a +regexp-like character range or set, a peg-specific construct resembling +an Emacs Lisp function call, a reference to another rule, or a +combination of any of these. A grammar is expressed as a tree of rules +in which one rule is typically treated as a ``root'' or ``entry-point'' +rule. For instance: @example @group @@ -56,14 +61,17 @@ first rule is considered the ``entry-point'': @end group @end example -This macro represents the simplest use of the @acronym{PEG} library, -but also the least flexible, as the rules must be written directly -into the source code. A more flexible approach involves use of three -macros in conjunction: @code{with-peg-rules}, a @code{let}-like -construct that makes a set of rules available within the macro body; -@code{peg-run}, which initiates parsing given a single rule; and -@code{peg}, which is used to wrap the entry-point rule name. In fact, -a call to @code{peg-parse} expands to just this set of calls. The +@c FIXME: These two should be formally defined using @defmac and @defun. +@findex with-peg-rules +@findex peg-run +The @code{peg-parse} macro represents the simplest use of the +@acronym{PEG} library, but also the least flexible, as the rules must be +written directly into the source code. A more flexible approach +involves use of three macros in conjunction: @code{with-peg-rules}, a +@code{let}-like construct that makes a set of rules available within the +macro body; @code{peg-run}, which initiates parsing given a single rule; +and @code{peg}, which is used to wrap the entry-point rule name. In +fact, a call to @code{peg-parse} expands to just this set of calls. 
The above example could be written as: @example @@ -79,33 +87,43 @@ above example could be written as: This allows more explicit control over the ``entry-point'' of parsing, and allows the combination of rules from different sources. +@c FIXME: Use @defmac. +@findex define-peg-rule Individual rules can also be defined using a more @code{defun}-like syntax, using the macro @code{define-peg-rule}: @example +@group (define-peg-rule digit () [0-9]) +@end group @end example This also allows for rules that accept an argument (supplied by the -@code{funcall} PEG rule). +@code{funcall} PEG rule, @pxref{PEX Definitions}). +@c FIXME: Use @defmac. +@findex define-peg-ruleset Another possibility is to define a named set of rules with @code{define-peg-ruleset}: @example +@group (define-peg-ruleset number-grammar '((number sign digit (* digit)) digit ;; A reference to the definition above. (sign (or "+" "-" "")))) +@end group @end example Rules and rulesets defined this way can be referred to by name in later calls to @code{peg-run} or @code{with-peg-rules}: @example +@group (with-peg-rules number-grammar (peg-run (peg number))) +@end group @end example By default, calls to @code{peg-run} or @code{peg-parse} produce no @@ -125,11 +143,11 @@ act upon parsed strings, rules can include @dfn{actions}, see Parsing expressions can be defined using the following syntax: @table @code -@item (and E1 E2 ...) -A sequence of @acronym{PEX}s that must all be matched. The @code{and} form is -optional and implicit. +@item (and @var{e1} @var{e2}@dots{}) +A sequence of @acronym{PEX}s that must all be matched. The @code{and} +form is optional and implicit. -@item (or E1 E2 ...) +@item (or @var{e1} @var{e2}@dots{}) Prioritized choices, meaning that, as in Elisp, the choices are tried in order, and the first successful match is used. Note that this is distinct from context-free grammars, in which selection between @@ -141,43 +159,43 @@ Matches any single character, as the regexp ``.''. 
@item @var{string} A literal string. -@item (char @var{C}) -A single character @var{C}, as an Elisp character literal. +@item (char @var{c}) +A single character @var{c}, as an Elisp character literal. -@item (* @var{E}) -Zero or more instances of expression @var{E}, as the regexp @samp{*}. +@item (* @var{e}) +Zero or more instances of expression @var{e}, as the regexp @samp{*}. Matching is always ``greedy''. -@item (+ @var{E}) -One or more instances of expression @var{E}, as the regexp @samp{+}. +@item (+ @var{e}) +One or more instances of expression @var{e}, as the regexp @samp{+}. Matching is always ``greedy''. -@item (opt @var{E}) -Zero or one instance of expression @var{E}, as the regexp @samp{?}. +@item (opt @var{e}) +Zero or one instance of expression @var{e}, as the regexp @samp{?}. -@item SYMBOL +@item @var{symbol} A symbol representing a previously-defined PEG rule. -@item (range CH1 CH2) -The character range between CH1 and CH2, as the regexp @samp{[CH1-CH2]}. +@item (range @var{ch1} @var{ch2}) +The character range between @var{ch1} and @var{ch2}, as the regexp +@samp{[@var{ch1}-@var{ch2}]}. -@item [CH1-CH2 "+*" ?x] +@item [@var{ch1}-@var{ch2} "+*" ?x] A character set, which can include ranges, character literals, or strings of characters. @item [ascii cntrl] A list of named character classes. -@item (syntax-class @var{NAME}) +@item (syntax-class @var{name}) A single syntax class. -@item (funcall E ARGS...) -Call @acronym{PEX} E (previously defined with @code{define-peg-rule}) -with arguments @var{ARGS}. +@item (funcall @var{e} @var{args}@dots{}) +Call @acronym{PEX} @var{e} (previously defined with +@code{define-peg-rule}) with arguments @var{args}. @item (null) The empty string. - @end table The following expressions are used as anchors or tests -- they do not @@ -210,19 +228,19 @@ Beginning of symbol. @item (eos) End of symbol. -@item (if E) -Returns non-@code{nil} if parsing @acronym{PEX} E from point succeeds (point -is not moved). 
- -@item (not E) -Returns non-@code{nil} if parsing @acronym{PEX} E from point fails (point -is not moved). +@item (if @var{e}) +Returns non-@code{nil} if parsing @acronym{PEX} @var{e} from point +succeeds (point is not moved). -@item (guard EXP) -Treats the value of the Lisp expression EXP as a boolean. +@item (not @var{e}) +Returns non-@code{nil} if parsing @acronym{PEX} @var{e} from point fails +(point is not moved). +@item (guard @var{exp}) +Treats the value of the Lisp expression @var{exp} as a boolean. @end table +@c FIXME: peg-char-classes should be mentioned in the text below. @vindex peg-char-classes Character class matching can use the same named character classes as in regular expressions (@pxref{Top,, Character Classes,elisp}) @@ -234,12 +252,13 @@ in regular expressions (@pxref{Top,, Character Classes,elisp}) @cindex parsing stack By default the process of parsing simply moves point in the current buffer, ultimately returning @code{t} if the parsing succeeds, and -@code{nil} if it doesn't. It's also possible to define ``actions'' -that can run arbitrary Elisp at certain points in the parsed text. -These actions can optionally affect something called the @dfn{parsing -stack}, which is a list of values returned by the parsing process. -These actions only run (and only return values) if the parsing process -ultimately succeeds; if it fails the action code is not run at all. +@code{nil} if it doesn't. It's also possible to define @dfn{parsing +actions} that can run arbitrary Elisp at certain points in the parsed +text. These actions can optionally affect something called the +@dfn{parsing stack}, which is a list of values returned by the parsing +process. These actions only run (and only return values) if the parsing +process ultimately succeeds; if it fails the action code is not run at +all. Actions can be added anywhere in the definition of a rule. 
They are distinguished from parsing expressions by an initial backquote @@ -247,12 +266,13 @@ distinguished from parsing expressions by an initial backquote of hyphens (@samp{--}) somewhere within it. Symbols to the left of the hyphens are bound to values popped from the stack (they are somewhat analogous to the argument list of a lambda form). Values -produced by code to the right are pushed to the stack (analogous to -the return value of the lambda). For instance, the previous grammar -can be augmented with actions to return the parsed number as an actual -integer: +produced by code to the right of the hyphens are pushed onto the stack +(analogous to the return value of the lambda). For instance, the +previous grammar can be augmented with actions to return the parsed +number as an actual integer: @example +@group (with-peg-rules ((number sign digit (* digit `(a b -- (+ (* a 10) b))) `(sign val -- (* sign val))) @@ -261,6 +281,7 @@ integer: (and "" `(-- 1)))) (digit [0-9] `(-- (- (char-before) ?0)))) (peg-run (peg number))) +@end group @end example There must be values on the stack before they can be popped and @@ -271,43 +292,53 @@ only left-hand terms will consume (and discard) values from the stack. At the end of parsing, stack values are returned as a flat list. To return the string matched by a @acronym{PEX} (instead of simply -moving point over it), a rule like this can be used: +moving point over it), a grammar can use a rule like this: @example +@group (one-word `(-- (point)) (+ [word]) `(start -- (buffer-substring start (point)))) +@end group @end example -The first action pushes the initial value of point to the stack. The -intervening @acronym{PEX} moves point over the next word. The second -action pops the previous value from the stack (binding it to the -variable @code{start}), and uses that value to extract a substring -from the buffer and push it to the stack. 
This pattern is so common -that @acronym{PEG} provides a shorthand function that does exactly the -above, along with a few other shorthands for common scenarios: +@noindent +The first action above pushes the initial value of point to the stack. +The intervening @acronym{PEX} moves point over the next word. The +second action pops the previous value from the stack (binding it to the +variable @code{start}), then uses that value to extract a substring from +the buffer and push it to the stack. This pattern is so common that +@acronym{PEG} provides a shorthand function that does exactly the above, +along with a few other shorthands for common scenarios: @table @code -@item (substring @var{E}) -Match @acronym{PEX} @var{E} and push the matched string to the stack. - -@item (region @var{E}) -Match @var{E} and push the start and end positions of the matched -region to the stack. - -@item (replace @var{E} @var{replacement}) -Match @var{E} and replaced the matched region with the string @var{replacement}. - -@item (list @var{E}) -Match @var{E}, collect all values produced by @var{E} (and its -sub-expressions) into a list, and push that list to the stack. Stack +@findex substring (a PEG shorthand) +@item (substring @var{e}) +Match @acronym{PEX} @var{e} and push the matched string onto the stack. + +@findex region (a PEG shorthand) +@item (region @var{e}) +Match @var{e} and push the start and end positions of the matched +region onto the stack. + +@findex replace (a PEG shorthand) +@item (replace @var{e} @var{replacement}) +Match @var{e} and replace the matched region with the string +@var{replacement}. + +@findex list (a PEG shorthand) +@item (list @var{e}) +Match @var{e}, collect all values produced by @var{e} (and its +sub-expressions) into a list, and push that list onto the stack. Stack values are typically returned as a flat list; this is a way of ``grouping'' values together. 
@end table @node Writing PEG Rules @section Writing PEG Rules +@cindex PEG rules, pitfalls +@cindex Parsing Expression Grammar, pitfalls in rules Something to be aware of when writing PEG rules is that they are greedy. Rules which can consume a variable amount of text will always @@ -319,9 +350,10 @@ backtracking. For instance, this rule will never succeed: (forest (+ "tree" (* [blank])) "tree" (eol)) @end example -The @acronym{PEX} @code{(+ "tree" (* [blank]))} will consume all -repetitions of the word ``tree'', leaving none to match the final -@code{"tree"}. +@noindent +The @acronym{PEX} @w{@code{(+ "tree" (* [blank]))}} will consume all +the repetitions of the word @samp{tree}, leaving none to match the final +@samp{tree}. In these situations, the desired result can be obtained by using predicates and guards -- namely the @code{not}, @code{if} and @@ -331,6 +363,7 @@ predicates and guards -- namely the @code{not}, @code{if} and (forest (+ "tree" (* [blank])) (not (eol)) "tree" (eol)) @end example +@noindent The @code{if} and @code{not} operators accept a parsing expression and interpret it as a boolean, without moving point. The contents of a @code{guard} operator are evaluated as regular Lisp (not a @@ -345,6 +378,7 @@ rule: (end-game "game" (eob)) @end example +@noindent when run in a buffer containing the text ``game over'' after point, will move point to just after ``game'' then halt parsing, returning @code{nil}. Successful parsing will always return @code{t}, or the diff --git a/etc/NEWS b/etc/NEWS index 8e1c1082b3a..1204f58c5ca 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1587,8 +1587,8 @@ preventing the installation of Compat if unnecessary. +++ ** New package PEG. 
-Emacs now includes a library for writing (P)arsing (E)xpression -(G)rammars, an approach to text parsing that provides more structure +Emacs now includes a library for writing Parsing Expression +Grammars (PEG), an approach to text parsing that provides more structure than regular expressions, but less complexity than context-free grammars. The Info manual "(elisp) Parsing Expression Grammars" has documentation and examples. diff --git a/test/lisp/peg-tests.el b/test/lisp/progmodes/peg-tests.el similarity index 100% rename from test/lisp/peg-tests.el rename to test/lisp/progmodes/peg-tests.el