commit a5fbb652ed3614d6735015551564f32b80e42c53 (HEAD, refs/remotes/origin/master) Author: Mattias Engdegård Date: Mon Apr 1 17:02:58 2024 +0200 Update JSON codec doc strings * src/json.c (Fjson_serialize, Fjson_insert, Fjson_parse_string) (Fjson_parse_buffer): Make the text more readable, fix minor errors and avoid terminology confusion. diff --git a/src/json.c b/src/json.c index 486253581ff..c3244ad04d2 100644 --- a/src/json.c +++ b/src/json.c @@ -598,23 +598,27 @@ DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY, NULL, doc: /* Return the JSON representation of OBJECT as a string. -OBJECT must be t, a number, string, vector, hashtable, alist, plist, -or the Lisp equivalents to the JSON null and false values, and its -elements must recursively consist of the same kinds of values. t will -be converted to the JSON true value. Vectors will be converted to -JSON arrays, whereas hashtables, alists and plists are converted to -JSON objects. Hashtable keys must be strings, unique within each object. -Alist and plist keys must be symbols; if a key is duplicate, the first -instance is used. A leading colon in plist keys is elided. +OBJECT is translated as follows: + +`t' -- the JSON `true' value. +number -- a JSON number. +string -- a JSON string. +vector -- a JSON array. +hash-table -- a JSON object. Keys must be strings. +alist -- a JSON object. Keys must be symbols. +plist -- a JSON object. Keys must be symbols. + A leading colon in plist key names is elided. + +For duplicate object keys, the first value is used. The Lisp equivalents to the JSON null and false values are configurable in the arguments ARGS, a list of keyword/argument pairs: -The keyword argument `:null-object' specifies which object to use -to represent a JSON null value. It defaults to `:null'. +:null-object OBJ -- use OBJ to represent a JSON null value. + It defaults to `:null'. -The keyword argument `:false-object' specifies which object to use to -represent a JSON false value. It defaults to `:false'. +:false-object OBJ -- use OBJ to represent a JSON false value. + It defaults to `:false'. In you specify the same value for `:null-object' and `:false-object', a potentially ambiguous situation, the JSON output will not contain @@ -631,9 +635,9 @@ usage: (json-serialize OBJECT &rest ARGS) */) DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY, NULL, doc: /* Insert the JSON representation of OBJECT before point. -This is the same as (insert (json-serialize OBJECT)), but potentially +This is the same as (insert (json-serialize OBJECT ...)), but potentially faster. See the function `json-serialize' for allowed values of -OBJECT. +OBJECT and ARGS. usage: (json-insert OBJECT &rest ARGS) */) (ptrdiff_t nargs, Lisp_Object *args) { @@ -1734,31 +1738,30 @@ json_parse (struct json_parser *parser, DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY, NULL, - doc: /* Parse the JSON STRING into a Lisp object. + doc: /* Parse the JSON STRING into a Lisp value. This is essentially the reverse operation of `json-serialize', which -see. The returned object will be the JSON null value, the JSON false -value, t, a number, a string, a vector, a list, a hashtable, an alist, -or a plist. Its elements will be further objects of these types. If -there are duplicate keys in an object, all but the last one are -ignored. If STRING doesn't contain a valid JSON object, this function +see. The returned value will be the JSON null value, the JSON false +value, t, a number, a string, a vector, a list, a hash-table, an alist, +or a plist. Its elements will be further values of these types. +If STRING doesn't contain a valid JSON value, this function signals an error of type `json-parse-error'. The arguments ARGS are a list of keyword/argument pairs: -The keyword argument `:object-type' specifies which Lisp type is used -to represent objects; it can be `hash-table', `alist' or `plist'. It -defaults to `hash-table'. If an object has members with the same -key, `hash-table' keeps only the last value of such keys, while -`alist' and `plist' keep all the members. +:object-type TYPE -- use TYPE to represent JSON objects. + TYPE can be `hash-table' (the default), `alist' or `plist'. + If an object has members with the same key, `hash-table' keeps only + the last value of such keys, while `alist' and `plist' keep all the + members. -The keyword argument `:array-type' specifies which Lisp type is used -to represent arrays; it can be `array' (the default) or `list'. +:array-type TYPE -- use TYPE to represent JSON arrays. + TYPE can be `array' (the default) or `list'. -The keyword argument `:null-object' specifies which object to use -to represent a JSON null value. It defaults to `:null'. +:null-object OBJ -- use OBJ to represent a JSON null value. + It defaults to `:null'. -The keyword argument `:false-object' specifies which object to use to -represent a JSON false value. It defaults to `:false'. +:false-object OBJ -- use OBJ to represent a JSON false value. + It defaults to `:false'. usage: (json-parse-string STRING &rest ARGS) */) (ptrdiff_t nargs, Lisp_Object *args) { @@ -1782,35 +1785,34 @@ usage: (json-parse-string STRING &rest ARGS) */) DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer, 0, MANY, NULL, - doc: /* Read JSON object from current buffer starting at point. -Move point after the end of the object if parsing was successful. + doc: /* Read a JSON value from current buffer starting at point. +Move point after the end of the value if parsing was successful. On error, don't move point. -The returned object will be a vector, list, hashtable, alist, or +The returned value will be a vector, list, hashtable, alist, or plist. Its elements will be the JSON null value, the JSON false value, t, numbers, strings, or further vectors, lists, hashtables, -alists, or plists. If there are duplicate keys in an object, all -but the last one are ignored. +alists, or plists. -If the current buffer doesn't contain a valid JSON object, the +If the current buffer doesn't contain a valid JSON value, the function signals an error of type `json-parse-error'. The arguments ARGS are a list of keyword/argument pairs: -The keyword argument `:object-type' specifies which Lisp type is used -to represent objects; it can be `hash-table', `alist' or `plist'. It -defaults to `hash-table'. If an object has members with the same -key, `hash-table' keeps only the last value of such keys, while -`alist' and `plist' keep all the members. +:object-type TYPE -- use TYPE to represent JSON objects. + TYPE can be `hash-table' (the default), `alist' or `plist'. + If an object has members with the same key, `hash-table' keeps only + the last value of such keys, while `alist' and `plist' keep all the + members. -The keyword argument `:array-type' specifies which Lisp type is used -to represent arrays; it can be `array' (the default) or `list'. +:array-type TYPE -- use TYPE to represent JSON arrays. + TYPE can be `array' (the default) or `list'. -The keyword argument `:null-object' specifies which object to use -to represent a JSON null value. It defaults to `:null'. +:null-object OBJ -- use OBJ to represent a JSON null value. + It defaults to `:null'. -The keyword argument `:false-object' specifies which object to use to -represent a JSON false value. It defaults to `:false'. +:false-object OBJ -- use OBJ to represent a JSON false value. + It defaults to `:false'. usage: (json-parse-buffer &rest args) */) (ptrdiff_t nargs, Lisp_Object *args) { commit 8bddf7f93e671bccec8103cecd99629bcc05f071 Author: Mattias Engdegård Date: Mon Apr 1 17:01:07 2024 +0200 ; * src/json.c (json_parse_object): Call make_hash_table directly. diff --git a/src/json.c b/src/json.c index c64d44b4bac..486253581ff 100644 --- a/src/json.c +++ b/src/json.c @@ -1599,8 +1599,7 @@ json_parse_object (struct json_parser *parser) case json_object_hashtable: { EMACS_INT value = (parser->object_workspace_current - first) / 2; - result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, - make_fixed_natnum (value)); + result = make_hash_table (&hashtest_equal, value, Weak_None, false); struct Lisp_Hash_Table *h = XHASH_TABLE (result); for (size_t i = first; i < parser->object_workspace_current; i += 2) { commit dbfe3cae2d9497fb14c83f26425f9421d1ef57cb Author: Mattias Engdegård Date: Mon Apr 1 16:58:03 2024 +0200 Update JSON parser test and docs * test/src/json-tests.el (json-parse-string/object): Duplicated object keys are now retained in alist and plist output. * etc/NEWS: Mention it. diff --git a/etc/NEWS b/etc/NEWS index e575fc2936e..4b0f148dc5d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1721,8 +1721,16 @@ Use a float value for the first argument instead. Instead, use 'eshell-process-wait-time', which supports floating-point values. +** JSON + +--- +*** The parser keeps duplicated object keys in alist and plist output. +A JSON object such as '{"a":1,"a":2}' will now be translated into the +Lisp values '((a . 1) (a . 2))' or '(:a 1 :a 2)' if alist or plist +object types are requested. + --- -** The JSON parser sometimes signals different types of errors. +*** The parser sometimes signals different types of errors. It will now signal 'json-utf8-decode-error' for inputs that are not correctly UTF-8 encoded. diff --git a/test/src/json-tests.el b/test/src/json-tests.el index 628a5a3de57..8b730ef8c90 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -156,9 +156,6 @@ ) (ert-deftest json-parse-string/object () - :expected-result :failed - ;; FIXME: This currently fails. Should the parser deduplicate keys? - ;; Never, always, or for alist and plist only? (let ((input "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) (let ((actual (json-parse-string input))) @@ -167,9 +164,9 @@ (should (equal (cl-sort (map-pairs actual) #'string< :key #'car) '(("abc" . [9 :false]) ("def" . :null))))) (should (equal (json-parse-string input :object-type 'alist) - '((abc . [9 :false]) (def . :null)))) + '((abc . [1 2 t]) (def . :null) (abc . [9 :false])))) (should (equal (json-parse-string input :object-type 'plist) - '(:abc [9 :false] :def :null))))) + '(:abc [1 2 t] :def :null :abc [9 :false]))))) (ert-deftest json-parse-string/object-unicode-keys () (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}")) commit f53152faad170a93b7977c81f736cc787c2f9b71 Author: Po Lu Date: Mon Apr 1 10:46:19 2024 -0400 (scheme-syntax-propertize-sexp-comment): Allow `#;` in strings * lisp/progmodes/scheme.el (scheme-syntax-propertize-sexp-comment): Don't get confused by `#;` inside strings and (normal) comments. (scheme-sexp-comment-syntax-table): Comment-out, unused. diff --git a/lisp/progmodes/scheme.el b/lisp/progmodes/scheme.el index 79d076ff145..3242f1c345c 100644 --- a/lisp/progmodes/scheme.el +++ b/lisp/progmodes/scheme.el @@ -387,12 +387,12 @@ See `run-hooks'." (defvar scheme-font-lock-keywords scheme-font-lock-keywords-1 "Default expressions to highlight in Scheme modes.") -(defconst scheme-sexp-comment-syntax-table - (let ((st (make-syntax-table scheme-mode-syntax-table))) - (modify-syntax-entry ?\; "." st) - (modify-syntax-entry ?\n " " st) - (modify-syntax-entry ?# "'" st) - st)) +;; (defconst scheme-sexp-comment-syntax-table +;; (let ((st (make-syntax-table scheme-mode-syntax-table))) +;; (modify-syntax-entry ?\; "." st) +;; (modify-syntax-entry ?\n " " st) +;; (modify-syntax-entry ?# "'" st) +;; st)) (put 'lambda 'scheme-doc-string-elt 2) (put 'lambda* 'scheme-doc-string-elt 2) @@ -428,6 +428,7 @@ See `run-hooks'." (defun scheme-syntax-propertize-sexp-comment (end) (let ((state (syntax-ppss)) + ;; (beg (point)) (checked (point))) (when (eq 2 (nth 7 state)) ;; It's a sexp-comment. Tell parse-partial-sexp where it ends. @@ -437,9 +438,11 @@ See `run-hooks'." (progn (setq found nil) (condition-case nil - (progn + (save-restriction + (narrow-to-region (point-min) end) (goto-char startpos) (forward-sexp 1) + ;; (cl-assert (> (point) beg)) (setq found (point))) (scan-error (goto-char end))) ;; If there's a nested `#;', the syntax-tables will normally @@ -447,16 +450,22 @@ See `run-hooks'." ;; (forward-sexp 1) above may have landed at the wrong place. ;; So look for `#;' in the text over which we jumped, and ;; mark those we found as nested sexp-comments. - (let ((limit (or found end))) + (let ((limit (min end (or found end)))) (when (< checked limit) (goto-char checked) - (when (re-search-forward "\\(#\\);" limit 'move) - (setq checked (point)) + (while (and (re-search-forward "\\(#\\);" limit 'move) + ;; Skip those #; inside comments and strings. + (nth 8 (save-excursion + (parse-partial-sexp + startpos (match-beginning 0)))))) + (setq checked (point)) + (when (< (point) limit) (put-text-property (match-beginning 1) (match-end 1) 'syntax-table (string-to-syntax "< cn")) - (loop (point))) - (< (point) limit))))) + (loop (point)) + ;; Try the `forward-sexp' with the new text state. + t))))) (when found (goto-char found) (put-text-property (1- found) found commit 123bfc2779d52acfdb18e7fe64577645e227e1c2 Author: Po Lu Date: Mon Apr 1 21:12:49 2024 +0800 Correct custom type in tramp-androidsu.el * lisp/net/tramp-androidsu.el (tramp-androidsu-remote-path): Set type to '(repeat string). diff --git a/lisp/net/tramp-androidsu.el b/lisp/net/tramp-androidsu.el index 1ec9247cf3c..aa7871e6a33 100644 --- a/lisp/net/tramp-androidsu.el +++ b/lisp/net/tramp-androidsu.el @@ -63,7 +63,7 @@ may edit files belonging to any and all applications." "Directories in which to search for transfer programs and the like." :group 'tramp :version "30.1" - :type '(list string)) + :type '(repeat string)) (defvar tramp-androidsu-su-mm-supported 'unknown "Whether `su -mm' is supported on this system.") commit 7970f6bcfce7020030a7f87963496c06fa0017aa Author: Po Lu Date: Mon Apr 1 21:12:19 2024 +0800 ; json.c stylistic adjustments * src/json.c (json_parse_string): Stylistic changes. diff --git a/src/json.c b/src/json.c index ca9be26cd9f..c64d44b4bac 100644 --- a/src/json.c +++ b/src/json.c @@ -1118,9 +1118,10 @@ json_parse_string (struct json_parser *parser, bool intern, bool leading_colon) ptrdiff_t nbytes = parser->byte_workspace_current - parser->byte_workspace; ptrdiff_t nchars = nbytes - chars_delta; - const char *str = (const char *)parser->byte_workspace; - return intern ? intern_c_multibyte (str, nchars, nbytes) - : make_multibyte_string (str, nchars, nbytes); + const char *str = (const char *) parser->byte_workspace; + return (intern + ? intern_c_multibyte (str, nchars, nbytes) + : make_multibyte_string (str, nchars, nbytes)); } if (c & 0x80) commit 51e102a7c80d4b78eacb92a8cd164e9b1c5c3a91 Author: Mattias Engdegård Date: Mon Apr 1 14:41:37 2024 +0200 ; * doc/lispref/processes.texi: use @code for keywords in @table See discussion in bug#69709. diff --git a/doc/lispref/processes.texi b/doc/lispref/processes.texi index ea3fe738f69..c356c905dee 100644 --- a/doc/lispref/processes.texi +++ b/doc/lispref/processes.texi @@ -687,7 +687,7 @@ The arguments @var{args} are a list of keyword/argument pairs. Omitting a keyword is always equivalent to specifying it with value @code{nil}. Here are the meaningful keywords: -@table @asis +@table @code @item :name @var{name} Use the string @var{name} as the process name; if a process with this name already exists, then @var{name} is modified (by appending @@ -817,7 +817,7 @@ Omitting a keyword is always equivalent to specifying it with value Here are the meaningful keywords: -@table @asis +@table @code @item :name @var{name} Use the string @var{name} as the process name. As with @code{make-process}, it is modified if necessary to make it unique. @@ -2828,7 +2828,7 @@ equivalent to specifying it with value @code{nil}, except for are the meaningful keywords (those corresponding to network options are listed in the following section): -@table @asis +@table @code @item :name @var{name} Use the string @var{name} as the process name. It is modified if necessary to make it unique. @@ -3017,7 +3017,7 @@ modify these options later, using @code{set-network-process-option}. connections, so you will need to set the necessary options for each child connection as it is created. -@table @asis +@table @code @item :bindtodevice @var{device-name} If @var{device-name} is a non-empty string identifying a network interface name (see @code{network-interface-list}), only handle commit 601e772b06c47b7459b8355ab0114e87455a31d8 Merge: 61d70186a4a ce492cc5ae4 Author: Eli Zaretskii Date: Mon Apr 1 14:21:10 2024 +0300 Merge branch 'master' of git.savannah.gnu.org:/srv/git/emacs commit 61d70186a4a80d0ffc0aaef224e514ff9cac0372 Author: Zajcev Evgeny Date: Thu Mar 21 17:47:29 2024 +0300 Add support for `ch' and `cw' dimension specifiers for images * src/image.c (image_get_dimension, lookup_image): Handle `ch' and `cw' dimension specifiers in addition to `em'. * src/dispextern.h: Add new members `face_font_height' and `face_font_width' to `struct image'. * doc/lispref/display.texi (Image Descriptors): Document `ch' and `cw'. diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index b497967c445..f82c2fad14d 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi @@ -5788,8 +5788,11 @@ either an integer, which represents the dimension in pixels, or a pair length in @dfn{ems}@footnote{In typography an em is a distance equivalent to the height of the type. For example when using 12 point type 1 em is equal to 12 points. Its use ensures distances and type -remain proportional.}. One em is equivalent to the height of the font -and @var{value} may be an integer or a float. +remain proportional.}. One em is equivalent to the size of the font +and @var{value} may be an integer or a float. Also, dimension can be +specified in @code{(@var{value} . ch)} and @code{(@var{value} . cw)} +forms, where @code{ch} means height of the canonical character and +@code{cw} means width of the canonical character. The following is a list of properties that are meaningful for all image types (there are also properties which are meaningful only for diff --git a/src/dispextern.h b/src/dispextern.h index 1c3232fae3d..f29377f3596 100644 --- a/src/dispextern.h +++ b/src/dispextern.h @@ -3186,6 +3186,11 @@ struct image int face_font_size; char *face_font_family; + /* Details of the font used to calculate image size relative to the + canonical character size, with `ch' and `cw' specifiers. */ + int face_font_height; + int face_font_width; + /* True if this image has a `transparent' background -- that is, is uses an image mask. The accessor macro for this is `IMAGE_BACKGROUND_TRANSPARENT'. */ diff --git a/src/image.c b/src/image.c index 41d72964631..216bdc1ee66 100644 --- a/src/image.c +++ b/src/image.c @@ -2558,9 +2558,20 @@ image_get_dimension (struct image *img, Lisp_Object symbol) if (FIXNATP (value)) return min (XFIXNAT (value), INT_MAX); - if (CONSP (value) && NUMBERP (CAR (value)) && EQ (Qem, CDR (value))) - return scale_image_size (img->face_font_size, 1, XFLOATINT (CAR (value))); + if (CONSP (value) && NUMBERP (CAR (value))) + { + Lisp_Object dim = CDR (value); + if (EQ (Qem, dim)) + return scale_image_size (img->face_font_size, + 1, XFLOATINT (CAR (value))); + if (EQ (Qch, dim)) + return scale_image_size (img->face_font_height, + 1, XFLOATINT (CAR (value))); + if (EQ (Qcw, dim)) + return scale_image_size (img->face_font_width, + 1, XFLOATINT (CAR (value))); + } return -1; } @@ -3384,6 +3395,8 @@ lookup_image (struct frame *f, Lisp_Object spec, int face_id) img->face_foreground = foreground; img->face_background = background; img->face_font_size = font_size; + img->face_font_height = face->font->height; + img->face_font_width = face->font->average_width; img->face_font_family = xmalloc (strlen (font_family) + 1); strcpy (img->face_font_family, font_family); img->load_failed_p = ! img->type->load_img (f, img); @@ -12794,6 +12807,8 @@ non-numeric, there is no explicit limit on the size of images. */); DEFSYM (QCmax_height, ":max-height"); DEFSYM (Qem, "em"); + DEFSYM (Qch, "ch"); + DEFSYM (Qcw, "cw"); #ifdef HAVE_NATIVE_TRANSFORMS DEFSYM (Qscale, "scale"); commit ce492cc5ae4b0a185dde45b5f2fc046e8d98dc36 Author: Mattias Engdegård Date: Mon Apr 1 13:09:23 2024 +0200 * etc/NEWS: Mention a JSON codec improvement. diff --git a/etc/NEWS b/etc/NEWS index 11bca90314a..e575fc2936e 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -2326,6 +2326,10 @@ this case, would mean repeating the object in the argument list.) When replacing an object with a different one, passing both the new and old objects is still necessary. +--- +** The JSON encoder and decoder now accept arbitarily large integers. +Previously, they were limited to the range of signed 64-bit integers. + * Changes in Emacs 30.1 on Non-Free Operating Systems commit 3f9263f791fb8e4ff0507c8fde95fa19dabcab10 Author: Vladimir Kazanov Date: Sun Mar 31 18:32:59 2024 +0100 Fix symbol list matching regexps. Fix symbol list matching regexp performance Allow empty face lists, improve the face list matching regexp (see discussion in Bug#69714) based on relint's comments, add tests: * test/lisp/emacs-lisp/ert-font-lock-tests.el: Add tests. * lisp/emacs-lisp/ert-font-lock.el: Fix regexps. diff --git a/lisp/emacs-lisp/ert-font-lock.el b/lisp/emacs-lisp/ert-font-lock.el index e77c8945dc3..c6fd65e1507 100644 --- a/lisp/emacs-lisp/ert-font-lock.el +++ b/lisp/emacs-lisp/ert-font-lock.el @@ -40,31 +40,34 @@ (require 'pcase) (defconst ert-font-lock--face-symbol-re - (rx (one-or-more (or alphanumeric "-" "_" "."))) - "A face symbol matching regex.") + (rx (+ (or alphanumeric "-" "_" "." "/"))) + "A face symbol matching regex. +The regexp cannot use character classes as these can be redefined by the +major mode of the host language.") (defconst ert-font-lock--face-symbol-list-re (rx "(" (* whitespace) - (one-or-more - (seq (regexp ert-font-lock--face-symbol-re) - (* whitespace))) + (? (regexp ert-font-lock--face-symbol-re)) + (* (+ whitespace) + (regexp ert-font-lock--face-symbol-re)) + (* whitespace) ")") "A face symbol list matching regex.") (defconst ert-font-lock--assertion-line-re (rx ;; leading column assertion (arrow/caret) - (group (or "^" "<-")) - (zero-or-more whitespace) + (group-n 1 (or "^" "<-")) + (* whitespace) ;; possible to have many carets on an assertion line - (group (zero-or-more (seq "^" (zero-or-more whitespace)))) + (group-n 2 (* "^" (* whitespace))) ;; optional negation of the face specification - (group (optional "!")) - (zero-or-more whitespace) + (group-n 3 (optional "!")) + (* whitespace) ;; face symbol name or a list of symbols - (group (or (regexp ert-font-lock--face-symbol-re) - (regexp ert-font-lock--face-symbol-list-re)))) + (group-n 4 (or (regexp ert-font-lock--face-symbol-re) + (regexp ert-font-lock--face-symbol-list-re)))) "An ert-font-lock assertion line regex.") (defun ert-font-lock--validate-major-mode (mode) diff --git a/test/lisp/emacs-lisp/ert-font-lock-tests.el b/test/lisp/emacs-lisp/ert-font-lock-tests.el index fa2e5dc4db7..33ef2c52288 100644 --- a/test/lisp/emacs-lisp/ert-font-lock-tests.el +++ b/test/lisp/emacs-lisp/ert-font-lock-tests.el @@ -44,13 +44,56 @@ (goto-char (point-min)) ,@body)) +(defun ert-font-lock--wrap-begin-end (re) + (concat "^" re "$")) + +;;; Regexp tests +;;; + +(ert-deftest test-regexp--face-symbol-re () + (let ((re (ert-font-lock--wrap-begin-end + ert-font-lock--face-symbol-re))) + (should (string-match-p re "font-lock-keyword-face")) + (should (string-match-p re "-face")) + (should (string-match-p re "weird-package/-face")) + (should (string-match-p re "-")) + (should (string-match-p re "font-lock.face")) + (should-not (string-match-p re "face suffix-with")) + (should-not (string-match-p re "(")))) + +(ert-deftest test-regexp--face-symbol-list-re () + (let ((re (ert-font-lock--wrap-begin-end + ert-font-lock--face-symbol-list-re))) + (should (string-match-p re "(face1 face2)")) + (should (string-match-p re "(face1)")) + (should (string-match-p re "()")) + (should-not (string-match-p re ")")) + (should-not (string-match-p re "(")))) + +(ert-deftest test-regexp--assertion-line-re () + (let ((re (ert-font-lock--wrap-begin-end + ert-font-lock--assertion-line-re))) + (should (string-match-p re "^ something-face")) + (should (string-match-p re "^ !something-face")) + (should (string-match-p re "^ (face1 face2)")) + (should (string-match-p re "^ !(face1 face2)")) + (should (string-match-p re "^ ()")) + (should (string-match-p re "^ !()")) + (should (string-match-p re "^ nil")) + (should (string-match-p re "^ !nil")) + (should (string-match-p re "<- something-face")) + (should (string-match-p re "<- ^ something-face")) + (should (string-match-p re "^^ ^ something-face")) + (should (string-match-p re "^ ^something-face")) + (should-not (string-match-p re "^ <- ^something-face")))) + ;;; Comment parsing tests ;; (ert-deftest test-line-comment-p--fundamental () (with-temp-buffer-str-mode fundamental-mode - "// comment\n" - (should-not (ert-font-lock--line-comment-p)))) + "// comment\n" + (should-not (ert-font-lock--line-comment-p)))) (ert-deftest test-line-comment-p--emacs-lisp () (with-temp-buffer-str-mode emacs-lisp-mode commit 3f4486dd76c44c76c58605fb9a1643515133ff3f Author: Mattias Engdegård Date: Sun Mar 31 19:19:58 2024 +0200 Don't signal `json-end-of-file` for short nonempty bad JSON inputs * src/json.c (json_parse_value): Generate a plain parse error. * test/src/json-tests.el (json-parse-string/short): Adapt test. diff --git a/src/json.c b/src/json.c index 8749009a24b..ca9be26cd9f 100644 --- a/src/json.c +++ b/src/json.c @@ -1655,9 +1655,9 @@ json_parse_value (struct json_parser *parser, int c) return json_parse_number (parser, c); else { - int c2 = json_input_get (parser); - int c3 = json_input_get (parser); - int c4 = json_input_get (parser); + int c2 = json_input_get_if_possible (parser); + int c3 = json_input_get_if_possible (parser); + int c4 = json_input_get_if_possible (parser); int c5 = json_input_get_if_possible (parser); if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e' diff --git a/test/src/json-tests.el b/test/src/json-tests.el index a1bafadaa87..628a5a3de57 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -215,11 +215,9 @@ (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) (ert-deftest json-parse-string/short () - :expected-result :failed (should-error (json-parse-string "") :type 'json-end-of-file) (should-error (json-parse-string " ") :type 'json-end-of-file) - ;; BUG: currently results in `json-end-of-file' for short non-empty inputs. - (dolist (s '("a" "ab" "abc" "abcd" + (dolist (s '("a" "ab" "abc" "abcd" "\0" "\1" "t" "tr" "tru" "truE" "truee" "n" "nu" "nul" "nulL" "nulll" "f" "fa" "fal" "fals" "falsE" "falsee")) commit 734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1 Author: Mattias Engdegård Date: Sun Mar 31 15:00:00 2024 +0200 Faster JSON parsing Speed up JSON parsing substantially by only UTF-8-parsing string literals and only exactly once. Previously, json-parse-string always first parsed the entire input and copied it to a new string, and then validated each string literal twice. We no longer create an extra new string when interning an alist key, nor do we garble plist keys with Unicode characters. * src/lread.c (intern_c_multibyte): New. * src/json.c (json_encode): Remove. (utf8_error): New. (json_parse_string): Faster and more careful UTF-8 decoding. Create and return a new multibyte string or symbol without extra decoding. All callers adapted. (Fjson_parse_string): Skip expensive input pre-decoding. * test/src/json-tests.el (json-parse-string/object-unicode-keys) (json-parse-string/short): New. (json-parse-string/string, json-parse-string/invalid-unicode): Adapt tests. * etc/NEWS: Mentioned change in errors. diff --git a/etc/NEWS b/etc/NEWS index 903c60ac97e..11bca90314a 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1721,6 +1721,11 @@ Use a float value for the first argument instead. Instead, use 'eshell-process-wait-time', which supports floating-point values. +--- +** The JSON parser sometimes signals different types of errors. +It will now signal 'json-utf8-decode-error' for inputs that are not +correctly UTF-8 encoded. + * Lisp Changes in Emacs 30.1 diff --git a/src/json.c b/src/json.c index 908db022c50..8749009a24b 100644 --- a/src/json.c +++ b/src/json.c @@ -699,24 +699,6 @@ usage: (json-insert OBJECT &rest ARGS) */) } -/* Note that all callers of make_string_from_utf8 and build_string_from_utf8 - below either pass only value UTF-8 strings or use the function for - formatting error messages; in the latter case correctness isn't - critical. */ - -/* Return a unibyte string containing the sequence of UTF-8 encoding - units of the UTF-8 representation of STRING. If STRING does not - represent a sequence of Unicode scalar values, return a string with - unspecified contents. */ - -static Lisp_Object -json_encode (Lisp_Object string) -{ - /* FIXME: Raise an error if STRING is not a scalar value - sequence. */ - return encode_string_utf_8 (string, Qnil, false, Qt, Qt); -} - #define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64 #define JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE 512 @@ -1081,52 +1063,21 @@ json_parse_unicode (struct json_parser *parser) return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3]; } -/* Parses an utf-8 code-point encoding (except the first byte), and - returns the numeric value of the code-point (without considering - the first byte) */ -static int -json_handle_utf8_tail_bytes (struct json_parser *parser, int n) +static AVOID +utf8_error (struct json_parser *parser) { - int v = 0; - for (int i = 0; i < n; i++) - { - int c = json_input_get (parser); - json_byte_workspace_put (parser, c); - if ((c & 0xc0) != 0x80) - json_signal_error (parser, Qjson_utf8_decode_error); - v = (v << 6) | (c & 0x3f); - } - return v; + json_signal_error (parser, Qjson_utf8_decode_error); } -/* Reads a JSON string, and puts the result into the byte workspace */ -static void -json_parse_string (struct json_parser *parser) -{ - /* a single_uninteresting byte can be simply copied from the input - to output, it doesn't need any extra care. This means all the - characters between [0x20;0x7f], except the double quote and - the backslash */ - static const char is_single_uninteresting[256] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - +/* Parse a string literal. Optionally prepend a ':'. + Return the string or an interned symbol. */ +static Lisp_Object +json_parse_string (struct json_parser *parser, bool intern, bool leading_colon) +{ + json_byte_workspace_reset (parser); + if (leading_colon) + json_byte_workspace_put (parser, ':'); + ptrdiff_t chars_delta = 0; /* nchars - nbytes */ for (;;) { /* This if is only here for a possible speedup. If there are 4 @@ -1138,10 +1089,10 @@ json_parse_string (struct json_parser *parser) int c1 = parser->input_current[1]; int c2 = parser->input_current[2]; int c3 = parser->input_current[3]; - bool v0 = is_single_uninteresting[c0]; - bool v1 = is_single_uninteresting[c1]; - bool v2 = is_single_uninteresting[c2]; - bool v3 = is_single_uninteresting[c3]; + bool v0 = json_plain_char[c0]; + bool v1 = json_plain_char[c1]; + bool v2 = json_plain_char[c2]; + bool v3 = json_plain_char[c3]; if (v0 && v1 && v2 && v3) { json_byte_workspace_put (parser, c0); @@ -1156,43 +1107,62 @@ json_parse_string (struct json_parser *parser) int c = json_input_get (parser); parser->current_column++; - if (is_single_uninteresting[c]) + if (json_plain_char[c]) { json_byte_workspace_put (parser, c); continue; } if (c == '"') - return; - else if (c & 0x80) { - /* Handle utf-8 encoding */ + ptrdiff_t nbytes + = parser->byte_workspace_current - parser->byte_workspace; + ptrdiff_t nchars = nbytes - chars_delta; + const char *str = (const char *)parser->byte_workspace; + return intern ? intern_c_multibyte (str, nchars, nbytes) + : make_multibyte_string (str, nchars, nbytes); + } + + if (c & 0x80) + { + /* Parse UTF-8, strictly. This is the correct thing to do + whether or not the input is a unibyte or multibyte string. */ json_byte_workspace_put (parser, c); - if (c < 0xc0) - json_signal_error (parser, Qjson_utf8_decode_error); - else if (c < 0xe0) + unsigned char c1 = json_input_get (parser); + if ((c1 & 0xc0) != 0x80) + utf8_error (parser); + json_byte_workspace_put (parser, c1); + if (c <= 0xc1) + utf8_error (parser); + else if (c <= 0xdf) + chars_delta += 1; + else if (c <= 0xef) { - int n = ((c & 0x1f) << 6 - | json_handle_utf8_tail_bytes (parser, 1)); - if (n < 0x80) - json_signal_error (parser, Qjson_utf8_decode_error); - } - else if (c < 0xf0) - { - int n = ((c & 0xf) << 12 - | json_handle_utf8_tail_bytes (parser, 2)); - if (n < 0x800 || (n >= 0xd800 && n < 0xe000)) - json_signal_error (parser, Qjson_utf8_decode_error); + unsigned char c2 = json_input_get (parser); + if ((c2 & 0xc0) != 0x80) + utf8_error (parser); + int v = ((c & 0x0f) << 12) + ((c1 & 0x3f) << 6) + (c2 & 0x3f); + if (v < 0x800 || (v >= 0xd800 && v <= 0xdfff)) + utf8_error (parser); + json_byte_workspace_put (parser, c2); + chars_delta += 2; } - else if (c < 0xf8) + else if (c <= 0xf7) { - int n = ((c & 0x7) << 18 - | json_handle_utf8_tail_bytes (parser, 3)); - if (n < 0x10000 || n > 0x10ffff) - json_signal_error (parser, Qjson_utf8_decode_error); + unsigned char c2 = json_input_get (parser); + unsigned char c3 = json_input_get (parser); + if ((c2 & 0xc0) != 0x80 || (c3 & 0xc0) != 0x80) + utf8_error (parser); + int v = (((c & 0x07) << 18) + ((c1 & 0x3f) << 12) + + ((c2 & 0x3f) << 6) + (c3 & 0x3f)); + if (v < 0x10000 || v > 0x10ffff) + utf8_error (parser); + json_byte_workspace_put (parser, c2); + json_byte_workspace_put (parser, c3); + chars_delta += 3; } else - json_signal_error (parser, Qjson_utf8_decode_error); + utf8_error (parser); } else if (c == '\\') { @@ -1249,6 +1219,7 @@ json_parse_string (struct json_parser *parser) json_byte_workspace_put (parser, 0xc0 | num >> 6); json_byte_workspace_put (parser, 0x80 | (num & 0x3f)); + chars_delta += 1; } else if (num < 0x10000) { @@ -1258,6 +1229,7 @@ json_parse_string (struct json_parser *parser) | ((num >> 6) & 0x3f))); json_byte_workspace_put (parser, 0x80 | (num & 0x3f)); + chars_delta += 2; } else { @@ -1270,6 +1242,7 @@ json_parse_string (struct json_parser *parser) | ((num >> 6) & 0x3f))); json_byte_workspace_put (parser, 0x80 | (num & 0x3f)); + chars_delta += 3; } } else @@ -1566,16 +1539,11 @@ json_parse_object (struct json_parser *parser) if (c != '"') json_signal_error (parser, Qjson_parse_error); - json_byte_workspace_reset (parser); switch (parser->conf.object_type) { case json_object_hashtable: { - json_parse_string (parser); - Lisp_Object key - = make_string_from_utf8 ((char *) parser->byte_workspace, - (parser->byte_workspace_current - - parser->byte_workspace)); + Lisp_Object key = json_parse_string (parser, false, false); Lisp_Object value = json_parse_object_member_value (parser); json_make_object_workspace_for (parser, 2); parser->object_workspace[parser->object_workspace_current] = key; @@ -1586,13 +1554,7 @@ json_parse_object (struct json_parser *parser) } case json_object_alist: { - json_parse_string (parser); - char *workspace = (char *) parser->byte_workspace; - ptrdiff_t nbytes - = parser->byte_workspace_current - parser->byte_workspace; - Lisp_Object key = Fintern (make_string_from_utf8 (workspace, - nbytes), - Qnil); + Lisp_Object key = json_parse_string (parser, true, false); Lisp_Object value = json_parse_object_member_value (parser); Lisp_Object nc = Fcons (Fcons (key, value), Qnil); *cdr = nc; @@ -1601,11 +1563,7 @@ json_parse_object (struct json_parser *parser) } case json_object_plist: { - json_byte_workspace_put (parser, ':'); - json_parse_string (parser); - Lisp_Object key = intern_1 ((char *) parser->byte_workspace, - (parser->byte_workspace_current - - parser->byte_workspace)); + Lisp_Object key = json_parse_string (parser, true, true); Lisp_Object value = json_parse_object_member_value (parser); Lisp_Object nc = Fcons (key, Qnil); *cdr = nc; @@ -1692,15 +1650,7 @@ json_parse_value (struct json_parser *parser, int c) else if (c == '[') return json_parse_array (parser); else if (c == '"') - { - json_byte_workspace_reset (parser); - json_parse_string (parser); - Lisp_Object result - = make_string_from_utf8 ((const char *) parser->byte_workspace, - (parser->byte_workspace_current - - parser->byte_workspace)); - return result; - } + return json_parse_string (parser, false, false); else if ((c >= '0' && c <= '9') || (c == '-')) return json_parse_number (parser, c); else @@ -1816,14 +1766,13 @@ usage: (json-parse-string STRING &rest ARGS) */) Lisp_Object string = args[0]; CHECK_STRING (string); - Lisp_Object encoded = json_encode (string); struct json_configuration conf = { json_object_hashtable, json_array_array, QCnull, QCfalse }; json_parse_args (nargs - 1, args + 1, &conf, true); struct json_parser p; - const unsigned char *begin = (const unsigned char *) SSDATA (encoded); - json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL, NULL); + const unsigned char *begin = SDATA (string); + json_parser_init (&p, conf, begin, begin + SBYTES (string), NULL, NULL); record_unwind_protect_ptr (json_parser_done, &p); return unbind_to (count, diff --git a/src/lisp.h b/src/lisp.h index 43a29489a25..3cb4361e75e 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4744,6 +4744,8 @@ extern ptrdiff_t evxprintf (char **, ptrdiff_t *, char *, ptrdiff_t, extern Lisp_Object intern_1 (const char *, ptrdiff_t); extern Lisp_Object intern_c_string_1 (const char *, ptrdiff_t); extern Lisp_Object intern_driver (Lisp_Object, Lisp_Object, Lisp_Object); +extern Lisp_Object intern_c_multibyte (const char *str, + ptrdiff_t nchars, ptrdiff_t nbytes); extern void init_symbol (Lisp_Object, Lisp_Object); extern Lisp_Object oblookup (Lisp_Object, const char *, ptrdiff_t, ptrdiff_t); INLINE void diff --git a/src/lread.c b/src/lread.c index 1cb941e84fc..09a5589fd0c 100644 --- a/src/lread.c +++ b/src/lread.c @@ -4993,6 +4993,18 @@ intern_c_string_1 (const char *str, ptrdiff_t len) return tem; } +/* Intern STR of NBYTES bytes and NCHARS characters in the default obarray. */ +Lisp_Object +intern_c_multibyte (const char *str, ptrdiff_t nchars, ptrdiff_t nbytes) +{ + Lisp_Object obarray = check_obarray (Vobarray); + Lisp_Object sym = oblookup (obarray, str, nchars, nbytes); + if (BARE_SYMBOL_P (sym)) + return sym; + return intern_driver (make_multibyte_string (str, nchars, nbytes), + obarray, sym); +} + static void define_symbol (Lisp_Object sym, char const *str) { diff --git a/test/src/json-tests.el b/test/src/json-tests.el index fb2384d4a8d..a1bafadaa87 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -25,6 +25,7 @@ (require 'cl-lib) (require 'map) +(require 'subr-x) (declare-function json-serialize "json.c" (object &rest args)) (declare-function json-insert "json.c" (object &rest args)) @@ -155,6 +156,9 @@ ) (ert-deftest json-parse-string/object () + :expected-result :failed + ;; FIXME: This currently fails. Should the parser deduplicate keys? + ;; Never, always, or for alist and plist only? (let ((input "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) (let ((actual (json-parse-string input))) @@ -167,6 +171,15 @@ (should (equal (json-parse-string input :object-type 'plist) '(:abc [9 :false] :def :null))))) +(ert-deftest json-parse-string/object-unicode-keys () + (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}")) + (let ((actual (json-parse-string input))) + (should (equal (sort (hash-table-keys actual)) '("é" "☃" "𐌐")))) + (should (equal (json-parse-string input :object-type 'alist) + '((é . 1) (☃ . 2) (𐌐 . 3)))) + (should (equal (json-parse-string input :object-type 'plist) + '(:é 1 :☃ 2 :𐌐 3))))) + (ert-deftest json-parse-string/array () (let ((input "[\"a\", 1, [\"b\", 2]]")) (should (equal (json-parse-string input) @@ -182,8 +195,8 @@ ["\nasdфывfgh\t"])) (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) (should-error (json-parse-string "foo") :type 'json-parse-error) - ;; FIXME: Is this the right behavior? - (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) + (should-error (json-parse-string "[\"\u00C4\xC3\x84\"]") + :type 'json-utf8-decode-error)) (ert-deftest json-serialize/string () (should (equal (json-serialize ["foo"]) "[\"foo\"]")) @@ -201,9 +214,23 @@ (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) +(ert-deftest json-parse-string/short () + :expected-result :failed + (should-error (json-parse-string "") :type 'json-end-of-file) + (should-error (json-parse-string " ") :type 'json-end-of-file) + ;; BUG: currently results in `json-end-of-file' for short non-empty inputs. + (dolist (s '("a" "ab" "abc" "abcd" + "t" "tr" "tru" "truE" "truee" + "n" "nu" "nul" "nulL" "nulll" + "f" "fa" "fal" "fals" "falsE" "falsee")) + (condition-case err + (json-parse-string s) + (error + (should (eq (car err) 'json-parse-error))) + (:success (error "parsing %S should fail" s))))) + (ert-deftest json-parse-string/null () - (should-error (json-parse-string "\x00") :type 'wrong-type-argument) - (should (json-parse-string "[\"a\\u0000b\"]")) + (should (equal (json-parse-string "[\"a\\u0000b\"]") ["a\0b"])) (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") (data (json-parse-string string))) (should (hash-table-p data)) @@ -214,30 +241,34 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. Test with both unibyte and multibyte strings." ;; Invalid UTF-8 code unit sequences. - (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xBF\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\xBF\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xFE\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\xFE\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xC0\xAF\"]") :type 'json-parse-error) + (should-error (json-parse-string "[\"\x80\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\x80\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xBF\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\xBF\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xFE\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\xFE\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xC0\xAF\"]") + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) ;; Surrogates. (should-error (json-parse-string "[\"\uDB7F\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") - :type 'json-parse-error)) + :type 'json-utf8-decode-error)) (ert-deftest json-parse-string/incomplete () (should-error (json-parse-string "[123") :type 'json-end-of-file)) commit f178a6d8006f1e8afe06bb71d0a413622d73f131 Author: Mattias Engdegård Date: Sun Mar 31 15:07:34 2024 +0200 Native JSON support is always available * lisp/progmodes/sh-script.el (sh--json-read): Remove. (sh-shellcheck-flymake): Call json-parse-buffer directly. * test/src/json-tests.el: Don't check for function availability. diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el index ab95dc9f924..20c9e00edbf 100644 --- a/lisp/progmodes/sh-script.el +++ b/lisp/progmodes/sh-script.el @@ -3194,12 +3194,6 @@ shell command and conveniently use this command." (defvar-local sh--shellcheck-process nil) -(defalias 'sh--json-read - (if (fboundp 'json-parse-buffer) - (lambda () (json-parse-buffer :object-type 'alist)) - (require 'json) - 'json-read)) - (defun sh-shellcheck-flymake (report-fn &rest _args) "Flymake backend using the shellcheck program. Takes a Flymake callback REPORT-FN as argument, as expected of a @@ -3223,7 +3217,7 @@ member of `flymake-diagnostic-functions'." (with-current-buffer (process-buffer proc) (goto-char (point-min)) (thread-last - (sh--json-read) + (json-parse-buffer :object-type 'alist) (alist-get 'comments) (seq-filter (lambda (item) diff --git a/test/src/json-tests.el b/test/src/json-tests.el index e5cbe8bff5c..fb2384d4a8d 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -34,7 +34,6 @@ (define-error 'json-tests--error "JSON test error") (ert-deftest json-serialize/roundtrip () - (skip-unless (fboundp 'json-serialize)) ;; The noncharacter U+FFFF should be passed through, ;; cf. https://www.unicode.org/faq/private_use.html#noncharacters. (let ((lisp [:null :false t 0 123 -456 3.75 "abc\uFFFFαβγ𝔸𝐁𝖢\"\\"]) @@ -53,7 +52,6 @@ (ert-deftest json-serialize/roundtrip-scalars () "Check that Bug#42994 is fixed." - (skip-unless (fboundp 'json-serialize)) (dolist (case '((:null "null") (:false "false") (t "true") @@ -80,7 +78,6 @@ (should (eobp))))))) (ert-deftest json-serialize/object () - (skip-unless (fboundp 'json-serialize)) (let ((table (make-hash-table :test #'equal))) (puthash "abc" [1 2 t] table) (puthash "def" :null table) @@ -125,8 +122,6 @@ }"))) (ert-deftest json-serialize/object-with-duplicate-keys () - (skip-unless (fboundp 'json-serialize)) - (dolist (n '(1 5 20 100)) (let ((symbols (mapcar (lambda (i) (make-symbol (format "s%d" i))) (number-sequence 1 n))) @@ -160,7 +155,6 @@ ) (ert-deftest json-parse-string/object () - (skip-unless (fboundp 'json-parse-string)) (let ((input "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) (let ((actual (json-parse-string input))) @@ -174,7 +168,6 @@ '(:abc [9 :false] :def :null))))) (ert-deftest json-parse-string/array () - (skip-unless (fboundp 'json-parse-string)) (let ((input "[\"a\", 1, [\"b\", 2]]")) (should (equal (json-parse-string input) ["a" 1 ["b" 2]])) @@ -182,7 +175,6 @@ '("a" 1 ("b" 2)))))) (ert-deftest json-parse-string/string () - (skip-unless (fboundp 'json-parse-string)) (should-error (json-parse-string "[\"formfeed\f\"]") :type 'json-parse-error) (should (equal (json-parse-string "[\"foo \\\"bar\\\"\"]") ["foo \"bar\""])) (should (equal (json-parse-string "[\"abcαβγ\"]") ["abcαβγ"])) @@ -194,7 +186,6 @@ (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) (ert-deftest json-serialize/string () - (skip-unless (fboundp 'json-serialize)) (should (equal (json-serialize ["foo"]) "[\"foo\"]")) (should (equal (json-serialize ["a\n\fb"]) "[\"a\\n\\fb\"]")) (should (equal (json-serialize ["\nasdфыв\u001f\u007ffgh\t"]) @@ -204,7 +195,6 @@ (should-error (json-serialize ["\u00C4\xC3\x84"]))) (ert-deftest json-serialize/invalid-unicode () - (skip-unless (fboundp 'json-serialize)) (should-error (json-serialize ["a\uDBBBb"]) :type 'wrong-type-argument) (should-error (json-serialize ["u\x110000v"]) :type 'wrong-type-argument) (should-error (json-serialize ["u\x3FFFFFv"]) :type 'wrong-type-argument) @@ -212,7 +202,6 @@ (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) (ert-deftest json-parse-string/null () - (skip-unless (fboundp 'json-parse-string)) (should-error (json-parse-string "\x00") :type 'wrong-type-argument) (should (json-parse-string "[\"a\\u0000b\"]")) (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") @@ -224,7 +213,6 @@ "Some examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. Test with both unibyte and multibyte strings." - (skip-unless (fboundp 'json-parse-string)) ;; Invalid UTF-8 code unit sequences. (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) @@ -252,15 +240,12 @@ Test with both unibyte and multibyte strings." :type 'json-parse-error)) (ert-deftest json-parse-string/incomplete () - (skip-unless (fboundp 'json-parse-string)) (should-error (json-parse-string "[123") :type 'json-end-of-file)) (ert-deftest json-parse-string/trailing () - (skip-unless (fboundp 'json-parse-string)) (should-error (json-parse-string "[123] [456]") :type 'json-trailing-content)) (ert-deftest json-parse-buffer/incomplete () - (skip-unless (fboundp 'json-parse-buffer)) (with-temp-buffer (insert "[123") (goto-char 1) @@ -268,7 +253,6 @@ Test with both unibyte and multibyte strings." (should (bobp)))) (ert-deftest json-parse-buffer/trailing () - (skip-unless (fboundp 'json-parse-buffer)) (with-temp-buffer (insert "[123] [456]") (goto-char 1) @@ -277,8 +261,6 @@ Test with both unibyte and multibyte strings." (should (looking-at-p (rx " [456]" eos))))) (ert-deftest json-parse-with-custom-null-and-false-objects () - (skip-unless (and (fboundp 'json-serialize) - (fboundp 'json-parse-string))) (let* ((input "{ \"abc\" : [9, false] , \"def\" : null }") (output @@ -316,7 +298,6 @@ Test with both unibyte and multibyte strings." (should-error (json-serialize '() :object-type 'alist)))) (ert-deftest json-insert/signal () - (skip-unless (fboundp 'json-insert)) (with-temp-buffer (let ((calls 0)) (add-hook 'after-change-functions @@ -331,7 +312,6 @@ Test with both unibyte and multibyte strings." (should (equal calls 1))))) (ert-deftest json-insert/throw () - (skip-unless (fboundp 'json-insert)) (with-temp-buffer (let ((calls 0)) (add-hook 'after-change-functions @@ -347,7 +327,6 @@ Test with both unibyte and multibyte strings." (should (equal calls 1))))) (ert-deftest json-serialize/bignum () - (skip-unless (fboundp 'json-serialize)) (should (equal (json-serialize (vector (1+ most-positive-fixnum) (1- most-negative-fixnum))) (format "[%d,%d]" @@ -356,12 +335,10 @@ Test with both unibyte and multibyte strings." (ert-deftest json-parse-string/wrong-type () "Check that Bug#42113 is fixed." - (skip-unless (fboundp 'json-parse-string)) (should-error (json-parse-string 1) :type 'wrong-type-argument)) (ert-deftest json-serialize/wrong-hash-key-type () "Check that Bug#42113 is fixed." - (skip-unless (fboundp 'json-serialize)) (let ((table (make-hash-table :test #'eq))) (puthash 1 2 table) (should-error (json-serialize table) :type 'wrong-type-argument))) commit 155462a1bd7909074f79a534324c7b209eb97142 Author: Mattias Engdegård Date: Sun Mar 31 16:12:45 2024 +0200 Fix mutates-arguments warning for `sort` * lisp/emacs-lisp/bytecomp.el (bytecomp--sort-call-in-place-p) (bytecomp--mutargs-nconc, bytecomp--mutargs-sort): New. (byte-compile-form, bytecomp--actually-important-return-value-p) (mutating-fns): Use a slightly more extendible scheme for specifying what arguments a function mutates. Give `sort` special treatment. diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el index 2b5eb34e571..5cff86784f0 100644 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@ -3402,8 +3402,8 @@ lambda-expression." (t ".")))) (let ((mutargs (function-get (car form) 'mutates-arguments))) (when mutargs - (dolist (idx (if (eq mutargs 'all-but-last) - (number-sequence 1 (- (length form) 2)) + (dolist (idx (if (symbolp mutargs) + (funcall mutargs form) mutargs)) (let ((arg (nth idx form))) (when (and (or (and (eq (car-safe arg) 'quote) @@ -3472,13 +3472,15 @@ lambda-expression." (if byte-compile--for-effect (byte-compile-discard))))) +(defun bytecomp--sort-call-in-place-p (form) + (or (= (length form) 3) ; old-style + (plist-get (cddr form) :in-place))) ; new-style + (defun bytecomp--actually-important-return-value-p (form) "Whether FORM is really a call with a return value that should not go unused. This assumes the function has the `important-return-value' property." (cond ((eq (car form) 'sort) - ;; For `sort', we only care about non-destructive uses. - (and (zerop (% (length form) 2)) ; new-style call - (not (plist-get (cddr form) :in-place)))) + (not (bytecomp--sort-call-in-place-p form))) (t t))) (let ((important-return-value-fns @@ -3504,18 +3506,27 @@ This assumes the function has the `important-return-value' property." (dolist (fn important-return-value-fns) (put fn 'important-return-value t))) +(defun bytecomp--mutargs-nconc (form) + ;; For `nconc', all arguments but the last are mutated. + (number-sequence 1 (- (length form) 2))) + +(defun bytecomp--mutargs-sort (form) + ;; For `sort', the first argument is mutated if the call is in-place. + (and (bytecomp--sort-call-in-place-p form) '(1))) + (let ((mutating-fns ;; FIXME: Should there be a function declaration for this? ;; ;; (FUNC . ARGS) means that FUNC mutates arguments whose indices are - ;; in the list ARGS, starting at 1, or all but the last argument if - ;; ARGS is `all-but-last'. + ;; in the list ARGS, starting at 1. ARGS can also be a function + ;; taking the function call form as argument and returning the + ;; list of indices. '( (setcar 1) (setcdr 1) (aset 1) (nreverse 1) - (nconc . all-but-last) + (nconc . bytecomp--mutargs-nconc) (nbutlast 1) (ntake 2) - (sort 1) + (sort . bytecomp--mutargs-sort) (delq 2) (delete 2) (delete-dups 1) (delete-consecutive-dups 1) (plist-put 1)