commit a5835dfee139322de7aa071f1c87ef015acbecad (HEAD, refs/remotes/origin/master) Author: Philipp Stephani Date: Sat Dec 23 17:56:36 2017 +0100 Improve error reporting when serializing non-Unicode strings to JSON * src/coding.c (utf8_string_p): New helper function. (syms_of_coding) : Move from json.c. * src/json.c (json_check_utf8): New helper function. (lisp_to_json_toplevel_1, lisp_to_json): Use it. To save a bit of time, check for invalid UTF-8 strings only after encountering an error, since Jansson already rejects them. * test/src/json-tests.el (json-serialize/invalid-unicode): Adapt expected error symbol. diff --git a/src/coding.c b/src/coding.c index 1705838ffa..5ea1e395f2 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6360,6 +6360,27 @@ check_utf_8 (struct coding_system *coding) } +/* Return whether STRING is a valid UTF-8 string. STRING must be a + unibyte string. */ + +bool +utf8_string_p (Lisp_Object string) +{ + eassert (!STRING_MULTIBYTE (string)); + struct coding_system coding; + setup_coding_system (Qutf_8_unix, &coding); + /* We initialize only the fields that check_utf_8 accesses. */ + coding.head_ascii = -1; + coding.src_pos = 0; + coding.src_pos_byte = 0; + coding.src_chars = SCHARS (string); + coding.src_bytes = SBYTES (string); + coding.src_object = string; + coding.eol_seen = EOL_SEEN_NONE; + return check_utf_8 (&coding) != -1; +} + + /* Detect how end-of-line of a text of length SRC_BYTES pointed by SOURCE is encoded. If CATEGORY is one of coding_category_utf_16_XXXX, assume that CR and LF are encoded by @@ -10846,6 +10867,7 @@ syms_of_coding (void) DEFSYM (Qiso_2022, "iso-2022"); DEFSYM (Qutf_8, "utf-8"); + DEFSYM (Qutf_8_unix, "utf-8-unix"); DEFSYM (Qutf_8_emacs, "utf-8-emacs"); #if defined (WINDOWSNT) || defined (CYGWIN) diff --git a/src/coding.h b/src/coding.h index 66d125b07e..bc4ef52e1e 100644 --- a/src/coding.h +++ b/src/coding.h @@ -665,6 +665,7 @@ struct coding_system /* Extern declarations. */ extern Lisp_Object code_conversion_save (bool, bool); extern bool encode_coding_utf_8 (struct coding_system *); +extern bool utf8_string_p (Lisp_Object); extern void setup_coding_system (Lisp_Object, struct coding_system *); extern Lisp_Object coding_charset_list (struct coding_system *); extern Lisp_Object coding_system_charset_list (Lisp_Object); diff --git a/src/json.c b/src/json.c index 88db86ad2e..93dcc730da 100644 --- a/src/json.c +++ b/src/json.c @@ -316,6 +316,15 @@ json_check (json_t *object) return object; } +/* If STRING is not a valid UTF-8 string, signal an error of type + `wrong-type-argument'. STRING must be a unibyte string. */ + +static void +json_check_utf8 (Lisp_Object string) +{ + CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string); +} + static json_t *lisp_to_json (Lisp_Object); /* Convert a Lisp object to a toplevel JSON object (array or object). @@ -363,9 +372,12 @@ lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json) int status = json_object_set_new (*json, key_str, lisp_to_json (HASH_VALUE (h, i))); if (status == -1) - /* FIXME: A failure here might also indicate that the - key is not a valid Unicode string. */ - json_out_of_memory (); + { + /* A failure can be caused either by an invalid key or + by low memory. */ + json_check_utf8 (key); + json_out_of_memory (); + } } clear_unwind_protect (count); return unbind_to (count, Qnil); @@ -447,9 +459,15 @@ lisp_to_json (Lisp_Object lisp) else if (STRINGP (lisp)) { Lisp_Object encoded = json_encode (lisp); - /* FIXME: We might throw an out-of-memory error here if the - string is not valid Unicode. */ - return json_check (json_stringn (SSDATA (encoded), SBYTES (encoded))); + json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded)); + if (json == NULL) + { + /* A failure can be caused either by an invalid string or by + low memory. */ + json_check_utf8 (encoded); + json_out_of_memory (); + } + return json; } /* LISP now must be a vector, hashtable, or alist. */ @@ -863,8 +881,7 @@ syms_of_json (void) DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p"); DEFSYM (Qjson_value_p, "json-value-p"); - - DEFSYM (Qutf_8_unix, "utf-8-unix"); + DEFSYM (Qutf_8_string_p, "utf-8-string-p"); DEFSYM (Qjson_error, "json-error"); DEFSYM (Qjson_out_of_memory, "json-out-of-memory"); diff --git a/test/src/json-tests.el b/test/src/json-tests.el index e394583bc7..107cab8908 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -108,13 +108,11 @@ (ert-deftest json-serialize/invalid-unicode () (skip-unless (fboundp 'json-serialize)) - ;; FIXME: "out of memory" is the wrong error signal, but we don't - ;; currently distinguish between error types when serializing. - (should-error (json-serialize ["a\uDBBBb"]) :type 'json-out-of-memory) - (should-error (json-serialize ["u\x110000v"]) :type 'json-out-of-memory) - (should-error (json-serialize ["u\x3FFFFFv"]) :type 'json-out-of-memory) - (should-error (json-serialize ["u\xCCv"]) :type 'json-out-of-memory) - (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'json-out-of-memory)) + (should-error (json-serialize ["a\uDBBBb"]) :type 'wrong-type-argument) + (should-error (json-serialize ["u\x110000v"]) :type 'wrong-type-argument) + (should-error (json-serialize ["u\x3FFFFFv"]) :type 'wrong-type-argument) + (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) + (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) (ert-deftest json-parse-string/null () (skip-unless (fboundp 'json-parse-string)) commit 30ffc256abe7443a02b44490c518baf9a122b4c8 Author: David McFarland Date: Sat Dec 30 13:14:32 2017 +0200 Fix regex stack overflow in gdb-mi.el when parsing complex locals * lisp/progmodes/gdb-mi.el (gdb-jsonify-buffer): Skip string literals with (forward-sexp) instead of matching with regex. (Bug#29868) Copyright-paperwork-exempt: yes diff --git a/lisp/progmodes/gdb-mi.el b/lisp/progmodes/gdb-mi.el index 58552759b9..84b8c6b44b 100644 --- a/lisp/progmodes/gdb-mi.el +++ b/lisp/progmodes/gdb-mi.el @@ -2717,10 +2717,10 @@ If `default-directory' is remote, full file names are adapted accordingly." (insert "]")))))) (goto-char (point-min)) (insert "{") - (let ((re (concat "\\([[:alnum:]-_]+\\)=\\({\\|\\[\\|\"\"\\|" - gdb--string-regexp "\\)"))) + (let ((re (concat "\\([[:alnum:]-_]+\\)="))) (while (re-search-forward re nil t) - (replace-match "\"\\1\":\\2" nil nil))) + (replace-match "\"\\1\":" nil nil) + (if (eq (char-after) ?\") (forward-sexp) (forward-char)))) (goto-char (point-max)) (insert "}"))) commit fb20043b2fec8e8aff6354ec1396fd5ba688b76b Author: Sebastian Reuße Date: Sat Dec 30 12:41:23 2017 +0200 Fix output alignment in 'find-dired' for "ls -h" * lisp/find-dired.el (find-dired-filter): Fix alignment of the file size column when the -h ls option is used in 'find-ls-option'. (Bug#29803) Copyright-paperwork-exempt: yes diff --git a/lisp/find-dired.el b/lisp/find-dired.el index 3b0613b280..bf815d500d 100644 --- a/lisp/find-dired.el +++ b/lisp/find-dired.el @@ -295,7 +295,7 @@ specifies what to use in place of \"-ls\" as the final argument." (l-opt (and (consp find-ls-option) (string-match "l" (cdr find-ls-option)))) (ls-regexp (concat "^ +[^ \t\r\n]+\\( +[^ \t\r\n]+\\) +" - "[^ \t\r\n]+ +[^ \t\r\n]+\\( +[0-9]+\\)"))) + "[^ \t\r\n]+ +[^ \t\r\n]+\\( +[^[:space:]]+\\)"))) (goto-char beg) (insert string) (goto-char beg)