commit 914b00f2079431bd0d44618f1d3558986ac5c282 (HEAD, refs/remotes/origin/master) Author: Eli Zaretskii Date: Sun Mar 31 09:31:58 2024 +0300 ; Another round of stylistic fixes in json.c * src/json.c (json_parser_init, json_parse_object) (json_parse_value, Fjson_parse_string, json_parse) (json_create_float, json_create_integer, json_parse_args): Fix whitespace and indentation. diff --git a/src/json.c b/src/json.c index b5d5c1f1fe7..908db022c50 100644 --- a/src/json.c +++ b/src/json.c @@ -60,44 +60,45 @@ json_parse_args (ptrdiff_t nargs, Lisp_Object *args, /* Start from the back so keyword values appearing first take precedence. */ - for (ptrdiff_t i = nargs; i > 0; i -= 2) { - Lisp_Object key = args[i - 2]; - Lisp_Object value = args[i - 1]; - if (parse_object_types && EQ (key, QCobject_type)) - { - if (EQ (value, Qhash_table)) - conf->object_type = json_object_hashtable; - else if (EQ (value, Qalist)) - conf->object_type = json_object_alist; - else if (EQ (value, Qplist)) - conf->object_type = json_object_plist; - else - wrong_choice (list3 (Qhash_table, Qalist, Qplist), value); - } - else if (parse_object_types && EQ (key, QCarray_type)) - { - if (EQ (value, Qarray)) - conf->array_type = json_array_array; - else if (EQ (value, Qlist)) - conf->array_type = json_array_list; - else - wrong_choice (list2 (Qarray, Qlist), value); - } - else if (EQ (key, QCnull_object)) - conf->null_object = value; - else if (EQ (key, QCfalse_object)) - conf->false_object = value; - else if (parse_object_types) - wrong_choice (list4 (QCobject_type, - QCarray_type, - QCnull_object, - QCfalse_object), - value); - else - wrong_choice (list2 (QCnull_object, - QCfalse_object), - value); - } + for (ptrdiff_t i = nargs; i > 0; i -= 2) + { + Lisp_Object key = args[i - 2]; + Lisp_Object value = args[i - 1]; + if (parse_object_types && EQ (key, QCobject_type)) + { + if (EQ (value, Qhash_table)) + conf->object_type = json_object_hashtable; + else if (EQ (value, Qalist)) + conf->object_type = json_object_alist; + else if (EQ (value, Qplist)) + conf->object_type = json_object_plist; + else + wrong_choice (list3 (Qhash_table, Qalist, Qplist), value); + } + else if (parse_object_types && EQ (key, QCarray_type)) + { + if (EQ (value, Qarray)) + conf->array_type = json_array_array; + else if (EQ (value, Qlist)) + conf->array_type = json_array_list; + else + wrong_choice (list2 (Qarray, Qlist), value); + } + else if (EQ (key, QCnull_object)) + conf->null_object = value; + else if (EQ (key, QCfalse_object)) + conf->false_object = value; + else if (parse_object_types) + wrong_choice (list4 (QCobject_type, + QCarray_type, + QCnull_object, + QCfalse_object), + value); + else + wrong_choice (list2 (QCnull_object, + QCfalse_object), + value); + } } /* JSON encoding context. */ @@ -824,9 +825,8 @@ json_parser_init (struct json_parser *parser, parser->object_workspace_current = 0; parser->byte_workspace = parser->internal_byte_workspace; - parser->byte_workspace_end - = (parser->byte_workspace - + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); + parser->byte_workspace_end = (parser->byte_workspace + + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); } static void @@ -1233,8 +1233,7 @@ json_parse_string (struct json_parser *parser) if (num2 < 0xdc00 || num2 >= 0xe000) json_signal_error (parser, Qjson_invalid_surrogate_error); - num = (0x10000 - + ((num - 0xd800) << 10 | (num2 - 0xdc00))); + num = (0x10000 + ((num - 0xd800) << 10 | (num2 - 0xdc00))); } else if (num >= 0xdc00 && num < 0xe000) /* is the second half of the surrogate pair without @@ -1307,10 +1306,8 @@ json_create_integer (struct json_parser *parser, json_byte_workspace_put (parser, 0); ptrdiff_t len; Lisp_Object result - = string_to_number ((const char *) parser->byte_workspace, 10, - &len); - if (len - != parser->byte_workspace_current - parser->byte_workspace - 1) + = string_to_number ((const char *) parser->byte_workspace, 10, &len); + if (len != parser->byte_workspace_current - parser->byte_workspace - 1) json_signal_error (parser, Qjson_error); return result; } @@ -1323,12 +1320,10 @@ json_create_float (struct json_parser *parser) errno = 0; char *e; double value = strtod ((const char *) parser->byte_workspace, &e); - bool out_of_range - = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL)); + bool out_of_range = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL)); if (out_of_range) json_signal_error (parser, Qjson_number_out_of_range); - else if ((const unsigned char *) e - != parser->byte_workspace_current - 1) + else if ((const unsigned char *) e != parser->byte_workspace_current - 1) json_signal_error (parser, Qjson_error); else return make_float (value); @@ -1578,36 +1573,27 @@ json_parse_object (struct json_parser *parser) { json_parse_string (parser); Lisp_Object key - = make_string_from_utf8 ((char *) - parser->byte_workspace, + = make_string_from_utf8 ((char *) parser->byte_workspace, (parser->byte_workspace_current - parser->byte_workspace)); - Lisp_Object value - = json_parse_object_member_value (parser); + Lisp_Object value = json_parse_object_member_value (parser); json_make_object_workspace_for (parser, 2); - parser->object_workspace[parser->object_workspace_current] - = key; + parser->object_workspace[parser->object_workspace_current] = key; parser->object_workspace_current++; - parser->object_workspace[parser->object_workspace_current] - = value; + parser->object_workspace[parser->object_workspace_current] = value; parser->object_workspace_current++; break; } case json_object_alist: { - ptrdiff_t nbytes; - char *workspace; - json_parse_string (parser); - workspace = (char *) parser->byte_workspace; - nbytes = (parser->byte_workspace_current - - parser->byte_workspace); - - Lisp_Object key - = Fintern (make_string_from_utf8 (workspace, nbytes), - Qnil); - Lisp_Object value - = json_parse_object_member_value (parser); + char *workspace = (char *) parser->byte_workspace; + ptrdiff_t nbytes + = parser->byte_workspace_current - parser->byte_workspace; + Lisp_Object key = Fintern (make_string_from_utf8 (workspace, + nbytes), + Qnil); + Lisp_Object value = json_parse_object_member_value (parser); Lisp_Object nc = Fcons (Fcons (key, value), Qnil); *cdr = nc; cdr = xcdr_addr (nc); @@ -1617,12 +1603,10 @@ json_parse_object (struct json_parser *parser) { json_byte_workspace_put (parser, ':'); json_parse_string (parser); - Lisp_Object key - = intern_1 ((char *) parser->byte_workspace, - (parser->byte_workspace_current - - parser->byte_workspace)); - Lisp_Object value - = json_parse_object_member_value (parser); + Lisp_Object key = intern_1 ((char *) parser->byte_workspace, + (parser->byte_workspace_current + - parser->byte_workspace)); + Lisp_Object value = json_parse_object_member_value (parser); Lisp_Object nc = Fcons (key, Qnil); *cdr = nc; cdr = xcdr_addr (nc); @@ -1655,16 +1639,11 @@ json_parse_object (struct json_parser *parser) { case json_object_hashtable: { - EMACS_INT value; - - value - = (parser->object_workspace_current - first) / 2; - result - = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, - make_fixed_natnum (value)); + EMACS_INT value = (parser->object_workspace_current - first) / 2; + result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, + make_fixed_natnum (value)); struct Lisp_Hash_Table *h = XHASH_TABLE (result); - for (size_t i = first; i < parser->object_workspace_current; - i += 2) + for (size_t i = first; i < parser->object_workspace_current; i += 2) { hash_hash_t hash; Lisp_Object key = parser->object_workspace[i]; @@ -1717,8 +1696,7 @@ json_parse_value (struct json_parser *parser, int c) json_byte_workspace_reset (parser); json_parse_string (parser); Lisp_Object result - = make_string_from_utf8 ((const char *) - parser->byte_workspace, + = make_string_from_utf8 ((const char *) parser->byte_workspace, (parser->byte_workspace_current - parser->byte_workspace)); return result; @@ -1788,15 +1766,13 @@ json_parse (struct json_parser *parser, break; case PARSEENDBEHAVIOR_MovePoint: { - ptrdiff_t byte - = (PT_BYTE + parser->input_current - parser->input_begin - + parser->additional_bytes_count); + ptrdiff_t byte = (PT_BYTE + parser->input_current - parser->input_begin + + parser->additional_bytes_count); ptrdiff_t position; if (NILP (BVAR (current_buffer, enable_multibyte_characters))) position = byte; else - position - = PT + parser->point_of_current_line + parser->current_column; + position = PT + parser->point_of_current_line + parser->current_column; SET_PT_BOTH (position, byte); break; @@ -1846,10 +1822,8 @@ usage: (json-parse-string STRING &rest ARGS) */) json_parse_args (nargs - 1, args + 1, &conf, true); struct json_parser p; - const unsigned char *begin - = (const unsigned char *) SSDATA (encoded); - json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL, - NULL); + const unsigned char *begin = (const unsigned char *) SSDATA (encoded); + json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL, NULL); record_unwind_protect_ptr (json_parser_done, &p); return unbind_to (count, commit 411f46fd365bc0008c58e1fa6bee6a60d841da75 Author: Po Lu Date: Sun Mar 31 08:26:48 2024 +0800 ; json.c stylistic adjustments * src/json.c (json_signal_error, json_parser_init) (json_parse_object, json_parse_value, syms_of_json): Tabify and wrap unacceptably long lines. diff --git a/src/json.c b/src/json.c index 82df60b8507..b5d5c1f1fe7 100644 --- a/src/json.c +++ b/src/json.c @@ -772,9 +772,9 @@ static AVOID json_signal_error (struct json_parser *parser, Lisp_Object error) { xsignal3 (error, INT_TO_INTEGER (parser->current_line), - INT_TO_INTEGER (parser->current_column), - INT_TO_INTEGER (parser->point_of_current_line - + parser->current_column)); + INT_TO_INTEGER (parser->current_column), + INT_TO_INTEGER (parser->point_of_current_line + + parser->current_column)); } static void @@ -825,8 +825,8 @@ json_parser_init (struct json_parser *parser, parser->byte_workspace = parser->internal_byte_workspace; parser->byte_workspace_end - = (parser->byte_workspace - + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); + = (parser->byte_workspace + + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); } static void @@ -1579,7 +1579,7 @@ json_parse_object (struct json_parser *parser) json_parse_string (parser); Lisp_Object key = make_string_from_utf8 ((char *) - parser->byte_workspace, + parser->byte_workspace, (parser->byte_workspace_current - parser->byte_workspace)); Lisp_Object value @@ -1595,12 +1595,16 @@ json_parse_object (struct json_parser *parser) } case json_object_alist: { + ptrdiff_t nbytes; + char *workspace; + json_parse_string (parser); + workspace = (char *) parser->byte_workspace; + nbytes = (parser->byte_workspace_current + - parser->byte_workspace); + Lisp_Object key - = Fintern (make_string_from_utf8 ( - (char *) parser->byte_workspace, - (parser->byte_workspace_current - - parser->byte_workspace)), + = Fintern (make_string_from_utf8 (workspace, nbytes), Qnil); Lisp_Object value = json_parse_object_member_value (parser); @@ -1651,10 +1655,13 @@ json_parse_object (struct json_parser *parser) { case json_object_hashtable: { + EMACS_INT value; + + value + = (parser->object_workspace_current - first) / 2; result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, - make_fixed_natnum ( - (parser->object_workspace_current - first) / 2)); + make_fixed_natnum (value)); struct Lisp_Hash_Table *h = XHASH_TABLE (result); for (size_t i = first; i < parser->object_workspace_current; i += 2) @@ -1711,7 +1718,7 @@ json_parse_value (struct json_parser *parser, int c) json_parse_string (parser); Lisp_Object result = make_string_from_utf8 ((const char *) - parser->byte_workspace, + parser->byte_workspace, (parser->byte_workspace_current - parser->byte_workspace)); return result; @@ -1925,28 +1932,28 @@ syms_of_json (void) DEFSYM (Qjson_end_of_file, "json-end-of-file"); DEFSYM (Qjson_trailing_content, "json-trailing-content"); DEFSYM (Qjson_object_too_deep, "json-object-too-deep"); - DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error") - DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error") - DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error") - DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error") + DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error"); + DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error"); + DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error"); + DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error"); define_error (Qjson_error, "generic JSON error", Qerror); define_error (Qjson_out_of_memory, - "not enough memory for creating JSON object", Qjson_error); + "not enough memory for creating JSON object", Qjson_error); define_error (Qjson_parse_error, "could not parse JSON stream", - Qjson_error); + Qjson_error); define_error (Qjson_end_of_file, "end of JSON stream", Qjson_parse_error); define_error (Qjson_trailing_content, "trailing content after JSON stream", - Qjson_parse_error); + Qjson_parse_error); define_error (Qjson_object_too_deep, - "object cyclic or Lisp evaluation too deep", Qjson_error); + "object cyclic or Lisp evaluation too deep", Qjson_error); define_error (Qjson_utf8_decode_error, - "invalid utf-8 encoding", Qjson_error); + "invalid utf-8 encoding", Qjson_error); define_error (Qjson_invalid_surrogate_error, - "invalid surrogate pair", Qjson_error); + "invalid surrogate pair", Qjson_error); define_error (Qjson_number_out_of_range, - "number out of range", Qjson_error); + "number out of range", Qjson_error); define_error (Qjson_escape_sequence_error, - "invalid escape sequence", Qjson_parse_error); + "invalid escape sequence", Qjson_parse_error); DEFSYM (Qpure, "pure"); DEFSYM (Qside_effect_free, "side-effect-free"); commit d2c822944cc6e4480e64ec8c90f74a256971dfdd Author: Po Lu Date: Sun Mar 31 08:22:56 2024 +0800 ; json.c stylistic adjustments * src/json.c (json_parse_args, json_out_t, symset_t, symset_size) (Fjson_serialize, Fjson_insert): Tabify and modify all sentences to be punctuated with two spaces. diff --git a/src/json.c b/src/json.c index 5970c539f53..82df60b8507 100644 --- a/src/json.c +++ b/src/json.c @@ -29,18 +29,21 @@ along with GNU Emacs. If not, see . */ #include "buffer.h" #include "coding.h" -enum json_object_type { - json_object_hashtable, - json_object_alist, - json_object_plist -}; +enum json_object_type + { + json_object_hashtable, + json_object_alist, + json_object_plist, + }; -enum json_array_type { - json_array_array, - json_array_list -}; +enum json_array_type + { + json_array_array, + json_array_list, + }; -struct json_configuration { +struct json_configuration +{ enum json_object_type object_type; enum json_array_type array_type; Lisp_Object null_object; @@ -48,38 +51,37 @@ struct json_configuration { }; static void -json_parse_args (ptrdiff_t nargs, - Lisp_Object *args, - struct json_configuration *conf, - bool parse_object_types) +json_parse_args (ptrdiff_t nargs, Lisp_Object *args, + struct json_configuration *conf, + bool parse_object_types) { if ((nargs % 2) != 0) wrong_type_argument (Qplistp, Flist (nargs, args)); - /* Start from the back so keyword values appearing - first take precedence. */ + /* Start from the back so keyword values appearing first take + precedence. */ for (ptrdiff_t i = nargs; i > 0; i -= 2) { Lisp_Object key = args[i - 2]; Lisp_Object value = args[i - 1]; if (parse_object_types && EQ (key, QCobject_type)) { - if (EQ (value, Qhash_table)) - conf->object_type = json_object_hashtable; - else if (EQ (value, Qalist)) - conf->object_type = json_object_alist; - else if (EQ (value, Qplist)) - conf->object_type = json_object_plist; - else - wrong_choice (list3 (Qhash_table, Qalist, Qplist), value); + if (EQ (value, Qhash_table)) + conf->object_type = json_object_hashtable; + else if (EQ (value, Qalist)) + conf->object_type = json_object_alist; + else if (EQ (value, Qplist)) + conf->object_type = json_object_plist; + else + wrong_choice (list3 (Qhash_table, Qalist, Qplist), value); } else if (parse_object_types && EQ (key, QCarray_type)) { - if (EQ (value, Qarray)) - conf->array_type = json_array_array; - else if (EQ (value, Qlist)) - conf->array_type = json_array_list; - else - wrong_choice (list2 (Qarray, Qlist), value); + if (EQ (value, Qarray)) + conf->array_type = json_array_array; + else if (EQ (value, Qlist)) + conf->array_type = json_array_list; + else + wrong_choice (list2 (Qarray, Qlist), value); } else if (EQ (key, QCnull_object)) conf->null_object = value; @@ -87,19 +89,20 @@ json_parse_args (ptrdiff_t nargs, conf->false_object = value; else if (parse_object_types) wrong_choice (list4 (QCobject_type, - QCarray_type, - QCnull_object, - QCfalse_object), - value); + QCarray_type, + QCnull_object, + QCfalse_object), + value); else wrong_choice (list2 (QCnull_object, - QCfalse_object), - value); + QCfalse_object), + value); } } /* JSON encoding context. */ -typedef struct { +typedef struct +{ char *buf; ptrdiff_t size; /* number of bytes in buf */ ptrdiff_t capacity; /* allocated size of buf */ @@ -111,7 +114,8 @@ typedef struct { } json_out_t; /* Set of symbols. */ -typedef struct { +typedef struct +{ ptrdiff_t count; /* symbols in table */ int bits; /* log2(table size) */ struct symset_tbl *table; /* heap-allocated table */ @@ -129,7 +133,7 @@ struct symset_tbl static inline ptrdiff_t symset_size (int bits) { - return (ptrdiff_t)1 << bits; + return (ptrdiff_t) 1 << bits; } static struct symset_tbl * @@ -615,7 +619,7 @@ In you specify the same value for `:null-object' and `:false-object', a potentially ambiguous situation, the JSON output will not contain any JSON false values. usage: (json-serialize OBJECT &rest ARGS) */) - (ptrdiff_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { specpdl_ref count = SPECPDL_INDEX (); json_out_t jo; @@ -630,7 +634,7 @@ This is the same as (insert (json-serialize OBJECT)), but potentially faster. See the function `json-serialize' for allowed values of OBJECT. usage: (json-insert OBJECT &rest ARGS) */) - (ptrdiff_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { specpdl_ref count = SPECPDL_INDEX (); json_out_t jo; commit 8bee4060ea42c61e52ebe6487ff97bc095261050 Author: Eric Abrahamsen Date: Mon Dec 5 21:59:03 2022 -0800 Add peg.el as a built-in library * lisp/progmodes/peg.el: New file, taken from ELPA package. * test/lisp/peg-tests.el: Package tests. * doc/lispref/peg.texi: Documentation. diff --git a/doc/lispref/Makefile.in b/doc/lispref/Makefile.in index 0a228271be3..4ceffd7d7d3 100644 --- a/doc/lispref/Makefile.in +++ b/doc/lispref/Makefile.in @@ -112,6 +112,7 @@ srcs = \ $(srcdir)/os.texi \ $(srcdir)/package.texi \ $(srcdir)/parsing.texi \ + $(srcdir)/peg.texi \ $(srcdir)/positions.texi \ $(srcdir)/processes.texi \ $(srcdir)/records.texi \ diff --git a/doc/lispref/elisp.texi b/doc/lispref/elisp.texi index 71139db4359..ec93a0b9c8a 100644 --- a/doc/lispref/elisp.texi +++ b/doc/lispref/elisp.texi @@ -222,6 +222,7 @@ To view this manual in other formats, click * Non-ASCII Characters:: Non-ASCII text in buffers and strings. * Searching and Matching:: Searching buffers for strings or regexps. * Syntax Tables:: The syntax table controls word and list parsing. +* Parsing Expression Grammars:: Parsing structured buffer text. * Parsing Program Source:: Generate syntax tree for program sources. * Abbrevs:: How Abbrev mode works, and its data structures. @@ -1365,6 +1366,12 @@ Syntax Tables * Syntax Table Internals:: How syntax table information is stored. * Categories:: Another way of classifying character syntax. +Parsing Expression Grammars + +* PEX Definitions:: The syntax of PEX rules +* Parsing Actions:: Running actions upon successful parsing. +* Writing PEG Rules:: Tips for writing parsing rules. + Parsing Program Source * Language Grammar:: Loading tree-sitter language grammar. @@ -1720,6 +1727,7 @@ Object Internals @include searching.texi @include syntax.texi +@include peg.texi @include parsing.texi @include abbrevs.texi @include threads.texi diff --git a/doc/lispref/peg.texi b/doc/lispref/peg.texi new file mode 100644 index 00000000000..ef4dfa7653e --- /dev/null +++ b/doc/lispref/peg.texi @@ -0,0 +1,351 @@ +@c -*-texinfo-*- +@c This is part of the GNU Emacs Lisp Reference Manual. +@c Copyright (C) 1990--1995, 1998--1999, 2001--2023 Free Software +@c Foundation, Inc. +@c See the file elisp.texi for copying conditions. +@node Parsing Expression Grammars +@chapter Parsing Expression Grammars +@cindex text parsing +@cindex parsing expression grammar + + Emacs Lisp provides several tools for parsing and matching text, +from regular expressions (@pxref{Regular Expressions}) to full +@acronym{LL} grammar parsers (@pxref{Top,, Bovine parser +development,bovine}). @dfn{Parsing Expression Grammars} +(@acronym{PEG}) are another approach to text parsing that offer more +structure and composibility than regular expressions, but less +complexity than context-free grammars. + +A @acronym{PEG} parser is defined as a list of named rules, each of +which matches text patterns, and/or contains references to other +rules. Parsing is initiated with the function @code{peg-run} or the +macro @code{peg-parse} (see below), and parses text after point in the +current buffer, using a given set of rules. + +@cindex parsing expression +The definition of each rule is referred to as a @dfn{parsing +expression} (@acronym{PEX}), and can consist of a literal string, a +regexp-like character range or set, a peg-specific construct +resembling an elisp function call, a reference to another rule, or a +combination of any of these. A grammar is expressed as a tree of +rules in which one rule is typically treated as a ``root'' or +``entry-point'' rule. For instance: + +@example +@group +((number sign digit (* digit)) + (sign (or "+" "-" "")) + (digit [0-9])) +@end group +@end example + +Once defined, grammars can be used to parse text after point in the +current buffer, in the following ways: + +@defmac peg-parse &rest pexs +Match @var{pexs} at point. If @var{pexs} is a list of PEG rules, the +first rule is considered the ``entry-point'': +@end defmac + +@example +@group +(peg-parse + ((number sign digit (* digit)) + (sign (or "+" "-" "")) + (digit [0-9]))) +@end group +@end example + +This macro represents the simplest use of the @acronym{PEG} library, +but also the least flexible, as the rules must be written directly +into the source code. A more flexible approach involves use of three +macros in conjunction: @code{with-peg-rules}, a @code{let}-like +construct that makes a set of rules available within the macro body; +@code{peg-run}, which initiates parsing given a single rule; and +@code{peg}, which is used to wrap the entry-point rule name. In fact, +a call to @code{peg-parse} expands to just this set of calls. The +above example could be written as: + +@example +@group +(with-peg-rules + ((number sign digit (* digit)) + (sign (or "+" "-" "")) + (digit [0-9])) + (peg-run (peg number))) +@end group +@end example + +This allows more explicit control over the ``entry-point'' of parsing, +and allows the combination of rules from different sources. + +Individual rules can also be defined using a more @code{defun}-like +syntax, using the macro @code{define-peg-rule}: + +@example +(define-peg-rule digit () + [0-9]) +@end example + +This also allows for rules that accept an argument (supplied by the +@code{funcall} PEG rule). + +Another possibility is to define a named set of rules with +@code{define-peg-ruleset}: + +@example +(define-peg-ruleset number-grammar + '((number sign digit (* digit)) + digit ;; A reference to the definition above. + (sign (or "+" "-" "")))) +@end example + +Rules and rulesets defined this way can be referred to by name in +later calls to @code{peg-run} or @code{with-peg-rules}: + +@example +(with-peg-rules number-grammar + (peg-run (peg number))) +@end example + +By default, calls to @code{peg-run} or @code{peg-parse} produce no +output: parsing simply moves point. In order to return or otherwise +act upon parsed strings, rules can include @dfn{actions}, see +@ref{Parsing Actions}. + +@menu +* PEX Definitions:: The syntax of PEX rules. +* Parsing Actions:: Running actions upon successful parsing. +* Writing PEG Rules:: Tips for writing parsing rules. +@end menu + +@node PEX Definitions +@section PEX Definitions + +Parsing expressions can be defined using the following syntax: + +@table @code +@item (and E1 E2 ...) +A sequence of @acronym{PEX}s that must all be matched. The @code{and} form is +optional and implicit. + +@item (or E1 E2 ...) +Prioritized choices, meaning that, as in Elisp, the choices are tried +in order, and the first successful match is used. Note that this is +distinct from context-free grammars, in which selection between +multiple matches is indeterminate. + +@item (any) +Matches any single character, as the regexp ``.''. + +@item @var{string} +A literal string. + +@item (char @var{C}) +A single character @var{C}, as an Elisp character literal. + +@item (* @var{E}) +Zero or more instances of expression @var{E}, as the regexp @samp{*}. +Matching is always ``greedy''. + +@item (+ @var{E}) +One or more instances of expression @var{E}, as the regexp @samp{+}. +Matching is always ``greedy''. + +@item (opt @var{E}) +Zero or one instance of expression @var{E}, as the regexp @samp{?}. + +@item SYMBOL +A symbol representing a previously-defined PEG rule. + +@item (range CH1 CH2) +The character range between CH1 and CH2, as the regexp @samp{[CH1-CH2]}. + +@item [CH1-CH2 "+*" ?x] +A character set, which can include ranges, character literals, or +strings of characters. + +@item [ascii cntrl] +A list of named character classes. + +@item (syntax-class @var{NAME}) +A single syntax class. + +@item (funcall E ARGS...) +Call @acronym{PEX} E (previously defined with @code{define-peg-rule}) +with arguments @var{ARGS}. + +@item (null) +The empty string. + +@end table + +The following expressions are used as anchors or tests -- they do not +move point, but return a boolean value which can be used to constrain +matches as a way of controlling the parsing process (@pxref{Writing +PEG Rules}). + +@table @code +@item (bob) +Beginning of buffer. + +@item (eob) +End of buffer. + +@item (bol) +Beginning of line. + +@item (eol) +End of line. + +@item (bow) +Beginning of word. + +@item (eow) +End of word. + +@item (bos) +Beginning of symbol. + +@item (eos) +End of symbol. + +@item (if E) +Returns non-@code{nil} if parsing @acronym{PEX} E from point succeeds (point +is not moved). + +@item (not E) +Returns non-@code{nil} if parsing @acronym{PEX} E from point fails (point +is not moved). + +@item (guard EXP) +Treats the value of the Lisp expression EXP as a boolean. + +@end table + +@vindex peg-char-classes +Character class matching can use the same named character classes as +in regular expressions (@pxref{Top,, Character Classes,elisp}) + +@node Parsing Actions +@section Parsing Actions + +@cindex parsing actions +@cindex parsing stack +By default the process of parsing simply moves point in the current +buffer, ultimately returning @code{t} if the parsing succeeds, and +@code{nil} if it doesn't. It's also possible to define ``actions'' +that can run arbitrary Elisp at certain points in the parsed text. +These actions can optionally affect something called the @dfn{parsing +stack}, which is a list of values returned by the parsing process. +These actions only run (and only return values) if the parsing process +ultimately succeeds; if it fails the action code is not run at all. + +Actions can be added anywhere in the definition of a rule. They are +distinguished from parsing expressions by an initial backquote +(@samp{`}), followed by a parenthetical form that must contain a pair +of hyphens (@samp{--}) somewhere within it. Symbols to the left of +the hyphens are bound to values popped from the stack (they are +somewhat analogous to the argument list of a lambda form). Values +produced by code to the right are pushed to the stack (analogous to +the return value of the lambda). For instance, the previous grammar +can be augmented with actions to return the parsed number as an actual +integer: + +@example +(with-peg-rules ((number sign digit (* digit + `(a b -- (+ (* a 10) b))) + `(sign val -- (* sign val))) + (sign (or (and "+" `(-- 1)) + (and "-" `(-- -1)) + (and "" `(-- 1)))) + (digit [0-9] `(-- (- (char-before) ?0)))) + (peg-run (peg number))) +@end example + +There must be values on the stack before they can be popped and +returned -- if there aren't enough stack values to bind to an action's +left-hand terms, they will be bound to @code{nil}. An action with +only right-hand terms will push values to the stack; an action with +only left-hand terms will consume (and discard) values from the stack. +At the end of parsing, stack values are returned as a flat list. + +To return the string matched by a @acronym{PEX} (instead of simply +moving point over it), a rule like this can be used: + +@example +(one-word + `(-- (point)) + (+ [word]) + `(start -- (buffer-substring start (point)))) +@end example + +The first action pushes the initial value of point to the stack. The +intervening @acronym{PEX} moves point over the next word. The second +action pops the previous value from the stack (binding it to the +variable @code{start}), and uses that value to extract a substring +from the buffer and push it to the stack. This pattern is so common +that @acronym{PEG} provides a shorthand function that does exactly the +above, along with a few other shorthands for common scenarios: + +@table @code +@item (substring @var{E}) +Match @acronym{PEX} @var{E} and push the matched string to the stack. + +@item (region @var{E}) +Match @var{E} and push the start and end positions of the matched +region to the stack. + +@item (replace @var{E} @var{replacement}) +Match @var{E} and replaced the matched region with the string @var{replacement}. + +@item (list @var{E}) +Match @var{E}, collect all values produced by @var{E} (and its +sub-expressions) into a list, and push that list to the stack. Stack +values are typically returned as a flat list; this is a way of +``grouping'' values together. +@end table + +@node Writing PEG Rules +@section Writing PEG Rules + +Something to be aware of when writing PEG rules is that they are +greedy. Rules which can consume a variable amount of text will always +consume the maximum amount possible, even if that causes a rule that +might otherwise have matched to fail later on -- there is no +backtracking. For instance, this rule will never succeed: + +@example +(forest (+ "tree" (* [blank])) "tree" (eol)) +@end example + +The @acronym{PEX} @code{(+ "tree" (* [blank]))} will consume all +repetitions of the word ``tree'', leaving none to match the final +@code{"tree"}. + +In these situations, the desired result can be obtained by using +predicates and guards -- namely the @code{not}, @code{if} and +@code{guard} expressions -- to constrain behavior. For instance: + +@example +(forest (+ "tree" (* [blank])) (not (eol)) "tree" (eol)) +@end example + +The @code{if} and @code{not} operators accept a parsing expression and +interpret it as a boolean, without moving point. The contents of a +@code{guard} operator are evaluated as regular Lisp (not a +@acronym{PEX}) and should return a boolean value. A @code{nil} value +causes the match to fail. + +Another potentially unexpected behavior is that parsing will move +point as far as possible, even if the parsing ultimately fails. This +rule: + +@example +(end-game "game" (eob)) +@end example + +when run in a buffer containing the text ``game over'' after point, +will move point to just after ``game'' then halt parsing, returning +@code{nil}. Successful parsing will always return @code{t}, or the +contexts of the parsing stack. diff --git a/etc/NEWS b/etc/NEWS index 8ccf04276f6..8e1c1082b3a 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1585,6 +1585,14 @@ forwards-compatibility Compat package from GNU ELPA. This allows built-in packages to use the library more effectively, and helps preventing the installation of Compat if unnecessary. ++++ +** New package PEG. +Emacs now includes a library for writing (P)arsing (E)xpression +(G)rammars, an approach to text parsing that provides more structure +than regular expressions, but less complexity than context-free +grammars. The Info manual "(elisp) Parsing Expression Grammars" has +documentation and examples. + * Incompatible Lisp Changes in Emacs 30.1 diff --git a/lisp/progmodes/peg.el b/lisp/progmodes/peg.el new file mode 100644 index 00000000000..2eb4a7384d0 --- /dev/null +++ b/lisp/progmodes/peg.el @@ -0,0 +1,944 @@ +;;; peg.el --- Parsing Expression Grammars in Emacs Lisp -*- lexical-binding:t -*- + +;; Copyright (C) 2008-2023 Free Software Foundation, Inc. +;; +;; Author: Helmut Eller +;; Maintainer: Stefan Monnier +;; Version: 1.0.1 +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . +;; +;;; Commentary: +;; +;; This package implements Parsing Expression Grammars for Emacs Lisp. + +;; Parsing Expression Grammars (PEG) are a formalism in the spirit of +;; Context Free Grammars (CFG) with some simplifications which makes +;; the implementation of PEGs as recursive descent parsers particularly +;; simple and easy to understand [Ford, Baker]. +;; PEGs are more expressive than regexps and potentially easier to use. +;; +;; This file implements the macros `define-peg-rule', `with-peg-rules', and +;; `peg-parse' which parses the current buffer according to a PEG. +;; E.g. we can match integers with: +;; +;; (with-peg-rules +;; ((number sign digit (* digit)) +;; (sign (or "+" "-" "")) +;; (digit [0-9])) +;; (peg-run (peg number))) +;; or +;; (define-peg-rule digit () +;; [0-9]) +;; (peg-parse (number sign digit (* digit)) +;; (sign (or "+" "-" ""))) +;; +;; In contrast to regexps, PEGs allow us to define recursive "rules". +;; A "grammar" is a set of rules. A rule is written as (NAME PEX...) +;; E.g. (sign (or "+" "-" "")) is a rule with the name "sign". +;; The syntax for PEX (Parsing Expression) is a follows: +;; +;; Description Lisp Traditional, as in Ford's paper +;; =========== ==== =========== +;; Sequence (and E1 E2) e1 e2 +;; Prioritized Choice (or E1 E2) e1 / e2 +;; Not-predicate (not E) !e +;; And-predicate (if E) &e +;; Any character (any) . +;; Literal string "abc" "abc" +;; Character C (char C) 'c' +;; Zero-or-more (* E) e* +;; One-or-more (+ E) e+ +;; Optional (opt E) e? +;; Non-terminal SYMBOL A +;; Character range (range A B) [a-b] +;; Character set [a-b "+*" ?x] [a-b+*x] ;Note: it's a vector +;; Character classes [ascii cntrl] +;; Boolean-guard (guard EXP) +;; Syntax-Class (syntax-class NAME) +;; Local definitions (with RULES PEX...) +;; Indirect call (funcall EXP ARGS...) +;; and +;; Empty-string (null) ε +;; Beginning-of-Buffer (bob) +;; End-of-Buffer (eob) +;; Beginning-of-Line (bol) +;; End-of-Line (eol) +;; Beginning-of-Word (bow) +;; End-of-Word (eow) +;; Beginning-of-Symbol (bos) +;; End-of-Symbol (eos) +;; +;; Rules can refer to other rules, and a grammar is often structured +;; as a tree, with a root rule referring to one or more "branch +;; rules", all the way down to the "leaf rules" that deal with actual +;; buffer text. Rules can be recursive or mutually referential, +;; though care must be taken not to create infinite loops. +;; +;;;; Named rulesets: +;; +;; You can define a set of rules for later use with: +;; +;; (define-peg-ruleset myrules +;; (sign () (or "+" "-" "")) +;; (digit () [0-9]) +;; (nat () digit (* digit)) +;; (int () sign digit (* digit)) +;; (float () int "." nat)) +;; +;; and later refer to it: +;; +;; (with-peg-rules +;; (myrules +;; (complex float "+i" float)) +;; ... (peg-parse nat "," nat "," complex) ...) +;; +;;;; Parsing actions: +;; +;; PEXs also support parsing actions, i.e. Lisp snippets which are +;; executed when a pex matches. This can be used to construct syntax +;; trees or for similar tasks. The most basic form of action is +;; written as: +;; +;; (action FORM) ; evaluate FORM for its side-effects +;; +;; Actions don't consume input, but are executed at the point of +;; match. Another kind of action is called a "stack action", and +;; looks like this: +;; +;; `(VAR... -- FORM...) ; stack action +;; +;; A stack action takes VARs from the "value stack" and pushes the +;; results of evaluating FORMs to that stack. + +;; The value stack is created during the course of parsing. Certain +;; operators (see below) that match buffer text can push values onto +;; this stack. "Upstream" rules can then draw values from the stack, +;; and optionally push new ones back. For instance, consider this +;; very simple grammar: +;; +;; (with-peg-rules +;; ((query (+ term) (eol)) +;; (term key ":" value (opt (+ [space])) +;; `(k v -- (cons (intern k) v))) +;; (key (substring (and (not ":") (+ [word])))) +;; (value (or string-value number-value)) +;; (string-value (substring (+ [alpha]))) +;; (number-value (substring (+ [digit])) +;; `(val -- (string-to-number val)))) +;; (peg-run (peg query))) +;; +;; This invocation of `peg-run' would parse this buffer text: +;; +;; name:Jane age:30 +;; +;; And return this Elisp sexp: +;; +;; ((age . 30) (name . "Jane")) +;; +;; Note that, in complex grammars, some care must be taken to make +;; sure that the number and type of values drawn from the stack always +;; match those pushed. In the example above, both `string-value' and +;; `number-value' push a single value to the stack. Since the `value' +;; rule only includes these two sub-rules, any upstream rule that +;; makes use of `value' can be confident it will always and only push +;; a single value to the stack. +;; +;; Stack action forms are in a sense analogous to lambda forms: the +;; symbols before the "--" are the equivalent of lambda arguments, +;; while the forms after the "--" are return values. The difference +;; being that a lambda form can only return a single value, while a +;; stack action can push multiple values onto the stack. It's also +;; perfectly valid to use `(-- FORM...)' or `(VAR... --)': the former +;; pushes values to the stack without consuming any, and the latter +;; pops values from the stack and discards them. +;; +;;;; Derived Operators: +;; +;; The following operators are implemented as combinations of +;; primitive expressions: +;; +;; (substring E) ; Match E and push the substring for the matched region. +;; (region E) ; Match E and push the start and end positions. +;; (replace E RPL); Match E and replace the matched region with RPL. +;; (list E) ; Match E and push a list of the items that E produced. +;; +;; See `peg-ex-parse-int' in `peg-tests.el' for further examples. +;; +;; Regexp equivalents: +;; +;; Here a some examples for regexps and how those could be written as pex. +;; [Most are taken from rx.el] +;; +;; "^[a-z]*" +;; (and (bol) (* [a-z])) +;; +;; "\n[^ \t]" +;; (and "\n" (not [" \t"]) (any)) +;; +;; "\\*\\*\\* EOOH \\*\\*\\*\n" +;; "*** EOOH ***\n" +;; +;; "\\<\\(catch\\|finally\\)\\>[^_]" +;; (and (bow) (or "catch" "finally") (eow) (not "_") (any)) +;; +;; "[ \t\n]*:\\([^:]+\\|$\\)" +;; (and (* [" \t\n"]) ":" (or (+ (not ":") (any)) (eol))) +;; +;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" +;; (and (bol) +;; "content-transfer-encoding:" +;; (* (opt "\n") ["\t "]) +;; "quoted-printable" +;; (* (opt "\n") ["\t "])) +;; +;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " +;; (and "$Id: " (+ (not " ") (any)) " " (+ (not " ") (any)) " ") +;; +;; "^;;\\s-*\n\\|^\n" +;; (or (and (bol) ";;" (* (syntax-class whitespace)) "\n") +;; (and (bol) "\n")) +;; +;; "\\\\\\\\\\[\\w+" +;; (and "\\\\[" (+ (syntax-class word))) +;; +;; See ";;; Examples" in `peg-tests.el' for other examples. +;; +;;;; Rule argument and indirect calls: +;; +;; Rules can take arguments and those arguments can themselves be PEGs. +;; For example: +;; +;; (define-peg-rule 2-or-more (peg) +;; (funcall peg) +;; (funcall peg) +;; (* (funcall peg))) +;; +;; ... (peg-parse +;; ... +;; (2-or-more (peg foo)) +;; ... +;; (2-or-more (peg bar)) +;; ...) +;; +;;;; References: +;; +;; [Ford] Bryan Ford. Parsing Expression Grammars: a Recognition-Based +;; Syntactic Foundation. In POPL'04: Proceedings of the 31st ACM +;; SIGPLAN-SIGACT symposium on Principles of Programming Languages, +;; pages 111-122, New York, NY, USA, 2004. ACM Press. +;; http://pdos.csail.mit.edu/~baford/packrat/ +;; +;; [Baker] Baker, Henry G. "Pragmatic Parsing in Common Lisp". ACM Lisp +;; Pointers 4(2), April--June 1991, pp. 3--15. +;; http://home.pipeline.com/~hbaker1/Prag-Parse.html +;; +;; Roman Redziejowski does good PEG related research +;; http://www.romanredz.se/pubs.htm + +;;;; Todo: + +;; - Fix the exponential blowup in `peg-translate-exp'. +;; - Add a proper debug-spec for PEXs. + +;;; News: + +;; Since 1.0.1: +;; - Use OClosures to represent PEG rules when available, and let cl-print +;; display their source code. +;; - New PEX form (with RULES PEX...). +;; - Named rulesets. +;; - You can pass arguments to rules. +;; - New `funcall' rule to call rules indirectly (e.g. a peg you received +;; as argument). + +;; Version 1.0: +;; - New official entry points `peg` and `peg-run`. + +;;; Code: + +(eval-when-compile (require 'cl-lib)) + +(defvar peg--actions nil + "Actions collected along the current parse. +Used at runtime for backtracking. It's a list ((POS . THUNK)...). +Each THUNK is executed at the corresponding POS. Thunks are +executed in a postprocessing step, not during parsing.") + +(defvar peg--errors nil + "Data keeping track of the rightmost parse failure location. +It's a pair (POSITION . EXPS ...). POSITION is the buffer position and +EXPS is a list of rules/expressions that failed.") + +;;;; Main entry points + +(defmacro peg--when-fboundp (f &rest body) + (declare (indent 1) (debug (sexp body))) + (when (fboundp f) + (macroexp-progn body))) + +(peg--when-fboundp oclosure-define + (oclosure-define peg-function + "Parsing function built from PEG rule." + pexs) + + (cl-defmethod cl-print-object ((peg peg-function) stream) + (princ "#f" stream))) + +(defmacro peg--lambda (pexs args &rest body) + (declare (indent 2) + (debug (&define form lambda-list def-body))) + (if (fboundp 'oclosure-lambda) + `(oclosure-lambda (peg-function (pexs ,pexs)) ,args . ,body) + `(lambda ,args . ,body))) + +;; Sometimes (with-peg-rules ... (peg-run (peg ...))) is too +;; longwinded for the task at hand, so `peg-parse' comes in handy. +(defmacro peg-parse (&rest pexs) + "Match PEXS at point. +PEXS is a sequence of PEG expressions, implicitly combined with `and'. +Returns STACK if the match succeed and signals an error on failure, +moving point along the way. +PEXS can also be a list of PEG rules, in which case the first rule is used." + (if (and (consp (car pexs)) + (symbolp (caar pexs)) + (not (ignore-errors (peg-normalize (car pexs))))) + ;; `pexs' is a list of rules: use the first rule as entry point. + `(with-peg-rules ,pexs (peg-run (peg ,(caar pexs)) #'peg-signal-failure)) + `(peg-run (peg ,@pexs) #'peg-signal-failure))) + +(defmacro peg (&rest pexs) + "Return a PEG-matcher that matches PEXS." + (pcase (peg-normalize `(and . ,pexs)) + (`(call ,name) `#',(peg--rule-id name)) ;Optimize this case by η-reduction! + (exp `(peg--lambda ',pexs () ,(peg-translate-exp exp))))) + +;; There are several "infos we want to return" when parsing a given PEX: +;; 1- We want to return the success/failure of the parse. +;; 2- We want to return the data of the successful parse (the stack). +;; 3- We want to return the diagnostic of the failures. +;; 4- We want to perform the actions (upon parse success)! +;; `peg-parse' used an error signal to encode the (1) boolean, which +;; lets it return all the info conveniently but the error signal was sometimes +;; inconvenient. Other times one wants to just know (1) maybe without even +;; performing (4). +;; `peg-run' lets you choose all that, and by default gives you +;; (1) as a simple boolean, while also doing (2), and (4). + +(defun peg-run (peg-matcher &optional failure-function success-function) + "Parse with PEG-MATCHER at point and run the success/failure function. +If a match was found, move to the end of the match and call SUCCESS-FUNCTION +with one argument: a function which will perform all the actions collected +during the parse and then return the resulting stack (or t if empty). +If no match was found, move to the (rightmost) point of parse failure and call +FAILURE-FUNCTION with one argument, which is a list of PEG expressions that +failed at this point. +SUCCESS-FUNCTION defaults to `funcall' and FAILURE-FUNCTION +defaults to `ignore'." + (let ((peg--actions '()) (peg--errors '(-1))) + (if (funcall peg-matcher) + ;; Found a parse: run the actions collected along the way. + (funcall (or success-function #'funcall) + (lambda () + (save-excursion (peg-postprocess peg--actions)))) + (goto-char (car peg--errors)) + (when failure-function + (funcall failure-function (peg-merge-errors (cdr peg--errors))))))) + +(defmacro define-peg-rule (name args &rest pexs) + "Define PEG rule NAME as equivalent to PEXS. +The PEG expressions in PEXS are implicitly combined with the +sequencing `and' operator of PEG grammars." + (declare (indent 1)) + (let ((inline nil)) + (while (keywordp (car pexs)) + (pcase (pop pexs) + (:inline (setq inline (car pexs)))) + (setq pexs (cdr pexs))) + (let ((id (peg--rule-id name)) + (exp (peg-normalize `(and . ,pexs)))) + `(progn + (defalias ',id + (peg--lambda ',pexs ,args + ,(if inline + ;; Short-circuit to peg--translate in order to skip + ;; the extra failure-recording of `peg-translate-exp'. + ;; It also skips the cycle detection of + ;; `peg--translate-rule-body', which is not the main + ;; purpose but we can live with it. + (apply #'peg--translate exp) + (peg--translate-rule-body name exp)))) + (eval-and-compile + ;; FIXME: We shouldn't need this any more since the info is now + ;; stored in the function, but sadly we need to find a name's EXP + ;; during compilation (i.e. before the `defalias' is executed) + ;; as part of cycle-detection! + (put ',id 'peg--rule-definition ',exp) + ,@(when inline + ;; FIXME: Copied from `defsubst'. + `(;; Never native-compile defsubsts as we need the byte + ;; definition in `byte-compile-unfold-bcf' to perform the + ;; inlining (Bug#42664, Bug#43280, Bug#44209). + ,(byte-run--set-speed id nil -1) + (put ',id 'byte-optimizer #'byte-compile-inline-expand)))))))) + +(defmacro define-peg-ruleset (name &rest rules) + "Define a set of PEG rules for later use, e.g., in `with-peg-rules'." + (declare (indent 1)) + (let ((defs ()) + (aliases ())) + (dolist (rule rules) + (let* ((rname (car rule)) + (full-rname (format "%s %s" name rname))) + (push `(define-peg-rule ,full-rname . ,(cdr rule)) defs) + (push `(,(peg--rule-id rname) #',(peg--rule-id full-rname)) aliases))) + `(cl-flet ,aliases + ,@defs + (eval-and-compile (put ',name 'peg--rules ',aliases))))) + +(defmacro with-peg-rules (rules &rest body) + "Make PEG rules RULES available within the scope of BODY. +RULES is a list of rules of the form (NAME . PEXS), where PEXS is a sequence +of PEG expressions, implicitly combined with `and'. +RULES can also contain symbols in which case these must name +rulesets defined previously with `define-peg-ruleset'." + (declare (indent 1) (debug (sexp form))) ;FIXME: `sexp' is not good enough! + (let* ((rulesets nil) + (rules + ;; First, macroexpand the rules. + (delq nil + (mapcar (lambda (rule) + (if (symbolp rule) + (progn (push rule rulesets) nil) + (cons (car rule) (peg-normalize `(and . ,(cdr rule)))))) + rules))) + (ctx (assq :peg-rules macroexpand-all-environment))) + (macroexpand-all + `(cl-labels + ,(mapcar (lambda (rule) + ;; FIXME: Use `peg--lambda' as well. + `(,(peg--rule-id (car rule)) + () + ,(peg--translate-rule-body (car rule) (cdr rule)))) + rules) + ,@body) + `((:peg-rules ,@(append rules (cdr ctx))) + ,@macroexpand-all-environment)))) + +;;;;; Old entry points + +(defmacro peg-parse-exp (exp) + "Match the parsing expression EXP at point." + (declare (obsolete peg-parse "peg-0.9")) + `(peg-run (peg ,exp))) + +;;;; The actual implementation + +(defun peg--lookup-rule (name) + (or (cdr (assq name (cdr (assq :peg-rules macroexpand-all-environment)))) + ;; With `peg-function' objects, we can recover the PEG from which it was + ;; defined, but this info is not yet available at compile-time. :-( + ;;(let ((id (peg--rule-id name))) + ;; (peg-function--pexs (symbol-function id))) + (get (peg--rule-id name) 'peg--rule-definition))) + +(defun peg--rule-id (name) + (intern (format "peg-rule %s" name))) + +(define-error 'peg-search-failed "Parse error at %d (expecting %S)") + +(defun peg-signal-failure (failures) + (signal 'peg-search-failed (list (point) failures))) + +(defun peg-parse-at-point (peg-matcher) + "Parse text at point according to the PEG rule PEG-MATCHER." + (declare (obsolete peg-run "peg-1.0")) + (peg-run peg-matcher + #'peg-signal-failure + (lambda (f) (let ((r (funcall f))) (if (listp r) r))))) + +;; Internally we use a regularized syntax, e.g. we only have binary OR +;; nodes. Regularized nodes are lists of the form (OP ARGS...). +(cl-defgeneric peg-normalize (exp) + "Return a \"normalized\" form of EXP." + (error "Invalid parsing expression: %S" exp)) + +(cl-defmethod peg-normalize ((exp string)) + (let ((len (length exp))) + (cond ((zerop len) '(guard t)) + ((= len 1) `(char ,(aref exp 0))) + (t `(str ,exp))))) + +(cl-defmethod peg-normalize ((exp symbol)) + ;; (peg--lookup-rule exp) + `(call ,exp)) + +(cl-defmethod peg-normalize ((exp vector)) + (peg-normalize `(set . ,(append exp '())))) + +(cl-defmethod peg-normalize ((exp cons)) + (apply #'peg--macroexpand exp)) + +(defconst peg-leaf-types '(any call action char range str set + guard syntax-class = funcall)) + +(cl-defgeneric peg--macroexpand (head &rest args) + (cond + ((memq head peg-leaf-types) (cons head args)) + (t `(call ,head ,@args)))) + +(cl-defmethod peg--macroexpand ((_ (eql or)) &rest args) + (cond ((null args) '(guard nil)) + ((null (cdr args)) (peg-normalize (car args))) + (t `(or ,(peg-normalize (car args)) + ,(peg-normalize `(or . ,(cdr args))))))) + +(cl-defmethod peg--macroexpand ((_ (eql and)) &rest args) + (cond ((null args) '(guard t)) + ((null (cdr args)) (peg-normalize (car args))) + (t `(and ,(peg-normalize (car args)) + ,(peg-normalize `(and . ,(cdr args))))))) + +(cl-defmethod peg--macroexpand ((_ (eql *)) &rest args) + `(* ,(peg-normalize `(and . ,args)))) + +;; FIXME: this duplicates code; could use some loop to avoid that +(cl-defmethod peg--macroexpand ((_ (eql +)) &rest args) + (let ((e (peg-normalize `(and . ,args)))) + `(and ,e (* ,e)))) + +(cl-defmethod peg--macroexpand ((_ (eql opt)) &rest args) + (let ((e (peg-normalize `(and . ,args)))) + `(or ,e (guard t)))) + +(cl-defmethod peg--macroexpand ((_ (eql if)) &rest args) + `(if ,(peg-normalize `(and . ,args)))) + +(cl-defmethod peg--macroexpand ((_ (eql not)) &rest args) + `(not ,(peg-normalize `(and . ,args)))) + +(cl-defmethod peg--macroexpand ((_ (eql \`)) form) + (peg-normalize `(stack-action ,form))) + +(cl-defmethod peg--macroexpand ((_ (eql stack-action)) form) + (unless (member '-- form) + (error "Malformed stack action: %S" form)) + (let ((args (cdr (member '-- (reverse form)))) + (values (cdr (member '-- form)))) + (let ((form `(let ,(mapcar (lambda (var) `(,var (pop peg--stack))) args) + ,@(mapcar (lambda (val) `(push ,val peg--stack)) values)))) + `(action ,form)))) + +(defvar peg-char-classes + '(ascii alnum alpha blank cntrl digit graph lower multibyte nonascii print + punct space unibyte upper word xdigit)) + +(cl-defmethod peg--macroexpand ((_ (eql set)) &rest specs) + (cond ((null specs) '(guard nil)) + ((and (null (cdr specs)) + (let ((range (peg-range-designator (car specs)))) + (and range `(range ,(car range) ,(cdr range)))))) + (t + (let ((chars '()) (ranges '()) (classes '())) + (while specs + (let* ((spec (pop specs)) + (range (peg-range-designator spec))) + (cond (range + (push range ranges)) + ((peg-characterp spec) + (push spec chars)) + ((stringp spec) + (setq chars (append (reverse (append spec ())) chars))) + ((memq spec peg-char-classes) + (push spec classes)) + (t (error "Invalid set specifier: %S" spec))))) + (setq ranges (reverse ranges)) + (setq chars (delete-dups (reverse chars))) + (setq classes (reverse classes)) + (cond ((and (null ranges) + (null classes) + (cond ((null chars) '(guard nil)) + ((null (cdr chars)) `(char ,(car chars)))))) + (t `(set ,ranges ,chars ,classes))))))) + +(defun peg-range-designator (x) + (and (symbolp x) + (let ((str (symbol-name x))) + (and (= (length str) 3) + (eq (aref str 1) ?-) + (< (aref str 0) (aref str 2)) + (cons (aref str 0) (aref str 2)))))) + +;; characterp is new in Emacs 23. +(defun peg-characterp (x) + (if (fboundp 'characterp) + (characterp x) + (integerp x))) + +(cl-defmethod peg--macroexpand ((_ (eql list)) &rest args) + (peg-normalize + (let ((marker (make-symbol "magic-marker"))) + `(and (stack-action (-- ',marker)) + ,@args + (stack-action (-- + (let ((l '())) + (while + (let ((e (pop peg--stack))) + (cond ((eq e ',marker) nil) + ((null peg--stack) + (error "No marker on stack")) + (t (push e l) t)))) + l))))))) + +(cl-defmethod peg--macroexpand ((_ (eql substring)) &rest args) + (peg-normalize + `(and `(-- (point)) + ,@args + `(start -- (buffer-substring-no-properties start (point)))))) + +(cl-defmethod peg--macroexpand ((_ (eql region)) &rest args) + (peg-normalize + `(and `(-- (point)) + ,@args + `(-- (point))))) + +(cl-defmethod peg--macroexpand ((_ (eql replace)) pe replacement) + (peg-normalize + `(and (stack-action (-- (point))) + ,pe + (stack-action (start -- (progn + (delete-region start (point)) + (insert-before-markers ,replacement)))) + (stack-action (_ --))))) + +(cl-defmethod peg--macroexpand ((_ (eql quote)) _form) + (error "quote is reserved for future use")) + +(cl-defgeneric peg--translate (head &rest args) + (error "No translator for: %S" (cons head args))) + +(defun peg--translate-rule-body (name exp) + (let ((msg (condition-case err + (progn (peg-detect-cycles exp (list name)) nil) + (error (error-message-string err)))) + (code (peg-translate-exp exp))) + (cond + ((null msg) code) + ((fboundp 'macroexp--warn-and-return) + (macroexp--warn-and-return msg code)) + (t + (message "%s" msg) + code)))) + +;; This is the main translation function. +(defun peg-translate-exp (exp) + "Return the ELisp code to match the PE EXP." + ;; FIXME: This expansion basically duplicates `exp' in the output, which is + ;; a serious problem because it's done recursively, so it makes the output + ;; code's size exponentially larger than the input! + `(or ,(apply #'peg--translate exp) + (peg--record-failure ',exp))) ; for error reporting + +(define-obsolete-function-alias 'peg-record-failure + #'peg--record-failure "peg-1.0") +(defun peg--record-failure (exp) + (cond ((= (point) (car peg--errors)) + (setcdr peg--errors (cons exp (cdr peg--errors)))) + ((> (point) (car peg--errors)) + (setq peg--errors (list (point) exp)))) + nil) + +(cl-defmethod peg--translate ((_ (eql and)) e1 e2) + `(and ,(peg-translate-exp e1) + ,(peg-translate-exp e2))) + +;; Choicepoints are used for backtracking. At a choicepoint we save +;; enough state, so that we can continue from there if needed. +(defun peg--choicepoint-moved-p (choicepoint) + `(/= ,(car choicepoint) (point))) + +(defun peg--choicepoint-restore (choicepoint) + `(progn + (goto-char ,(car choicepoint)) + (setq peg--actions ,(cdr choicepoint)))) + +(defmacro peg--with-choicepoint (var &rest body) + (declare (indent 1) (debug (symbolp form))) + `(let ((,var (cons (make-symbol "point") (make-symbol "actions")))) + `(let ((,(car ,var) (point)) + (,(cdr ,var) peg--actions)) + ,@(list ,@body)))) + +(cl-defmethod peg--translate ((_ (eql or)) e1 e2) + (peg--with-choicepoint cp + `(or ,(peg-translate-exp e1) + (,@(peg--choicepoint-restore cp) + ,(peg-translate-exp e2))))) + +(cl-defmethod peg--translate ((_ (eql with)) rules &rest exps) + `(with-peg-rules ,rules ,(peg--translate `(and . ,exps)))) + +(cl-defmethod peg--translate ((_ (eql guard)) exp) exp) + +(defvar peg-syntax-classes + '((whitespace ?-) (word ?w) (symbol ?s) (punctuation ?.) + (open ?\() (close ?\)) (string ?\") (escape ?\\) (charquote ?/) + (math ?$) (prefix ?') (comment ?<) (endcomment ?>) + (comment-fence ?!) (string-fence ?|))) + +(cl-defmethod peg--translate ((_ (eql syntax-class)) class) + (let ((probe (assoc class peg-syntax-classes))) + (cond (probe `(when (looking-at ,(format "\\s%c" (cadr probe))) + (forward-char) + t)) + (t (error "Invalid syntax class: %S\nMust be one of: %s" class + (mapcar #'car peg-syntax-classes)))))) + +(cl-defmethod peg--translate ((_ (eql =)) string) + `(let ((str ,string)) + (when (zerop (length str)) + (error "Empty strings not allowed for =")) + (search-forward str (+ (point) (length str)) t))) + +(cl-defmethod peg--translate ((_ (eql *)) e) + `(progn (while ,(peg--with-choicepoint cp + `(if ,(peg-translate-exp e) + ;; Just as regexps do for the `*' operator, + ;; we allow the body of `*' loops to match + ;; the empty string, but we don't repeat the loop if + ;; we haven't moved, to avoid inf-loops. + ,(peg--choicepoint-moved-p cp) + ,(peg--choicepoint-restore cp) + nil))) + t)) + +(cl-defmethod peg--translate ((_ (eql if)) e) + (peg--with-choicepoint cp + `(when ,(peg-translate-exp e) + ,(peg--choicepoint-restore cp) + t))) + +(cl-defmethod peg--translate ((_ (eql not)) e) + (peg--with-choicepoint cp + `(unless ,(peg-translate-exp e) + ,(peg--choicepoint-restore cp) + t))) + +(cl-defmethod peg--translate ((_ (eql any)) ) + '(when (not (eobp)) + (forward-char) + t)) + +(cl-defmethod peg--translate ((_ (eql char)) c) + `(when (eq (char-after) ',c) + (forward-char) + t)) + +(cl-defmethod peg--translate ((_ (eql set)) ranges chars classes) + `(when (looking-at ',(peg-make-charset-regexp ranges chars classes)) + (forward-char) + t)) + +(defun peg-make-charset-regexp (ranges chars classes) + (when (and (not ranges) (not classes) (<= (length chars) 1)) + (error "Bug")) + (let ((rbracket (member ?\] chars)) + (minus (member ?- chars)) + (hat (member ?^ chars))) + (dolist (c '(?\] ?- ?^)) + (setq chars (remove c chars))) + (format "[%s%s%s%s%s%s]" + (if rbracket "]" "") + (if minus "-" "") + (mapconcat (lambda (x) (format "%c-%c" (car x) (cdr x))) ranges "") + (mapconcat (lambda (c) (format "[:%s:]" c)) classes "") + (mapconcat (lambda (c) (format "%c" c)) chars "") + (if hat "^" "")))) + +(cl-defmethod peg--translate ((_ (eql range)) from to) + `(when (and (char-after) + (<= ',from (char-after)) + (<= (char-after) ',to)) + (forward-char) + t)) + +(cl-defmethod peg--translate ((_ (eql str)) str) + `(when (looking-at ',(regexp-quote str)) + (goto-char (match-end 0)) + t)) + +(cl-defmethod peg--translate ((_ (eql call)) name &rest args) + `(,(peg--rule-id name) ,@args)) + +(cl-defmethod peg--translate ((_ (eql funcall)) exp &rest args) + `(funcall ,exp ,@args)) + +(cl-defmethod peg--translate ((_ (eql action)) form) + `(progn + (push (cons (point) (lambda () ,form)) peg--actions) + t)) + +(defvar peg--stack nil) +(defun peg-postprocess (actions) + "Execute \"actions\"." + (let ((peg--stack '()) + (forw-actions ())) + (pcase-dolist (`(,pos . ,thunk) actions) + (push (cons (copy-marker pos) thunk) forw-actions)) + (pcase-dolist (`(,pos . ,thunk) forw-actions) + (goto-char pos) + (funcall thunk)) + (or peg--stack t))) + +;; Left recursion is presumably a common mistake when using PEGs. +;; Here we try to detect such mistakes. Essentially we traverse the +;; graph as long as we can without consuming input. When we find a +;; recursive call we signal an error. + +(defun peg-detect-cycles (exp path) + "Signal an error on a cycle. +Otherwise traverse EXP recursively and return T if EXP can match +without consuming input. Return nil if EXP definitely consumes +input. PATH is the list of rules that we have visited so far." + (apply #'peg--detect-cycles path exp)) + +(cl-defgeneric peg--detect-cycles (head _path &rest args) + (error "No detect-cycle method for: %S" (cons head args))) + +(cl-defmethod peg--detect-cycles (path (_ (eql call)) name) + (if (member name path) + (error "Possible left recursion: %s" + (mapconcat (lambda (x) (format "%s" x)) + (reverse (cons name path)) " -> ")) + (let ((exp (peg--lookup-rule name))) + (if (null exp) + ;; If there's no rule by that name, either we'll fail at + ;; run-time or it will be defined later. In any case, at this + ;; point there's no evidence of a cycle, and if a cycle appears + ;; later we'll hopefully catch it when the rule gets defined. + ;; FIXME: In practice, if `name' is part of the cycle, we will + ;; indeed detect it when it gets defined, but OTOH if `name' + ;; is not part of a cycle but it *enables* a cycle because + ;; it matches the empty string (i.e. we should have returned t + ;; here), then we may not catch the problem at all :-( + nil + (peg-detect-cycles exp (cons name path)))))) + +(cl-defmethod peg--detect-cycles (path (_ (eql and)) e1 e2) + (and (peg-detect-cycles e1 path) + (peg-detect-cycles e2 path))) + +(cl-defmethod peg--detect-cycles (path (_ (eql or)) e1 e2) + (or (peg-detect-cycles e1 path) + (peg-detect-cycles e2 path))) + +(cl-defmethod peg--detect-cycles (path (_ (eql *)) e) + (peg-detect-cycles e path) + t) + +(cl-defmethod peg--detect-cycles (path (_ (eql if)) e) + (peg-unary-nullable e path)) +(cl-defmethod peg--detect-cycles (path (_ (eql not)) e) + (peg-unary-nullable e path)) + +(defun peg-unary-nullable (exp path) + (peg-detect-cycles exp path) + t) + +(cl-defmethod peg--detect-cycles (_path (_ (eql any))) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql char)) _c) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql set)) _r _c _k) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql range)) _c1 _c2) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql str)) s) (equal s "")) +(cl-defmethod peg--detect-cycles (_path (_ (eql guard)) _e) t) +(cl-defmethod peg--detect-cycles (_path (_ (eql =)) _s) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql syntax-class)) _n) nil) +(cl-defmethod peg--detect-cycles (_path (_ (eql action)) _form) t) + +(defun peg-merge-errors (exps) + "Build a more readable error message out of failed expression." + (let ((merged '())) + (dolist (exp exps) + (setq merged (peg-merge-error exp merged))) + merged)) + +(defun peg-merge-error (exp merged) + (apply #'peg--merge-error merged exp)) + +(cl-defgeneric peg--merge-error (_merged head &rest args) + (error "No merge-error method for: %S" (cons head args))) + +(cl-defmethod peg--merge-error (merged (_ (eql or)) e1 e2) + (peg-merge-error e2 (peg-merge-error e1 merged))) + +(cl-defmethod peg--merge-error (merged (_ (eql and)) e1 _e2) + ;; FIXME: Why is `e2' not used? + (peg-merge-error e1 merged)) + +(cl-defmethod peg--merge-error (merged (_ (eql str)) str) + ;;(add-to-list 'merged str) + (cl-adjoin str merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql call)) rule) + ;; (add-to-list 'merged rule) + (cl-adjoin rule merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql char)) char) + ;; (add-to-list 'merged (string char)) + (cl-adjoin (string char) merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql set)) r c k) + ;; (add-to-list 'merged (peg-make-charset-regexp r c k)) + (cl-adjoin (peg-make-charset-regexp r c k) merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql range)) from to) + ;; (add-to-list 'merged (format "[%c-%c]" from to)) + (cl-adjoin (format "[%c-%c]" from to) merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql *)) exp) + (peg-merge-error exp merged)) + +(cl-defmethod peg--merge-error (merged (_ (eql any))) + ;; (add-to-list 'merged '(any)) + (cl-adjoin '(any) merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql not)) x) + ;; (add-to-list 'merged `(not ,x)) + (cl-adjoin `(not ,x) merged :test #'equal)) + +(cl-defmethod peg--merge-error (merged (_ (eql action)) _action) merged) +(cl-defmethod peg--merge-error (merged (_ (eql null))) merged) + +(provide 'peg) +(require 'peg) + +(define-peg-rule null () :inline t (guard t)) +(define-peg-rule fail () :inline t (guard nil)) +(define-peg-rule bob () :inline t (guard (bobp))) +(define-peg-rule eob () :inline t (guard (eobp))) +(define-peg-rule bol () :inline t (guard (bolp))) +(define-peg-rule eol () :inline t (guard (eolp))) +(define-peg-rule bow () :inline t (guard (looking-at "\\<"))) +(define-peg-rule eow () :inline t (guard (looking-at "\\>"))) +(define-peg-rule bos () :inline t (guard (looking-at "\\_<"))) +(define-peg-rule eos () :inline t (guard (looking-at "\\_>"))) + +;;; peg.el ends here diff --git a/test/lisp/peg-tests.el b/test/lisp/peg-tests.el new file mode 100644 index 00000000000..864e09b4200 --- /dev/null +++ b/test/lisp/peg-tests.el @@ -0,0 +1,367 @@ +;;; peg-tests.el --- Tests of PEG parsers -*- lexical-binding: t; -*- + +;; Copyright (C) 2008-2023 Free Software Foundation, Inc. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Commentary: + +;; Tests and examples, that used to live in peg.el wrapped inside an `eval'. + +;;; Code: + +(require 'peg) +(require 'ert) + +;;; Tests: + +(defmacro peg-parse-string (pex string &optional noerror) + "Parse STRING according to PEX. +If NOERROR is non-nil, push nil resp. t if the parse failed +resp. succeeded instead of signaling an error." + (let ((oldstyle (consp (car-safe pex)))) ;PEX is really a list of rules. + `(with-temp-buffer + (insert ,string) + (goto-char (point-min)) + ,(if oldstyle + `(with-peg-rules ,pex + (peg-run (peg ,(caar pex)) + ,(unless noerror '#'peg-signal-failure))) + `(peg-run (peg ,pex) + ,(unless noerror '#'peg-signal-failure)))))) + +(define-peg-rule peg-test-natural () + [0-9] (* [0-9])) + +(ert-deftest peg-test () + (should (peg-parse-string peg-test-natural "99 bottles" t)) + (should (peg-parse-string ((s "a")) "a" t)) + (should (not (peg-parse-string ((s "a")) "b" t))) + (should (peg-parse-string ((s (not "a"))) "b" t)) + (should (not (peg-parse-string ((s (not "a"))) "a" t))) + (should (peg-parse-string ((s (if "a"))) "a" t)) + (should (not (peg-parse-string ((s (if "a"))) "b" t))) + (should (peg-parse-string ((s "ab")) "ab" t)) + (should (not (peg-parse-string ((s "ab")) "ba" t))) + (should (not (peg-parse-string ((s "ab")) "a" t))) + (should (peg-parse-string ((s (range ?0 ?9))) "0" t)) + (should (not (peg-parse-string ((s (range ?0 ?9))) "a" t))) + (should (peg-parse-string ((s [0-9])) "0" t)) + (should (not (peg-parse-string ((s [0-9])) "a" t))) + (should (not (peg-parse-string ((s [0-9])) "" t))) + (should (peg-parse-string ((s (any))) "0" t)) + (should (not (peg-parse-string ((s (any))) "" t))) + (should (peg-parse-string ((s (eob))) "" t)) + (should (peg-parse-string ((s (not (eob)))) "a" t)) + (should (peg-parse-string ((s (or "a" "b"))) "a" t)) + (should (peg-parse-string ((s (or "a" "b"))) "b" t)) + (should (not (peg-parse-string ((s (or "a" "b"))) "c" t))) + (should (peg-parse-string (and "a" "b") "ab" t)) + (should (peg-parse-string ((s (and "a" "b"))) "abc" t)) + (should (not (peg-parse-string (and "a" "b") "ba" t))) + (should (peg-parse-string ((s (and "a" "b" "c"))) "abc" t)) + (should (peg-parse-string ((s (* "a") "b" (eob))) "b" t)) + (should (peg-parse-string ((s (* "a") "b" (eob))) "ab" t)) + (should (peg-parse-string ((s (* "a") "b" (eob))) "aaab" t)) + (should (not (peg-parse-string ((s (* "a") "b" (eob))) "abc" t))) + (should (peg-parse-string ((s "")) "abc" t)) + (should (peg-parse-string ((s "" (eob))) "" t)) + (should (peg-parse-string ((s (opt "a") "b")) "abc" t)) + (should (peg-parse-string ((s (opt "a") "b")) "bc" t)) + (should (not (peg-parse-string ((s (or))) "ab" t))) + (should (peg-parse-string ((s (and))) "ab" t)) + (should (peg-parse-string ((s (and))) "" t)) + (should (peg-parse-string ((s ["^"])) "^" t)) + (should (peg-parse-string ((s ["^a"])) "a" t)) + (should (peg-parse-string ["-"] "-" t)) + (should (peg-parse-string ((s ["]-"])) "]" t)) + (should (peg-parse-string ((s ["^]"])) "^" t)) + (should (peg-parse-string ((s [alpha])) "z" t)) + (should (not (peg-parse-string ((s [alpha])) "0" t))) + (should (not (peg-parse-string ((s [alpha])) "" t))) + (should (not (peg-parse-string ((s ["][:alpha:]"])) "z" t))) + (should (peg-parse-string ((s (bob))) "" t)) + (should (peg-parse-string ((s (bos))) "x" t)) + (should (not (peg-parse-string ((s (bos))) " x" t))) + (should (peg-parse-string ((s "x" (eos))) "x" t)) + (should (peg-parse-string ((s (syntax-class whitespace))) " " t)) + (should (peg-parse-string ((s (= "foo"))) "foo" t)) + (should (let ((f "foo")) (peg-parse-string ((s (= f))) "foo" t))) + (should (not (peg-parse-string ((s (= "foo"))) "xfoo" t))) + (should (equal (peg-parse-string ((s `(-- 1 2))) "") '(2 1))) + (should (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") '(2 1))) + (should (equal (peg-parse-string ((s (or (and (any) s) + (substring [0-9])))) + "ab0cd1ef2gh") + '("2"))) + ;; The PEG rule `other' doesn't exist, which will cause a byte-compiler + ;; warning, but not an error at run time because the rule is not actually + ;; used in this particular case. + (should (equal (peg-parse-string ((s (substring (or "a" other))) + ;; Unused left-recursive rule, should + ;; cause a byte-compiler warning. + (r (* "a") r)) + "af") + '("a"))) + (should (equal (peg-parse-string ((s (list x y)) + (x `(-- 1)) + (y `(-- 2))) + "") + '((1 2)))) + (should (equal (peg-parse-string ((s (list (* x))) + (x "" `(-- 'x))) + "xxx") + ;; The empty loop body should be matched once! + '((x)))) + (should (equal (peg-parse-string ((s (list (* x))) + (x "x" `(-- 'x))) + "xxx") + '((x x x)))) + (should (equal (peg-parse-string ((s (region (* x))) + (x "x" `(-- 'x))) + "xxx") + ;; FIXME: Since string positions start at 0, this should + ;; really be '(3 x x x 0) !! + '(4 x x x 1))) + (should (equal (peg-parse-string ((s (region (list (* x)))) + (x "x" `(-- 'x 'y))) + "xxx") + '(4 (x y x y x y) 1))) + (should (equal (with-temp-buffer + (save-excursion (insert "abcdef")) + (list + (peg-run (peg "a" + (replace "bc" "x") + (replace "de" "y") + "f")) + (buffer-string))) + '(t "axyf"))) + (with-temp-buffer + (insert "toro") + (goto-char (point-min)) + (should (peg-run (peg "to"))) + (should-not (peg-run (peg "to"))) + (should (peg-run (peg "ro"))) + (should (eobp))) + (with-temp-buffer + (insert " ") + (goto-char (point-min)) + (peg-run (peg (+ (syntax-class whitespace)))) + (should (eobp))) + ) + +;;; Examples: + +;; peg-ex-recognize-int recognizes integers. An integer begins with a +;; optional sign, then follows one or more digits. Digits are all +;; characters from 0 to 9. +;; +;; Notes: +;; 1) "" matches the empty sequence, i.e. matches without consuming +;; input. +;; 2) [0-9] is the character range from 0 to 9. This can also be +;; written as (range ?0 ?9). Note that 0-9 is a symbol. +(defun peg-ex-recognize-int () + (with-peg-rules ((number sign digit (* digit)) + (sign (or "+" "-" "")) + (digit [0-9])) + (peg-run (peg number)))) + +;; peg-ex-parse-int recognizes integers and computes the corresponding +;; value. The grammar is the same as for `peg-ex-recognize-int' +;; augmented with parsing actions. Unfortunaletly, the actions add +;; quite a bit of clutter. +;; +;; The actions for the sign rule push -1 on the stack for a minus sign +;; and 1 for plus or no sign. +;; +;; The action for the digit rule pushes the value for a single digit. +;; +;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack +;; and pushes the first digit times 10 added to the second digit. +;; +;; The action `(sign val -- (* sign val)), multiplies val with the +;; sign (1 or -1). +(defun peg-ex-parse-int () + (with-peg-rules ((number sign digit (* digit + `(a b -- (+ (* a 10) b))) + `(sign val -- (* sign val))) + (sign (or (and "+" `(-- 1)) + (and "-" `(-- -1)) + (and "" `(-- 1)))) + (digit [0-9] `(-- (- (char-before) ?0)))) + (peg-run (peg number)))) + +;; Put point after the ) and press C-x C-e +;; (peg-ex-parse-int)-234234 + +;; Parse arithmetic expressions and compute the result as side effect. +(defun peg-ex-arith () + (peg-parse + (expr _ sum eol) + (sum product (* (or (and "+" _ product `(a b -- (+ a b))) + (and "-" _ product `(a b -- (- a b)))))) + (product value (* (or (and "*" _ value `(a b -- (* a b))) + (and "/" _ value `(a b -- (/ a b)))))) + (value (or (and (substring number) `(string -- (string-to-number string))) + (and "(" _ sum ")" _))) + (number (+ [0-9]) _) + (_ (* [" \t"])) + (eol (or "\n" "\r\n" "\r")))) + +;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5) +;; (peg-ex-arith) 1 + 2 ^ 3 * (4 + 5) ; fails to parse + +;; Parse URI according to RFC 2396. +(defun peg-ex-uri () + (peg-parse + (URI-reference (or absoluteURI relativeURI) + (or (and "#" (substring fragment)) + `(-- nil)) + `(scheme user host port path query fragment -- + (list :scheme scheme :user user + :host host :port port + :path path :query query + :fragment fragment))) + (absoluteURI (substring scheme) ":" (or hier-part opaque-part)) + (hier-part ;(-- user host port path query) + (or net-path + (and `(-- nil nil nil) + abs-path)) + (or (and "?" (substring query)) + `(-- nil))) + (net-path "//" authority (or abs-path `(-- nil))) + (abs-path "/" path-segments) + (path-segments segment (list (* "/" segment)) `(s l -- (cons s l))) + (segment (substring (* pchar) (* ";" param))) + (param (* pchar)) + (pchar (or unreserved escaped [":@&=+$,"])) + (query (* uric)) + (fragment (* uric)) + (relativeURI (or net-path abs-path rel-path) (opt "?" query)) + (rel-path rel-segment (opt abs-path)) + (rel-segment (+ unreserved escaped [";@&=+$,"])) + (authority (or server reg-name)) + (server (or (and (or (and (substring userinfo) "@") + `(-- nil)) + hostport) + `(-- nil nil nil))) + (userinfo (* (or unreserved escaped [";:&=+$,"]))) + (hostport (substring host) (or (and ":" (substring port)) + `(-- nil))) + (host (or hostname ipv4address)) + (hostname (* domainlabel ".") toplabel (opt ".")) + (domainlabel alphanum + (opt (* (or alphanum "-") (if alphanum)) + alphanum)) + (toplabel alpha + (* (or alphanum "-") (if alphanum)) + alphanum) + (ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit)) + (port (* digit)) + (scheme alpha (* (or alpha digit ["+-."]))) + (reg-name (or unreserved escaped ["$,;:@&=+"])) + (opaque-part uric-no-slash (* uric)) + (uric (or reserved unreserved escaped)) + (uric-no-slash (or unreserved escaped [";?:@&=+$,"])) + (reserved (set ";/?:@&=+$,")) + (unreserved (or alphanum mark)) + (escaped "%" hex hex) + (hex (or digit [A-F] [a-f])) + (mark (set "-_.!~*'()")) + (alphanum (or alpha digit)) + (alpha (or lowalpha upalpha)) + (lowalpha [a-z]) + (upalpha [A-Z]) + (digit [0-9]))) + +;; (peg-ex-uri)http://luser@www.foo.com:8080/bar/baz.html?x=1#foo +;; (peg-ex-uri)file:/bar/baz.html?foo=df#x + +;; Split STRING where SEPARATOR occurs. +(defun peg-ex-split (string separator) + (peg-parse-string ((s (list (* (* sep) elt))) + (elt (substring (+ (not sep) (any)))) + (sep (= separator))) + string)) + +;; (peg-ex-split "-abc-cd-" "-") + +;; Parse a lisp style Sexp. +;; [To keep the example short, ' and . are handled as ordinary symbol.] +(defun peg-ex-lisp () + (peg-parse + (sexp _ (or string list number symbol)) + (_ (* (or [" \n\t"] comment))) + (comment ";" (* (not (or "\n" (eob))) (any))) + (string "\"" (substring (* (not "\"") (any))) "\"") + (number (substring (opt (set "+-")) (+ digit)) + (if terminating) + `(string -- (string-to-number string))) + (symbol (substring (and symchar (* (not terminating) symchar))) + `(s -- (intern s))) + (symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?@[]^_`{|}~"]) + (list "(" `(-- (cons nil nil)) `(hd -- hd hd) + (* sexp `(tl e -- (setcdr tl (list e)))) + _ ")" `(hd _tl -- (cdr hd))) + (digit [0-9]) + (terminating (or (set " \n\t();\"'") (eob))))) + +;; (peg-ex-lisp) + +;; We try to detect left recursion and report it as error. +(defun peg-ex-left-recursion () + (eval '(peg-parse (exp (or term + (and exp "+" exp))) + (term (or digit + (and term "*" term))) + (digit [0-9])) + t)) + +(defun peg-ex-infinite-loop () + (eval '(peg-parse (exp (* (or "x" + "y" + (action (foo)))))) + t)) + +;; Some efficiency problems: + +;; Find the last digit in a string. +;; Recursive definition with excessive stack usage. +(defun peg-ex-last-digit (string) + (peg-parse-string ((s (or (and (any) s) + (substring [0-9])))) + string)) + +;; (peg-ex-last-digit "ab0cd1ef2gh") +;; (peg-ex-last-digit (make-string 50 ?-)) +;; (peg-ex-last-digit (make-string 1000 ?-)) + +;; Find the last digit without recursion. Doesn't run out of stack, +;; but probably still too inefficient for large inputs. +(defun peg-ex-last-digit2 (string) + (peg-parse-string ((s `(-- nil) + (+ (* (not digit) (any)) + (substring digit) + `(_d1 d2 -- d2))) + (digit [0-9])) + string)) + +;; (peg-ex-last-digit2 "ab0cd1ef2gh") +;; (peg-ex-last-digit2 (concat (make-string 500000 ?-) "8a9b")) +;; (peg-ex-last-digit2 (make-string 500000 ?-)) +;; (peg-ex-last-digit2 (make-string 500000 ?5)) + +(provide 'peg-tests) +;;; peg-tests.el ends here commit 0df8dadde2edaee406c76d639a22c70d0b03426b Author: James Thomas Date: Thu Mar 14 08:42:00 2024 +0530 Make gnus cache work with group names having '/' Make `gnus-cache-file-name` use the existing `nnmail-group-pathname`. * lisp/gnus/gnus-cache.el (gnus-cache-file-name) (gnus-cache-update-article): * lisp/gnus/nnmail.el (nnmail-group-pathname): diff --git a/lisp/gnus/gnus-cache.el b/lisp/gnus/gnus-cache.el index 961219eee8f..7af02368d36 100644 --- a/lisp/gnus/gnus-cache.el +++ b/lisp/gnus/gnus-cache.el @@ -443,23 +443,9 @@ Returns the list of articles removed." (and (not unread) (not ticked) (not dormant) (memq 'read class)))) (defun gnus-cache-file-name (group article) - (expand-file-name - (if (stringp article) article (int-to-string article)) - (file-name-as-directory - (expand-file-name - (nnheader-translate-file-chars - (if (gnus-use-long-file-name 'not-cache) - group - (let ((group (nnheader-replace-duplicate-chars-in-string - (nnheader-replace-chars-in-string group ?/ ?_) - ?. ?_))) - ;; Translate the first colon into a slash. - (when (string-match ":" group) - (setq group (concat (substring group 0 (match-beginning 0)) - "/" (substring group (match-end 0))))) - (nnheader-replace-chars-in-string group ?. ?/))) - t) - gnus-cache-directory)))) + (nnmail-group-pathname + group gnus-cache-directory + (if (stringp article) article (int-to-string article)))) (defun gnus-cache-update-article (group article) "If ARTICLE is in the cache, remove it and re-enter it." @@ -699,9 +685,10 @@ If LOW, update the lower bound instead." (file-name-as-directory (expand-file-name gnus-cache-directory)))) (directory-file-name directory)) - (nnheader-replace-chars-in-string - (substring (directory-file-name directory) (match-end 0)) - ?/ ?.))) + (url-unhex-string + (nnheader-replace-chars-in-string + (substring (directory-file-name directory) (match-end 0)) + ?/ ?.)))) nums alphs) (when top (gnus-message 5 "Generating the cache active file...") diff --git a/lisp/gnus/nnmail.el b/lisp/gnus/nnmail.el index fef12eebe09..a9f5b89c6fe 100644 --- a/lisp/gnus/nnmail.el +++ b/lisp/gnus/nnmail.el @@ -33,6 +33,7 @@ (require 'mail-source) (require 'mm-util) (require 'gnus-int) +(require 'browse-url) (autoload 'mail-send-and-exit "sendmail" nil t) @@ -627,7 +628,7 @@ These will be logged to the \"*nnmail split*\" buffer." (concat (let ((dir (file-name-as-directory (expand-file-name dir)))) (setq group (nnheader-replace-duplicate-chars-in-string - (nnheader-replace-chars-in-string group ?/ ?_) + (browse-url-url-encode-chars group "[/%]") ?. ?_)) (setq group (nnheader-translate-file-chars group)) ;; If this directory exists, we use it directly. commit a33e7c0286c2a9c8af1c96db42f5c0c37611c9db Author: Theodor Thornhill Date: Sat Mar 30 09:35:16 2024 +0100 Disable workDoneProgress if defcustom is nil There is no need to receive the $/progress notifications from the server if we don't want to render them. Because they are effectively ignored when eglot-report-progress is nil we'd rather not waste cycles on serde of the messages. * lisp/progmodes/eglot.el (eglot-client-capabilities): use value from defcustom to decide whether or not to advertise to server. diff --git a/lisp/progmodes/eglot.el b/lisp/progmodes/eglot.el index 515c33f8cde..f247c43203c 100644 --- a/lisp/progmodes/eglot.el +++ b/lisp/progmodes/eglot.el @@ -1004,7 +1004,7 @@ ACTION is an LSP object of either `CodeAction' or `Command' type." [,@(mapcar #'car eglot--tag-faces)]))) :window `(:showDocument (:support t) - :workDoneProgress t) + :workDoneProgress ,(if eglot-report-progress t :json-false)) :general (list :positionEncodings ["utf-32" "utf-8" "utf-16"]) :experimental eglot--{}))) commit e5e3c9cef64e3ef3f54b91f98e28e030aba8c93f Author: Theodor Thornhill Date: Fri Mar 29 09:18:53 2024 +0100 Fix typo in docstring * lisp/progmodes/eglot.el (eglot-lsp-server): eglot-current-server is now exposed. diff --git a/lisp/progmodes/eglot.el b/lisp/progmodes/eglot.el index 7d2f1a55165..515c33f8cde 100644 --- a/lisp/progmodes/eglot.el +++ b/lisp/progmodes/eglot.el @@ -1064,7 +1064,7 @@ ACTION is an LSP object of either `CodeAction' or `Command' type." (declare-function w32-long-file-name "w32proc.c" (fn)) (defun eglot-uri-to-path (uri) - "Convert URI to file path, helped by `eglot--current-server'." + "Convert URI to file path, helped by `eglot-current-server'." (when (keywordp uri) (setq uri (substring (symbol-name uri) 1))) (let* ((server (eglot-current-server)) (remote-prefix (and server (eglot--trampish-p server))) commit 000f919b3c7779609dc43773fdc49aca9b50d76f Author: Eli Zaretskii Date: Sat Mar 30 19:33:23 2024 +0300 Fix the native JSON support code * src/Makefile.in (base_obj): Add the missing json.o. Without this, we get link error. * src/json.c (json_serialize): Don't use too sophisticated C99 features, as they confuse make-docfile. Initialize all the members explicitly. diff --git a/src/Makefile.in b/src/Makefile.in index f58a3a7761e..9dd88895d27 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -472,7 +472,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ $(XWIDGETS_OBJ) \ profiler.o decompress.o \ thread.o systhread.o sqlite.o treesit.o \ - itree.o \ + itree.o json.o \ $(if $(HYBRID_MALLOC),sheap.o) \ $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \ $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) \ diff --git a/src/json.c b/src/json.c index 43b2d1cb4f8..5970c539f53 100644 --- a/src/json.c +++ b/src/json.c @@ -568,13 +568,17 @@ static void json_serialize (json_out_t *jo, Lisp_Object object, ptrdiff_t nargs, Lisp_Object *args) { - *jo = (json_out_t) { - /* The maximum nesting depth allowed should be sufficient for most - uses but could be raised if necessary. (The default maximum - depth for JSON_checker is 20.) */ - .maxdepth = 50, - .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse} - }; + jo->maxdepth = 50; + jo->size = 0; + jo->capacity = 0; + jo->chars_delta = 0; + jo->buf = NULL; + jo->ss_table = NULL; + jo->conf.object_type = json_object_hashtable; + jo->conf.array_type = json_array_array; + jo->conf.null_object = QCnull; + jo->conf.false_object = QCfalse; + json_parse_args (nargs, args, &jo->conf, false); record_unwind_protect_ptr (cleanup_json_out, jo); commit 1135ce461d188869e0294af45641edc2cbfacbf0 Author: Mattias Engdegård Date: Sat Mar 30 15:13:24 2024 +0100 Always enable native JSON support and remove Jansson references * src/json.c (Fjson__available_p): Remove. * lisp/subr.el (json-available-p): Always return t. * admin/nt/dist-build/build-dep-zips.py: * configure.ac: * doc/lispref/text.texi (Parsing JSON): * java/INSTALL: * java/org/gnu/emacs/EmacsNative.java (EmacsNative): * lisp/term/w32-win.el (dynamic-library-alist): * m4/ndk-build.m4 (ndk_INIT): * msdos/sed1v2.inp: * nt/INSTALL: * nt/INSTALL.W64: * src/Makefile.in: * src/emacs.c (main): * src/lisp.h: Remove JSON configuration options and references to it and Jansson from documentation and build files. * etc/NEWS: Announce. diff --git a/admin/nt/dist-build/build-dep-zips.py b/admin/nt/dist-build/build-dep-zips.py index fb0aca87731..0b1cc4d8695 100755 --- a/admin/nt/dist-build/build-dep-zips.py +++ b/admin/nt/dist-build/build-dep-zips.py @@ -32,7 +32,6 @@ PKG_REQ='''mingw-w64-x86_64-giflib mingw-w64-x86_64-gnutls mingw-w64-x86_64-harfbuzz -mingw-w64-x86_64-jansson mingw-w64-x86_64-lcms2 mingw-w64-x86_64-libjpeg-turbo mingw-w64-x86_64-libpng @@ -44,7 +43,6 @@ DLL_REQ='''libgif libgnutls libharfbuzz -libjansson liblcms2 libturbojpeg libpng diff --git a/configure.ac b/configure.ac index bd678ea52a3..b1dbaa13155 100644 --- a/configure.ac +++ b/configure.ac @@ -548,7 +548,6 @@ OPTION_DEFAULT_OFF([cairo-xcb], [use XCB surfaces for Cairo support]) OPTION_DEFAULT_ON([xml2],[don't compile with XML parsing support]) OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support]) OPTION_DEFAULT_ON([native-image-api], [don't use native image APIs (GDI+ on Windows)]) -OPTION_DEFAULT_IFAVAILABLE([json], [compile with native JSON support]) OPTION_DEFAULT_IFAVAILABLE([tree-sitter], [compile with tree-sitter]) OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts]) @@ -1216,7 +1215,6 @@ package will likely install on older systems but crash on startup.]) passthrough="$passthrough --with-png=$with_png" passthrough="$passthrough --with-webp=$with_webp" passthrough="$passthrough --with-gif=$with_gif" - passthrough="$passthrough --with-json=$with_json" passthrough="$passthrough --with-jpeg=$with_jpeg" passthrough="$passthrough --with-xml2=$with_xml2" passthrough="$passthrough --with-sqlite3=$with_sqlite3" @@ -1305,7 +1303,6 @@ if test "$ANDROID" = "yes"; then with_png=no with_webp=no with_gif=no - with_json=no with_jpeg=no with_xml2=no with_sqlite3=no @@ -4005,27 +4002,6 @@ fi AC_SUBST([LIBSYSTEMD_LIBS]) AC_SUBST([LIBSYSTEMD_CFLAGS]) -HAVE_JSON=no -JSON_OBJ= - -if test "${with_json}" != no; then - EMACS_CHECK_MODULES([JSON], [jansson >= 2.7], - [HAVE_JSON=yes], [HAVE_JSON=no]) - if test "${HAVE_JSON}" = yes; then - AC_DEFINE([HAVE_JSON], [1], [Define if using Jansson.]) - JSON_OBJ=json.o - fi - - # Windows loads libjansson dynamically - if test "${opsys}" = "mingw32"; then - JSON_LIBS= - fi -fi - -AC_SUBST([JSON_LIBS]) -AC_SUBST([JSON_CFLAGS]) -AC_SUBST([JSON_OBJ]) - HAVE_TREE_SITTER=no TREE_SITTER_OBJ= NEED_DYNLIB=no @@ -5470,11 +5446,6 @@ case $with_gnutls,$HAVE_GNUTLS in *) MISSING="$MISSING gnutls" WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-gnutls=ifavailable";; esac -case $with_json,$HAVE_JSON in - no,* | ifavailable,* | *,yes) ;; - *) MISSING="$MISSING json" - WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-json=ifavailable";; -esac case $with_tree_sitter,$HAVE_TREE_SITTER in no,* | ifavailable,* | *,yes) ;; *) MISSING="$MISSING tree-sitter" @@ -7655,7 +7626,7 @@ Configured for '${canonical}'. optsep= emacs_config_features= for opt in ACL BE_APP CAIRO DBUS FREETYPE GCONF GIF GLIB GMP GNUTLS GPM GSETTINGS \ - HARFBUZZ IMAGEMAGICK JPEG JSON LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \ + HARFBUZZ IMAGEMAGICK JPEG LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \ M17N_FLT MODULES NATIVE_COMP NOTIFY NS OLDXMENU PDUMPER PGTK PNG RSVG SECCOMP \ SOUND SQLITE3 THREADS TIFF TOOLKIT_SCROLL_BARS TREE_SITTER \ UNEXEC WEBP X11 XAW3D XDBE XFT XIM XINPUT2 XPM XWIDGETS X_TOOLKIT \ @@ -7731,7 +7702,6 @@ AS_ECHO([" Does Emacs use -lXaw3d? ${HAVE_XAW3D Does Emacs use -lotf? ${HAVE_LIBOTF} Does Emacs use -lxft? ${HAVE_XFT} Does Emacs use -lsystemd? ${HAVE_LIBSYSTEMD} - Does Emacs use -ljansson? ${HAVE_JSON} Does Emacs use -ltree-sitter? ${HAVE_TREE_SITTER} Does Emacs use the GMP library? ${HAVE_GMP} Does Emacs directly use zlib? ${HAVE_ZLIB} diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 3d14a5ad8be..90e2c6ce882 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi @@ -5752,10 +5752,10 @@ non-@code{nil}, indent the @acronym{HTML}/@acronym{XML} logically. @cindex JSON @cindex JavaScript Object Notation - When Emacs is compiled with @acronym{JSON} (@dfn{JavaScript Object -Notation}) support, it provides several functions to convert -between Lisp objects and JSON values. Any JSON value can be converted -to a Lisp object, but not vice versa. Specifically: + The Emacs @acronym{JSON} (@dfn{JavaScript Object Notation}) support +provides several functions to convert between Lisp objects and JSON +values. Any JSON value can be converted to a Lisp object, but not vice +versa. Specifically: @itemize @item @@ -5790,12 +5790,6 @@ represents @code{@{@}}, the empty JSON object; not @code{null}, @code{false}, or an empty array, all of which are different JSON values. -@defun json-available-p -This predicate returns non-@code{nil} if Emacs has been built with -@acronym{JSON} support, and the library is available on the current -system. -@end defun - If some Lisp object can't be represented in JSON, the serialization functions will signal an error of type @code{wrong-type-argument}. The parsing functions can also signal the following errors: diff --git a/etc/NEWS b/etc/NEWS index feb128bc2de..8ccf04276f6 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -43,6 +43,12 @@ external packages and to resolve potential incompatibilities between Linux and BSD versions of ALSA. Use '--with-sound=alsa' to build with ALSA on these operating systems instead. +--- +** Native JSON support is now always available; libjansson is no longer used. +No external library is required. The '--with-json' configure option has +been removed. 'json-available-p' now always returns non-nil and is only +kept for compatibility. + * Startup Changes in Emacs 30.1 diff --git a/java/INSTALL b/java/INSTALL index f1063b40c25..6daef59084e 100644 --- a/java/INSTALL +++ b/java/INSTALL @@ -307,14 +307,6 @@ Many of these dependencies have been migrated over to the However, the old ``Android.mk'' Makefiles are still present in older branches, and can be easily adapted to newer versions. -In addition, some Emacs dependencies provide `ndk-build' support -themselves: - - libjansson - https://github.com/akheron/jansson - (You must add LOCAL_EXPORT_INCLUDES := $(LOCAL_C_INCLUDES) before - its Android.mk includes $(BUILD_SHARED_LIBRARY), then copy - android/jansson_config.h to android/jansson_private_config.h.) - Emacs developers have ported the following dependencies to ARM Android systems: diff --git a/java/org/gnu/emacs/EmacsNative.java b/java/org/gnu/emacs/EmacsNative.java index 654e94b1a7d..24440bd5953 100644 --- a/java/org/gnu/emacs/EmacsNative.java +++ b/java/org/gnu/emacs/EmacsNative.java @@ -337,7 +337,7 @@ public static native void blitRect (Bitmap src, Bitmap dest, int x1, "gnutls_emacs", "gmp_emacs", "nettle_emacs", "p11-kit_emacs", "tasn1_emacs", "hogweed_emacs", - "jansson_emacs", "jpeg_emacs", + "jpeg_emacs", "tiff_emacs", "xml2_emacs", "icuuc_emacs", "harfbuzz_emacs", "tree-sitter_emacs", }; diff --git a/lisp/subr.el b/lisp/subr.el index 90dbfc75d52..50487e2c734 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -7346,9 +7346,8 @@ sentence (see Info node `(elisp) Documentation Tips')." (internal--fill-string-single-line (apply #'format string objects))) (defun json-available-p () - "Return non-nil if Emacs has libjansson support." - (and (fboundp 'json--available-p) - (json--available-p))) + "Return non-nil if Emacs has native JSON support." + t) (defun ensure-list (object) "Return OBJECT as a list. diff --git a/lisp/term/w32-win.el b/lisp/term/w32-win.el index 9b696475c34..3c0acf368f4 100644 --- a/lisp/term/w32-win.el +++ b/lisp/term/w32-win.el @@ -288,7 +288,6 @@ See the documentation of `create-fontset-from-fontset-spec' for the format.") '(libxml2 "libxml2-2.dll" "libxml2.dll") '(zlib "zlib1.dll" "libz-1.dll") '(lcms2 "liblcms2-2.dll") - '(json "libjansson-4.dll") '(gccjit "libgccjit-0.dll") ;; MSYS2 distributes libtree-sitter.dll, without API version ;; number... diff --git a/m4/ndk-build.m4 b/m4/ndk-build.m4 index 7012471e046..abe06063ab0 100644 --- a/m4/ndk-build.m4 +++ b/m4/ndk-build.m4 @@ -69,7 +69,7 @@ AS_CASE(["$ndk_ABI"], # This is a map between pkg-config style package names and Android # ones. -ndk_package_map="libwebpdemux:webpdemux libxml-2.0:libxml2 jansson:libjansson" +ndk_package_map="libwebpdemux:webpdemux libxml-2.0:libxml2" ndk_package_map="$ndk_package_map sqlite3:libsqlite_static_minimal" ndk_package_map="$ndk_package_map MagickWand:libmagickwand-7 lcms2:liblcms2" diff --git a/msdos/sed1v2.inp b/msdos/sed1v2.inp index 632c45a16b6..da056067548 100644 --- a/msdos/sed1v2.inp +++ b/msdos/sed1v2.inp @@ -175,9 +175,6 @@ s/ *@WEBP_LIBS@// /^CANNOT_DUMP *=/s/@CANNOT_DUMP@/no/ /^W32_OBJ *=/s/@W32_OBJ@// /^W32_LIBS *=/s/@W32_LIBS@// -/^JSON_OBJ *=/s/@JSON_OBJ@// -/^JSON_CFLAGS *=/s/@JSON_CFLAGS@// -/^JSON_LIBS *=/s/@JSON_LIBS@// /^LIBGCCJIT_OBJ *=/s/@LIBGCCJIT_OBJ@// /^LIBGCCJIT_CFLAGS *=/s/@LIBGCCJIT_CFLAGS@// /^LIBGCCJIT_LIBS *=/s/@LIBGCCJIT_LIBS@// diff --git a/nt/INSTALL b/nt/INSTALL index 77626f8a343..6167365169b 100644 --- a/nt/INSTALL +++ b/nt/INSTALL @@ -508,7 +508,6 @@ build should run on Windows 9X and newer systems). Does Emacs use -lotf? no Does Emacs use -lxft? no Does Emacs use -lsystemd? no - Does Emacs use -ljansson? yes Does Emacs use the GMP library? yes Does Emacs directly use zlib? yes Does Emacs have dynamic modules support? yes @@ -830,13 +829,6 @@ build should run on Windows 9X and newer systems). Prebuilt binaries of lcms2 DLL (for 32-bit builds of Emacs) are available from the ezwinports site and from the MSYS2 project. -* Optional support for JSON - - Emacs can provide built-in support for JSON parsing and - serialization using the libjansson library. Prebuilt binaries of - the libjansson DLL (for 32-bit builds of Emacs) are available from - the ezwinports site and from the MSYS2 project. - * Optional support for HarfBuzzz shaping library Emacs supports display of complex scripts and Arabic shaping. The diff --git a/nt/INSTALL.W64 b/nt/INSTALL.W64 index 2aa05ea0062..d25fc2e18af 100644 --- a/nt/INSTALL.W64 +++ b/nt/INSTALL.W64 @@ -55,7 +55,6 @@ packages (you can copy and paste it into the shell with Shift + Insert): mingw-w64-x86_64-librsvg \ mingw-w64-x86_64-libwebp \ mingw-w64-x86_64-lcms2 \ - mingw-w64-x86_64-jansson \ mingw-w64-x86_64-libxml2 \ mingw-w64-x86_64-zlib \ mingw-w64-x86_64-harfbuzz \ diff --git a/src/Makefile.in b/src/Makefile.in index de45b2290f1..f58a3a7761e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -361,10 +361,6 @@ LIBGNUTLS_CFLAGS = @LIBGNUTLS_CFLAGS@ LIBSYSTEMD_LIBS = @LIBSYSTEMD_LIBS@ LIBSYSTEMD_CFLAGS = @LIBSYSTEMD_CFLAGS@ -JSON_LIBS = @JSON_LIBS@ -JSON_CFLAGS = @JSON_CFLAGS@ -JSON_OBJ = @JSON_OBJ@ - TREE_SITTER_LIBS = @TREE_SITTER_LIBS@ TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@ @@ -438,7 +434,7 @@ EMACS_CFLAGS=-Demacs $(MYCPPFLAGS) -I. -I$(srcdir) \ $(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \ $(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \ $(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \ - $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \ + $(LIBSYSTEMD_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \ $(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \ $(WERROR_CFLAGS) $(HAIKU_CFLAGS) $(XCOMPOSITE_CFLAGS) $(XSHAPE_CFLAGS) \ $(ANDROID_BUILD_CFLAGS) $(GIF_CFLAGS) $(JPEG_CFLAGS) $(SQLITE3_CFLAGS) \ @@ -479,7 +475,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ itree.o \ $(if $(HYBRID_MALLOC),sheap.o) \ $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \ - $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \ + $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) \ $(HAIKU_OBJ) $(PGTK_OBJ) $(ANDROID_OBJ) doc_obj = $(base_obj) $(NS_OBJC_OBJ) obj = $(doc_obj) $(HAIKU_CXX_OBJ) @@ -604,7 +600,7 @@ LIBES = $(LIBS) $(W32_LIBS) $(LIBS_GNUSTEP) $(PGTK_LIBS) $(LIBX_BASE) $(LIBIMAGE $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(HARFBUZZ_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \ $(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \ $(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \ - $(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \ + $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \ $(TREE_SITTER_LIBS) $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS) \ $(ANDROID_LIBS) diff --git a/src/emacs.c b/src/emacs.c index 4a34bb06425..1cb1e70ac65 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -2475,10 +2475,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem syms_of_threads (); syms_of_profiler (); syms_of_pdumper (); - -#ifdef HAVE_JSON syms_of_json (); -#endif keys_of_keyboard (); diff --git a/src/json.c b/src/json.c index 5bc63069624..43b2d1cb4f8 100644 --- a/src/json.c +++ b/src/json.c @@ -98,14 +98,6 @@ json_parse_args (ptrdiff_t nargs, } } -/* FIXME: Remove completely. */ -DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL, - doc: /* Return non-nil if libjansson is available (internal use only). */) - (void) -{ - return Qt; -} - /* JSON encoding context. */ typedef struct { char *buf; @@ -1966,7 +1958,6 @@ syms_of_json (void) DEFSYM (Qplist, "plist"); DEFSYM (Qarray, "array"); - defsubr (&Sjson__available_p); defsubr (&Sjson_serialize); defsubr (&Sjson_insert); defsubr (&Sjson_parse_string); diff --git a/src/lisp.h b/src/lisp.h index 7c4bd435cd8..43a29489a25 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4325,10 +4325,8 @@ extern void init_fringe_once (void); extern int x_bitmap_mask (struct frame *, ptrdiff_t); extern void syms_of_image (void); -#ifdef HAVE_JSON /* Defined in json.c. */ extern void syms_of_json (void); -#endif /* Defined in insdel.c. */ extern void move_gap_both (ptrdiff_t, ptrdiff_t); commit 890edfd2bb8fd79730919972cc82811b73c7f572 Author: Mattias Engdegård Date: Tue Mar 26 16:44:09 2024 +0100 New JSON encoder (bug#70007) It is in general at least 2x faster than the old encoder and does not depend on any external library. Using our own code also gives us control over translation details: for example, we now have full bignum support and tighter float formatting. * src/json.c (json_delete, json_initialized, init_json_functions) (json_malloc, json_free, init_json, json_out_of_memory) (json_releae_object, check_string_without_embedded_nulls, json_check) (json_check_utf8, lisp_to_json_nonscalar_1, lisp_to_json_nonscalar) (lisp_to_json, json_available_p, ensure_json_available, json_insert) (json_handle_nonlocal_exit, json_insert_callback): Remove. Remaining uses updated. * src/json.c (json_out_t, symset_t, struct symset_tbl) (symset_size, make_symset_table, push_symset, pop_symset) (cleanup_symset_tables, symset_hash, symset_expand, symset_add) (json_out_grow_buf, cleanup_json_out, json_make_room, JSON_OUT_STR) (json_out_str, json_out_byte, json_out_fixnum, string_not_unicode) (json_plain_char, json_out_string, json_out_nest, json_out_unnest) (json_out_object_cons, json_out_object_hash), json_out_array) (json_out_float, json_out_bignum, json_out_something) (json_out_to_string, json_serialize): New. (Fjson_serialize, Fjson_insert): New JSON encoder implementation. * test/src/json-tests.el (json-serialize/object-with-duplicate-keys) (json-serialize/string): Update tests. diff --git a/src/emacs.c b/src/emacs.c index 87f12d3fa86..4a34bb06425 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -2013,10 +2013,6 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem init_random (); init_xfaces (); -#if defined HAVE_JSON && !defined WINDOWSNT - init_json (); -#endif - if (!initialized) syms_of_comp (); diff --git a/src/json.c b/src/json.c index afc48c59d5a..5bc63069624 100644 --- a/src/json.c +++ b/src/json.c @@ -25,189 +25,10 @@ along with GNU Emacs. If not, see . */ #include #include -#include - #include "lisp.h" #include "buffer.h" #include "coding.h" -#ifdef WINDOWSNT -# include -# include "w32common.h" -# include "w32.h" - -DEF_DLL_FN (void, json_set_alloc_funcs, - (json_malloc_t malloc_fn, json_free_t free_fn)); -DEF_DLL_FN (void, json_delete, (json_t *json)); -DEF_DLL_FN (json_t *, json_array, (void)); -DEF_DLL_FN (int, json_array_append_new, (json_t *array, json_t *value)); -DEF_DLL_FN (size_t, json_array_size, (const json_t *array)); -DEF_DLL_FN (json_t *, json_object, (void)); -DEF_DLL_FN (int, json_object_set_new, - (json_t *object, const char *key, json_t *value)); -DEF_DLL_FN (json_t *, json_null, (void)); -DEF_DLL_FN (json_t *, json_true, (void)); -DEF_DLL_FN (json_t *, json_false, (void)); -DEF_DLL_FN (json_t *, json_integer, (json_int_t value)); -DEF_DLL_FN (json_t *, json_real, (double value)); -DEF_DLL_FN (json_t *, json_stringn, (const char *value, size_t len)); -DEF_DLL_FN (char *, json_dumps, (const json_t *json, size_t flags)); -DEF_DLL_FN (int, json_dump_callback, - (const json_t *json, json_dump_callback_t callback, void *data, - size_t flags)); -DEF_DLL_FN (json_t *, json_object_get, (const json_t *object, const char *key)); - -/* This is called by json_decref, which is an inline function. */ -void json_delete(json_t *json) -{ - fn_json_delete (json); -} - -static bool json_initialized; - -static bool -init_json_functions (void) -{ - HMODULE library = w32_delayed_load (Qjson); - - if (!library) - return false; - - LOAD_DLL_FN (library, json_set_alloc_funcs); - LOAD_DLL_FN (library, json_delete); - LOAD_DLL_FN (library, json_array); - LOAD_DLL_FN (library, json_array_append_new); - LOAD_DLL_FN (library, json_array_size); - LOAD_DLL_FN (library, json_object); - LOAD_DLL_FN (library, json_object_set_new); - LOAD_DLL_FN (library, json_null); - LOAD_DLL_FN (library, json_true); - LOAD_DLL_FN (library, json_false); - LOAD_DLL_FN (library, json_integer); - LOAD_DLL_FN (library, json_real); - LOAD_DLL_FN (library, json_stringn); - LOAD_DLL_FN (library, json_dumps); - LOAD_DLL_FN (library, json_dump_callback); - LOAD_DLL_FN (library, json_object_get); - - init_json (); - - return true; -} - -#define json_set_alloc_funcs fn_json_set_alloc_funcs -#define json_array fn_json_array -#define json_array_append_new fn_json_array_append_new -#define json_array_size fn_json_array_size -#define json_object fn_json_object -#define json_object_set_new fn_json_object_set_new -#define json_null fn_json_null -#define json_true fn_json_true -#define json_false fn_json_false -#define json_integer fn_json_integer -#define json_real fn_json_real -#define json_stringn fn_json_stringn -#define json_dumps fn_json_dumps -#define json_dump_callback fn_json_dump_callback -#define json_object_get fn_json_object_get - -#endif /* WINDOWSNT */ - -/* We install a custom allocator so that we can avoid objects larger - than PTRDIFF_MAX. Such objects wouldn't play well with the rest of - Emacs's codebase, which generally uses ptrdiff_t for sizes and - indices. The other functions in this file also generally assume - that size_t values never exceed PTRDIFF_MAX. - - In addition, we need to use a custom allocator because on - MS-Windows we replace malloc/free with our own functions, see - w32heap.c, so we must force the library to use our allocator, or - else we won't be able to free storage allocated by the library. */ - -static void * -json_malloc (size_t size) -{ - if (size > PTRDIFF_MAX) - { - errno = ENOMEM; - return NULL; - } - return malloc (size); -} - -static void -json_free (void *ptr) -{ - free (ptr); -} - -void -init_json (void) -{ - json_set_alloc_funcs (json_malloc, json_free); -} - -/* Note that all callers of make_string_from_utf8 and build_string_from_utf8 - below either pass only value UTF-8 strings or use the functionf for - formatting error messages; in the latter case correctness isn't - critical. */ - -/* Return a unibyte string containing the sequence of UTF-8 encoding - units of the UTF-8 representation of STRING. If STRING does not - represent a sequence of Unicode scalar values, return a string with - unspecified contents. */ - -static Lisp_Object -json_encode (Lisp_Object string) -{ - /* FIXME: Raise an error if STRING is not a scalar value - sequence. */ - return encode_string_utf_8 (string, Qnil, false, Qt, Qt); -} - -static AVOID -json_out_of_memory (void) -{ - xsignal0 (Qjson_out_of_memory); -} - -static void -json_release_object (void *object) -{ - json_decref (object); -} - -/* Signal an error if OBJECT is not a string, or if OBJECT contains - embedded null characters. */ - -static void -check_string_without_embedded_nulls (Lisp_Object object) -{ - CHECK_STRING (object); - CHECK_TYPE (memchr (SDATA (object), '\0', SBYTES (object)) == NULL, - Qstring_without_embedded_nulls_p, object); -} - -/* Signal an error of type `json-out-of-memory' if OBJECT is - NULL. */ - -static json_t * -json_check (json_t *object) -{ - if (object == NULL) - json_out_of_memory (); - return object; -} - -/* If STRING is not a valid UTF-8 string, signal an error of type - `wrong-type-argument'. STRING must be a unibyte string. */ - -static void -json_check_utf8 (Lisp_Object string) -{ - CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string); -} - enum json_object_type { json_object_hashtable, json_object_alist, @@ -226,179 +47,6 @@ struct json_configuration { Lisp_Object false_object; }; -static json_t *lisp_to_json (Lisp_Object, - const struct json_configuration *conf); - -/* Convert a Lisp object to a nonscalar JSON object (array or object). */ - -static json_t * -lisp_to_json_nonscalar_1 (Lisp_Object lisp, - const struct json_configuration *conf) -{ - json_t *json; - specpdl_ref count; - - if (VECTORP (lisp)) - { - ptrdiff_t size = ASIZE (lisp); - json = json_check (json_array ()); - count = SPECPDL_INDEX (); - record_unwind_protect_ptr (json_release_object, json); - for (ptrdiff_t i = 0; i < size; ++i) - { - int status - = json_array_append_new (json, lisp_to_json (AREF (lisp, i), - conf)); - if (status == -1) - json_out_of_memory (); - } - eassert (json_array_size (json) == size); - } - else if (HASH_TABLE_P (lisp)) - { - struct Lisp_Hash_Table *h = XHASH_TABLE (lisp); - json = json_check (json_object ()); - count = SPECPDL_INDEX (); - record_unwind_protect_ptr (json_release_object, json); - DOHASH (h, key, v) - { - CHECK_STRING (key); - Lisp_Object ekey = json_encode (key); - /* We can't specify the length, so the string must be - null-terminated. */ - check_string_without_embedded_nulls (ekey); - const char *key_str = SSDATA (ekey); - /* Reject duplicate keys. These are possible if the hash - table test is not `equal'. */ - if (json_object_get (json, key_str) != NULL) - wrong_type_argument (Qjson_value_p, lisp); - int status - = json_object_set_new (json, key_str, - lisp_to_json (v, conf)); - if (status == -1) - { - /* A failure can be caused either by an invalid key or - by low memory. */ - json_check_utf8 (ekey); - json_out_of_memory (); - } - } - } - else if (NILP (lisp)) - return json_check (json_object ()); - else if (CONSP (lisp)) - { - Lisp_Object tail = lisp; - json = json_check (json_object ()); - count = SPECPDL_INDEX (); - record_unwind_protect_ptr (json_release_object, json); - bool is_plist = !CONSP (XCAR (tail)); - FOR_EACH_TAIL (tail) - { - const char *key_str; - Lisp_Object value; - Lisp_Object key_symbol; - if (is_plist) - { - key_symbol = XCAR (tail); - tail = XCDR (tail); - CHECK_CONS (tail); - value = XCAR (tail); - } - else - { - Lisp_Object pair = XCAR (tail); - CHECK_CONS (pair); - key_symbol = XCAR (pair); - value = XCDR (pair); - } - CHECK_SYMBOL (key_symbol); - Lisp_Object key = SYMBOL_NAME (key_symbol); - /* We can't specify the length, so the string must be - null-terminated. */ - check_string_without_embedded_nulls (key); - key_str = SSDATA (key); - /* In plists, ensure leading ":" in keys is stripped. It - will be reconstructed later in `json_to_lisp'.*/ - if (is_plist && ':' == key_str[0] && key_str[1]) - { - key_str = &key_str[1]; - } - /* Only add element if key is not already present. */ - if (json_object_get (json, key_str) == NULL) - { - int status - = json_object_set_new (json, key_str, lisp_to_json (value, - conf)); - if (status == -1) - json_out_of_memory (); - } - } - CHECK_LIST_END (tail, lisp); - } - else - wrong_type_argument (Qjson_value_p, lisp); - - clear_unwind_protect (count); - unbind_to (count, Qnil); - return json; -} - -/* Convert LISP to a nonscalar JSON object (array or object). Signal - an error of type `wrong-type-argument' if LISP is not a vector, - hashtable, alist, or plist. */ - -static json_t * -lisp_to_json_nonscalar (Lisp_Object lisp, - const struct json_configuration *conf) -{ - if (++lisp_eval_depth > max_lisp_eval_depth) - xsignal0 (Qjson_object_too_deep); - json_t *json = lisp_to_json_nonscalar_1 (lisp, conf); - --lisp_eval_depth; - return json; -} - -/* Convert LISP to any JSON object. Signal an error of type - `wrong-type-argument' if the type of LISP can't be converted to a - JSON object. */ - -static json_t * -lisp_to_json (Lisp_Object lisp, const struct json_configuration *conf) -{ - if (EQ (lisp, conf->null_object)) - return json_check (json_null ()); - else if (EQ (lisp, conf->false_object)) - return json_check (json_false ()); - else if (EQ (lisp, Qt)) - return json_check (json_true ()); - else if (INTEGERP (lisp)) - { - intmax_t low = TYPE_MINIMUM (json_int_t); - intmax_t high = TYPE_MAXIMUM (json_int_t); - intmax_t value = check_integer_range (lisp, low, high); - return json_check (json_integer (value)); - } - else if (FLOATP (lisp)) - return json_check (json_real (XFLOAT_DATA (lisp))); - else if (STRINGP (lisp)) - { - Lisp_Object encoded = json_encode (lisp); - json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded)); - if (json == NULL) - { - /* A failure can be caused either by an invalid string or by - low memory. */ - json_check_utf8 (encoded); - json_out_of_memory (); - } - return json; - } - - /* LISP now must be a vector, hashtable, alist, or plist. */ - return lisp_to_json_nonscalar (lisp, conf); -} - static void json_parse_args (ptrdiff_t nargs, Lisp_Object *args, @@ -450,158 +98,533 @@ json_parse_args (ptrdiff_t nargs, } } -static bool -json_available_p (void) +/* FIXME: Remove completely. */ +DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL, + doc: /* Return non-nil if libjansson is available (internal use only). */) + (void) { -#ifdef WINDOWSNT - if (!json_initialized) - { - Lisp_Object status; - json_initialized = init_json_functions (); - status = json_initialized ? Qt : Qnil; - Vlibrary_cache = Fcons (Fcons (Qjson, status), Vlibrary_cache); - } - return json_initialized; -#else /* !WINDOWSNT */ - return true; -#endif + return Qt; } -#ifdef WINDOWSNT +/* JSON encoding context. */ +typedef struct { + char *buf; + ptrdiff_t size; /* number of bytes in buf */ + ptrdiff_t capacity; /* allocated size of buf */ + ptrdiff_t chars_delta; /* size - {number of characters in buf} */ + + int maxdepth; + struct symset_tbl *ss_table; /* table used by containing object */ + struct json_configuration conf; +} json_out_t; + +/* Set of symbols. */ +typedef struct { + ptrdiff_t count; /* symbols in table */ + int bits; /* log2(table size) */ + struct symset_tbl *table; /* heap-allocated table */ +} symset_t; + +struct symset_tbl +{ + /* Table used by the containing object if any, so that we can free all + tables if an error occurs. */ + struct symset_tbl *up; + /* Table of symbols (2**bits elements), Qunbound where unused. */ + Lisp_Object entries[]; +}; + +static inline ptrdiff_t +symset_size (int bits) +{ + return (ptrdiff_t)1 << bits; +} + +static struct symset_tbl * +make_symset_table (int bits, struct symset_tbl *up) +{ + int maxbits = min (SIZE_WIDTH - 2 - (word_size < 8 ? 2 : 3), 32); + if (bits > maxbits) + memory_full (PTRDIFF_MAX); /* Will never happen in practice. */ + struct symset_tbl *st = xnmalloc (sizeof *st->entries << bits, sizeof *st); + st->up = up; + ptrdiff_t size = symset_size (bits); + for (ptrdiff_t i = 0; i < size; i++) + st->entries[i] = Qunbound; + return st; +} + +/* Create a new symset to use for a new object. */ +static symset_t +push_symset (json_out_t *jo) +{ + int bits = 4; + struct symset_tbl *tbl = make_symset_table (bits, jo->ss_table); + jo->ss_table = tbl; + return (symset_t){ .count = 0, .bits = bits, .table = tbl }; +} + +/* Destroy the current symset. */ static void -ensure_json_available (void) +pop_symset (json_out_t *jo, symset_t *ss) { - if (!json_available_p ()) - Fsignal (Qjson_unavailable, - list1 (build_unibyte_string ("jansson library not found"))); + jo->ss_table = ss->table->up; + xfree (ss->table); } -#endif -DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL, - doc: /* Return non-nil if libjansson is available (internal use only). */) - (void) +/* Remove all heap-allocated symset tables, in case an error occurred. */ +static void +cleanup_symset_tables (struct symset_tbl *st) { - return json_available_p () ? Qt : Qnil; + while (st) + { + struct symset_tbl *up = st->up; + xfree (st); + st = up; + } } -DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY, - NULL, - doc: /* Return the JSON representation of OBJECT as a string. +static inline uint32_t +symset_hash (Lisp_Object sym, int bits) +{ + return knuth_hash (reduce_emacs_uint_to_hash_hash (XHASH (sym)), bits); +} -OBJECT must be t, a number, string, vector, hashtable, alist, plist, -or the Lisp equivalents to the JSON null and false values, and its -elements must recursively consist of the same kinds of values. t will -be converted to the JSON true value. Vectors will be converted to -JSON arrays, whereas hashtables, alists and plists are converted to -JSON objects. Hashtable keys must be strings without embedded null -characters and must be unique within each object. Alist and plist -keys must be symbols; if a key is duplicate, the first instance is -used. +/* Enlarge the table used by a symset. */ +static NO_INLINE void +symset_expand (symset_t *ss) +{ + struct symset_tbl *old_table = ss->table; + int oldbits = ss->bits; + ptrdiff_t oldsize = symset_size (oldbits); + int bits = oldbits + 1; + ss->bits = bits; + ss->table = make_symset_table (bits, old_table->up); + /* Move all entries from the old table to the new one. */ + ptrdiff_t mask = symset_size (bits) - 1; + struct symset_tbl *tbl = ss->table; + for (ptrdiff_t i = 0; i < oldsize; i++) + { + Lisp_Object sym = old_table->entries[i]; + if (!BASE_EQ (sym, Qunbound)) + { + ptrdiff_t j = symset_hash (sym, bits); + while (!BASE_EQ (tbl->entries[j], Qunbound)) + j = (j + 1) & mask; + tbl->entries[j] = sym; + } + } + xfree (old_table); +} -The Lisp equivalents to the JSON null and false values are -configurable in the arguments ARGS, a list of keyword/argument pairs: +/* If sym is in ss, return false; otherwise add it and return true. + Comparison is done by strict identity. */ +static inline bool +symset_add (json_out_t *jo, symset_t *ss, Lisp_Object sym) +{ + /* Make sure we don't fill more than half of the table. */ + if (ss->count >= (symset_size (ss->bits) >> 1)) + { + symset_expand (ss); + jo->ss_table = ss->table; + } -The keyword argument `:null-object' specifies which object to use -to represent a JSON null value. It defaults to `:null'. + struct symset_tbl *tbl = ss->table; + ptrdiff_t mask = symset_size (ss->bits) - 1; + for (ptrdiff_t i = symset_hash (sym, ss->bits); ; i = (i + 1) & mask) + { + Lisp_Object s = tbl->entries[i]; + if (BASE_EQ (s, sym)) + return false; /* Previous occurrence found. */ + if (BASE_EQ (s, Qunbound)) + { + /* Not in set, add it. */ + tbl->entries[i] = sym; + ss->count++; + return true; + } + } +} -The keyword argument `:false-object' specifies which object to use to -represent a JSON false value. It defaults to `:false'. +static NO_INLINE void +json_out_grow_buf (json_out_t *jo, ptrdiff_t bytes) +{ + ptrdiff_t need = jo->size + bytes; + ptrdiff_t new_size = max (jo->capacity, 512); + while (new_size < need) + new_size <<= 1; + jo->buf = xrealloc (jo->buf, new_size); + jo->capacity = new_size; +} -In you specify the same value for `:null-object' and `:false-object', -a potentially ambiguous situation, the JSON output will not contain -any JSON false values. -usage: (json-serialize OBJECT &rest ARGS) */) - (ptrdiff_t nargs, Lisp_Object *args) +static void +cleanup_json_out (void *arg) { - specpdl_ref count = SPECPDL_INDEX (); + json_out_t *jo = arg; + xfree (jo->buf); + jo->buf = NULL; + cleanup_symset_tables (jo->ss_table); +} -#ifdef WINDOWSNT - ensure_json_available (); -#endif +/* Make room for `bytes` more bytes in buffer. */ +static void +json_make_room (json_out_t *jo, ptrdiff_t bytes) +{ + if (bytes > jo->capacity - jo->size) + json_out_grow_buf (jo, bytes); +} - struct json_configuration conf = - {json_object_hashtable, json_array_array, QCnull, QCfalse}; - json_parse_args (nargs - 1, args + 1, &conf, false); +#define JSON_OUT_STR(jo, str) (json_out_str (jo, str, sizeof (str) - 1)) - json_t *json = lisp_to_json (args[0], &conf); - record_unwind_protect_ptr (json_release_object, json); +/* Add `bytes` bytes from `str` to the buffer. */ +static void +json_out_str (json_out_t *jo, const char *str, size_t bytes) +{ + json_make_room (jo, bytes); + memcpy (jo->buf + jo->size, str, bytes); + jo->size += bytes; +} - char *string = json_dumps (json, JSON_COMPACT | JSON_ENCODE_ANY); - if (string == NULL) - json_out_of_memory (); - record_unwind_protect_ptr (json_free, string); +static void +json_out_byte (json_out_t *jo, unsigned char c) +{ + json_make_room (jo, 1); + jo->buf[jo->size++] = c; +} - return unbind_to (count, build_string_from_utf8 (string)); +static void +json_out_fixnum (json_out_t *jo, EMACS_INT x) +{ + char buf[INT_BUFSIZE_BOUND (EMACS_INT)]; + char *end = buf + sizeof buf; + char *p = fixnum_to_string (x, buf, end); + json_out_str (jo, p, end - p); } -struct json_buffer_and_size +static AVOID +string_not_unicode (Lisp_Object obj) { - const char *buffer; - ptrdiff_t size; - /* This tracks how many bytes were inserted by the callback since - json_dump_callback was called. */ - ptrdiff_t inserted_bytes; + /* FIXME: this is just for compatibility with existing tests, it's not + a very descriptive error. */ + wrong_type_argument (Qjson_value_p, obj); +} + +static const unsigned char json_plain_char[256] = { + /* 32 chars/line: 1 for printable ASCII + DEL except " and \, 0 elsewhere */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1f */ + 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, /* 40-5f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 60-7f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0-bf */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* c0-df */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0-ff */ }; -static Lisp_Object -json_insert (void *data) +static void +json_out_string (json_out_t *jo, Lisp_Object str, int skip) +{ + /* FIXME: this code is slow, make faster! */ + + static const char hexchar[16] = "0123456789ABCDEF"; + ptrdiff_t len = SBYTES (str); + json_make_room (jo, len + 2); + json_out_byte (jo, '"'); + unsigned char *p = SDATA (str); + unsigned char *end = p + len; + p += skip; + while (p < end) + { + unsigned char c = *p; + if (json_plain_char[c]) + { + json_out_byte (jo, c); + p++; + } + else if (c > 0x7f) + { + if (STRING_MULTIBYTE (str)) + { + int n; + if (c <= 0xc1) + string_not_unicode (str); + if (c <= 0xdf) + n = 2; + else if (c <= 0xef) + { + int v = (((c & 0x0f) << 12) + + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f)); + if (char_surrogate_p (v)) + string_not_unicode (str); + n = 3; + } + else if (c <= 0xf7) + { + int v = (((c & 0x07) << 18) + + ((p[1] & 0x3f) << 12) + + ((p[2] & 0x3f) << 6) + + (p[3] & 0x3f)); + if (v > MAX_UNICODE_CHAR) + string_not_unicode (str); + n = 4; + } + else + string_not_unicode (str); + json_out_str (jo, (const char *)p, n); + jo->chars_delta += n - 1; + p += n; + } + else + string_not_unicode (str); + } + else + { + json_out_byte (jo, '\\'); + switch (c) + { + case '"': + case '\\': json_out_byte (jo, c); break; + case '\b': json_out_byte (jo, 'b'); break; + case '\t': json_out_byte (jo, 't'); break; + case '\n': json_out_byte (jo, 'n'); break; + case '\f': json_out_byte (jo, 'f'); break; + case '\r': json_out_byte (jo, 'r'); break; + default: + { + char hex[5] = { 'u', '0', '0', + hexchar[c >> 4], hexchar[c & 0xf] }; + json_out_str (jo, hex, 5); + break; + } + } + p++; + } + } + json_out_byte (jo, '"'); +} + +static void +json_out_nest (json_out_t *jo) +{ + --jo->maxdepth; + if (jo->maxdepth < 0) + error ("Maximum JSON serialisation depth exceeded"); +} + +static void +json_out_unnest (json_out_t *jo) { - struct json_buffer_and_size *buffer_and_size = data; - ptrdiff_t len = buffer_and_size->size; - ptrdiff_t inserted_bytes = buffer_and_size->inserted_bytes; - ptrdiff_t gap_size = GAP_SIZE - inserted_bytes; + ++jo->maxdepth; +} - /* Enlarge the gap if necessary. */ - if (gap_size < len) - make_gap (len - gap_size); +static void json_out_something (json_out_t *jo, Lisp_Object obj); - /* Copy this chunk of data into the gap. */ - memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE + inserted_bytes, - buffer_and_size->buffer, len); - buffer_and_size->inserted_bytes += len; - return Qnil; +static void +json_out_object_cons (json_out_t *jo, Lisp_Object obj) +{ + json_out_nest (jo); + symset_t ss = push_symset (jo); + json_out_byte (jo, '{'); + bool is_alist = CONSP (XCAR (obj)); + bool first = true; + Lisp_Object tail = obj; + FOR_EACH_TAIL (tail) + { + Lisp_Object key; + Lisp_Object value; + if (is_alist) + { + Lisp_Object pair = XCAR (tail); + CHECK_CONS (pair); + key = XCAR (pair); + value = XCDR (pair); + } + else + { + key = XCAR (tail); + tail = XCDR (tail); + CHECK_CONS (tail); + value = XCAR (tail); + } + key = maybe_remove_pos_from_symbol (key); + CHECK_TYPE (BARE_SYMBOL_P (key), Qsymbolp, key); + + if (symset_add (jo, &ss, key)) + { + if (!first) + json_out_byte (jo, ','); + first = false; + + Lisp_Object key_str = SYMBOL_NAME (key); + const char *str = SSDATA (key_str); + /* Skip leading ':' in plist keys. */ + int skip = !is_alist && str[0] == ':' && str[1] ? 1 : 0; + json_out_string (jo, key_str, skip); + json_out_byte (jo, ':'); + json_out_something (jo, value); + } + } + CHECK_LIST_END (tail, obj); + json_out_byte (jo, '}'); + pop_symset (jo, &ss); + json_out_unnest (jo); } -static Lisp_Object -json_handle_nonlocal_exit (enum nonlocal_exit type, Lisp_Object data) +static void +json_out_object_hash (json_out_t *jo, Lisp_Object obj) { - switch (type) + json_out_nest (jo); + json_out_byte (jo, '{'); + struct Lisp_Hash_Table *h = XHASH_TABLE (obj); + bool first = true; + DOHASH (h, k, v) { - case NONLOCAL_EXIT_SIGNAL: - return data; - case NONLOCAL_EXIT_THROW: - return Fcons (Qno_catch, data); - default: - eassume (false); + if (!first) + json_out_byte (jo, ','); + first = false; + CHECK_STRING (k); + /* It's the user's responsibility to ensure that hash keys are + unique; we don't check for it. */ + json_out_string (jo, k, 0); + json_out_byte (jo, ':'); + json_out_something (jo, v); } + json_out_byte (jo, '}'); + json_out_unnest (jo); + } -struct json_insert_data +static void +json_out_array (json_out_t *jo, Lisp_Object obj) { - /* This tracks how many bytes were inserted by the callback since - json_dump_callback was called. */ - ptrdiff_t inserted_bytes; - /* nil if json_insert succeeded, otherwise the symbol - Qcatch_all_memory_full or a cons (ERROR-SYMBOL . ERROR-DATA). */ - Lisp_Object error; -}; + json_out_nest (jo); + json_out_byte (jo, '['); + ptrdiff_t n = ASIZE (obj); + for (ptrdiff_t i = 0; i < n; i++) + { + if (i > 0) + json_out_byte (jo, ','); + json_out_something (jo, AREF (obj, i)); + } + json_out_byte (jo, ']'); + json_out_unnest (jo); +} -/* Callback for json_dump_callback that inserts a JSON representation - as a unibyte string into the gap. DATA must point to a structure - of type json_insert_data. This function may not exit nonlocally. - It catches all nonlocal exits and stores them in data->error for - reraising. */ +static void +json_out_float (json_out_t *jo, Lisp_Object f) +{ + double x = XFLOAT_DATA (f); + if (!isfinite (x)) + signal_error ("JSON does not allow Inf or NaN", f); + /* As luck has it, float_to_string emits correct JSON float syntax for + all numbers (because Vfloat_output_format is Qnil). */ + json_make_room (jo, FLOAT_TO_STRING_BUFSIZE); + int n = float_to_string (jo->buf + jo->size, x); + jo->size += n; +} -static int -json_insert_callback (const char *buffer, size_t size, void *data) +static void +json_out_bignum (json_out_t *jo, Lisp_Object x) { - struct json_insert_data *d = data; - struct json_buffer_and_size buffer_and_size - = {.buffer = buffer, .size = size, .inserted_bytes = d->inserted_bytes}; - d->error = internal_catch_all (json_insert, &buffer_and_size, - json_handle_nonlocal_exit); - d->inserted_bytes = buffer_and_size.inserted_bytes; - return NILP (d->error) ? 0 : -1; + int base = 10; + ptrdiff_t size = bignum_bufsize (x, base); + json_make_room (jo, size); + int n = bignum_to_c_string (jo->buf + jo->size, size, x, base); + jo->size += n; +} + +static void +json_out_something (json_out_t *jo, Lisp_Object obj) +{ + if (EQ (obj, jo->conf.null_object)) + JSON_OUT_STR (jo, "null"); + else if (EQ (obj, jo->conf.false_object)) + JSON_OUT_STR (jo, "false"); + else if (EQ (obj, Qt)) + JSON_OUT_STR (jo, "true"); + else if (NILP (obj)) + JSON_OUT_STR (jo, "{}"); + else if (FIXNUMP (obj)) + json_out_fixnum (jo, XFIXNUM (obj)); + else if (STRINGP (obj)) + json_out_string (jo, obj, 0); + else if (CONSP (obj)) + json_out_object_cons (jo, obj); + else if (FLOATP (obj)) + json_out_float (jo, obj); + else if (HASH_TABLE_P (obj)) + json_out_object_hash (jo, obj); + else if (VECTORP (obj)) + json_out_array (jo, obj); + else if (BIGNUMP (obj)) + json_out_bignum (jo, obj); + else + wrong_type_argument (Qjson_value_p, obj); +} + +static Lisp_Object +json_out_to_string (json_out_t *jo) +{ + /* FIXME: should this be a unibyte or multibyte string? + Right now we make a multibyte string for test compatibility, + but we are really encoding so unibyte would make more sense. */ + ptrdiff_t nchars = jo->size - jo->chars_delta; + return make_multibyte_string (jo->buf, nchars, jo->size); +} + +static void +json_serialize (json_out_t *jo, Lisp_Object object, + ptrdiff_t nargs, Lisp_Object *args) +{ + *jo = (json_out_t) { + /* The maximum nesting depth allowed should be sufficient for most + uses but could be raised if necessary. (The default maximum + depth for JSON_checker is 20.) */ + .maxdepth = 50, + .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse} + }; + json_parse_args (nargs, args, &jo->conf, false); + record_unwind_protect_ptr (cleanup_json_out, jo); + + /* Make float conversion independent of float-output-format. */ + if (!NILP (Vfloat_output_format)) + specbind (Qfloat_output_format, Qnil); + + json_out_something (jo, object); +} + +DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY, + NULL, + doc: /* Return the JSON representation of OBJECT as a string. + +OBJECT must be t, a number, string, vector, hashtable, alist, plist, +or the Lisp equivalents to the JSON null and false values, and its +elements must recursively consist of the same kinds of values. t will +be converted to the JSON true value. Vectors will be converted to +JSON arrays, whereas hashtables, alists and plists are converted to +JSON objects. Hashtable keys must be strings, unique within each object. +Alist and plist keys must be symbols; if a key is duplicate, the first +instance is used. A leading colon in plist keys is elided. + +The Lisp equivalents to the JSON null and false values are +configurable in the arguments ARGS, a list of keyword/argument pairs: + +The keyword argument `:null-object' specifies which object to use +to represent a JSON null value. It defaults to `:null'. + +The keyword argument `:false-object' specifies which object to use to +represent a JSON false value. It defaults to `:false'. + +In you specify the same value for `:null-object' and `:false-object', +a potentially ambiguous situation, the JSON output will not contain +any JSON false values. +usage: (json-serialize OBJECT &rest ARGS) */) + (ptrdiff_t nargs, Lisp_Object *args) +{ + specpdl_ref count = SPECPDL_INDEX (); + json_out_t jo; + json_serialize (&jo, args[0], nargs - 1, args + 1); + return unbind_to (count, json_out_to_string (&jo)); } DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY, @@ -614,71 +637,52 @@ usage: (json-insert OBJECT &rest ARGS) */) (ptrdiff_t nargs, Lisp_Object *args) { specpdl_ref count = SPECPDL_INDEX (); + json_out_t jo; + json_serialize (&jo, args[0], nargs - 1, args + 1); -#ifdef WINDOWSNT - ensure_json_available (); -#endif - - struct json_configuration conf = - {json_object_hashtable, json_array_array, QCnull, QCfalse}; - json_parse_args (nargs - 1, args + 1, &conf, false); - - json_t *json = lisp_to_json (args[0], &conf); - record_unwind_protect_ptr (json_release_object, json); + /* FIXME: All the work below just to insert a string into a buffer? */ prepare_to_modify_buffer (PT, PT, NULL); move_gap_both (PT, PT_BYTE); - struct json_insert_data data; - data.inserted_bytes = 0; - /* Could have used json_dumpb, but that became available only in - Jansson 2.10, whereas we want to support 2.7 and upward. */ - int status = json_dump_callback (json, json_insert_callback, &data, - JSON_COMPACT | JSON_ENCODE_ANY); - if (status == -1) - { - if (CONSP (data.error)) - xsignal (XCAR (data.error), XCDR (data.error)); - else - json_out_of_memory (); - } + if (GAP_SIZE < jo.size) + make_gap (jo.size - GAP_SIZE); + memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE, jo.buf, jo.size); + + /* No need to keep allocation beyond this point. */ + unbind_to (count, Qnil); ptrdiff_t inserted = 0; - ptrdiff_t inserted_bytes = data.inserted_bytes; - if (inserted_bytes > 0) + ptrdiff_t inserted_bytes = jo.size; + + /* If required, decode the stuff we've read into the gap. */ + struct coding_system coding; + /* JSON strings are UTF-8 encoded strings. */ + setup_coding_system (Qutf_8_unix, &coding); + coding.dst_multibyte = !NILP (BVAR (current_buffer, + enable_multibyte_characters)); + if (CODING_MAY_REQUIRE_DECODING (&coding)) { - /* If required, decode the stuff we've read into the gap. */ - struct coding_system coding; - /* JSON strings are UTF-8 encoded strings. If for some reason - the text returned by the Jansson library includes invalid - byte sequences, they will be represented by raw bytes in the - buffer text. */ - setup_coding_system (Qutf_8_unix, &coding); - coding.dst_multibyte = - !NILP (BVAR (current_buffer, enable_multibyte_characters)); - if (CODING_MAY_REQUIRE_DECODING (&coding)) - { - /* Now we have all the new bytes at the beginning of the gap, - but `decode_coding_gap` needs them at the end of the gap, so - we need to move them. */ - memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes); - decode_coding_gap (&coding, inserted_bytes); - inserted = coding.produced_char; - } - else - { - /* Make the inserted text part of the buffer, as unibyte text. */ - eassert (NILP (BVAR (current_buffer, enable_multibyte_characters))); - insert_from_gap_1 (inserted_bytes, inserted_bytes, false); - - /* The target buffer is unibyte, so we don't need to decode. */ - invalidate_buffer_caches (current_buffer, - PT, PT + inserted_bytes); - adjust_after_insert (PT, PT_BYTE, - PT + inserted_bytes, - PT_BYTE + inserted_bytes, - inserted_bytes); - inserted = inserted_bytes; - } + /* Now we have all the new bytes at the beginning of the gap, + but `decode_coding_gap` needs them at the end of the gap, so + we need to move them. */ + memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes); + decode_coding_gap (&coding, inserted_bytes); + inserted = coding.produced_char; + } + else + { + /* Make the inserted text part of the buffer, as unibyte text. */ + eassert (NILP (BVAR (current_buffer, enable_multibyte_characters))); + insert_from_gap_1 (inserted_bytes, inserted_bytes, false); + + /* The target buffer is unibyte, so we don't need to decode. */ + invalidate_buffer_caches (current_buffer, + PT, PT + inserted_bytes); + adjust_after_insert (PT, PT_BYTE, + PT + inserted_bytes, + PT_BYTE + inserted_bytes, + inserted_bytes); + inserted = inserted_bytes; } /* Call after-change hooks. */ @@ -690,7 +694,26 @@ usage: (json-insert OBJECT &rest ARGS) */) SET_PT_BOTH (PT + inserted, PT_BYTE + inserted_bytes); } - return unbind_to (count, Qnil); + return Qnil; +} + + +/* Note that all callers of make_string_from_utf8 and build_string_from_utf8 + below either pass only value UTF-8 strings or use the function for + formatting error messages; in the latter case correctness isn't + critical. */ + +/* Return a unibyte string containing the sequence of UTF-8 encoding + units of the UTF-8 representation of STRING. If STRING does not + represent a sequence of Unicode scalar values, return a string with + unspecified contents. */ + +static Lisp_Object +json_encode (Lisp_Object string) +{ + /* FIXME: Raise an error if STRING is not a scalar value + sequence. */ + return encode_string_utf_8 (string, Qnil, false, Qt, Qt); } #define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64 @@ -1894,7 +1917,6 @@ syms_of_json (void) DEFSYM (QCnull, ":null"); DEFSYM (QCfalse, ":false"); - DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p"); DEFSYM (Qjson_value_p, "json-value-p"); DEFSYM (Qjson_error, "json-error"); @@ -1907,7 +1929,6 @@ syms_of_json (void) DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error") DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error") DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error") - DEFSYM (Qjson_unavailable, "json-unavailable"); define_error (Qjson_error, "generic JSON error", Qerror); define_error (Qjson_out_of_memory, "not enough memory for creating JSON object", Qjson_error); diff --git a/src/lisp.h b/src/lisp.h index f066c876619..7c4bd435cd8 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4327,7 +4327,6 @@ extern void syms_of_image (void); #ifdef HAVE_JSON /* Defined in json.c. */ -extern void init_json (void); extern void syms_of_json (void); #endif diff --git a/src/print.c b/src/print.c index 76c577ec800..0d867b89395 100644 --- a/src/print.c +++ b/src/print.c @@ -2859,6 +2859,7 @@ decimal point. 0 is not allowed with `e' or `g'. A value of nil means to use the shortest notation that represents the number without losing information. */); Vfloat_output_format = Qnil; + DEFSYM (Qfloat_output_format, "float-output-format"); DEFVAR_BOOL ("print-integers-as-characters", print_integers_as_characters, doc: /* Non-nil means integers are printed using characters syntax. diff --git a/test/src/json-tests.el b/test/src/json-tests.el index dffc6291ca1..e5cbe8bff5c 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -126,11 +126,38 @@ (ert-deftest json-serialize/object-with-duplicate-keys () (skip-unless (fboundp 'json-serialize)) - (let ((table (make-hash-table :test #'eq))) - (puthash (copy-sequence "abc") [1 2 t] table) - (puthash (copy-sequence "abc") :null table) - (should (equal (hash-table-count table) 2)) - (should-error (json-serialize table) :type 'wrong-type-argument))) + + (dolist (n '(1 5 20 100)) + (let ((symbols (mapcar (lambda (i) (make-symbol (format "s%d" i))) + (number-sequence 1 n))) + (expected (concat "{" + (mapconcat (lambda (i) (format "\"s%d\":%d" i i)) + (number-sequence 1 n) ",") + "}"))) + ;; alist + (should (equal (json-serialize + (append + (cl-mapcar #'cons + symbols (number-sequence 1 n)) + (cl-mapcar #'cons + symbols (number-sequence 1001 (+ 1000 n))))) + expected)) + ;; plist + (should (equal (json-serialize + (append + (cl-mapcan #'list + symbols (number-sequence 1 n)) + (cl-mapcan #'list + symbols (number-sequence 1001 (+ 1000 n))))) + expected)))) + + ;; We don't check for duplicated keys in hash tables. + ;; (let ((table (make-hash-table :test #'eq))) + ;; (puthash (copy-sequence "abc") [1 2 t] table) + ;; (puthash (copy-sequence "abc") :null table) + ;; (should (equal (hash-table-count table) 2)) + ;; (should-error (json-serialize table) :type 'wrong-type-argument)) + ) (ert-deftest json-parse-string/object () (skip-unless (fboundp 'json-parse-string)) @@ -173,8 +200,8 @@ (should (equal (json-serialize ["\nasdфыв\u001f\u007ffgh\t"]) "[\"\\nasdфыв\\u001F\u007ffgh\\t\"]")) (should (equal (json-serialize ["a\0b"]) "[\"a\\u0000b\"]")) - ;; FIXME: Is this the right behavior? - (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]"))) + (should-error (json-serialize ["\xC3\x84"])) + (should-error (json-serialize ["\u00C4\xC3\x84"]))) (ert-deftest json-serialize/invalid-unicode () (skip-unless (fboundp 'json-serialize)) commit ab016657e7b1bd32c775da271ffb7127f86d5a23 Merge: cc212ea314d 77115be256d Author: Eli Zaretskii Date: Sat Mar 30 16:37:27 2024 +0300 Merge branch 'master' of git.savannah.gnu.org:/srv/git/emacs commit cc212ea314d45c98761ae7f68600ad8bf799ea36 Author: David Ponce Date: Sat Mar 30 13:59:41 2024 +0100 bug#69992: Minor improvement to image map transformation logic * lisp/image.el (image--compute-rotation): New function. (image--compute-map, image--compute-original-map): Use it. Ensure all transformations are applied or undone according to what Emacs does internally. Call a transformation function only when needed. Fix doc string. (image--scale-map, image--rotate-map): Assume effective scale argument. (image--rotate-coord): Improve doc string. (image--flip-map): Remove no more used argument FLIP. * test/lisp/image-tests.el (image-create-image-with-map): Use a valid SVG image otherwise `image-size' will not return a valid value and calculation of scale could fail. (image-transform-map): Update according to changed signature of `image--flip-map'. diff --git a/lisp/image.el b/lisp/image.el index d7496485aca..e973dff32c7 100644 --- a/lisp/image.el +++ b/lisp/image.el @@ -1423,115 +1423,142 @@ is recomputed to fit the newly transformed image." :type 'boolean :version "30.1") +(defsubst image--compute-rotation (image) + "Return the current rotation of IMAGE, or 0 if no rotation. +Also return nil if rotation is not a multiples of 90 degrees (0, 90, +180[-180] and 270[-90])." + (let ((degrees (or (image-property image :rotation) 0))) + (and (= 0 (mod degrees 1)) + (car (memql (truncate (mod degrees 360)) '(0 90 180 270)))))) + (defun image--compute-map (image) "Compute map for IMAGE suitable to be used as its :map property. -Return a copy of :original-image transformed based on IMAGE's :scale, +Return a copy of :original-map transformed based on IMAGE's :scale, :rotation, and :flip. When IMAGE's :original-map is nil, return nil. When :rotation is not a multiple of 90, return copy of :original-map." - (pcase-let* ((original-map (image-property image :original-map)) - (map (copy-tree original-map t)) - (scale (or (image-property image :scale) 1)) - (rotation (or (image-property image :rotation) 0)) - (flip (image-property image :flip)) - ((and size `(,width . ,height)) (image-size image t))) - (when (and ; Handle only 90-degree rotations - (zerop (mod rotation 1)) - (zerop (% (truncate rotation) 90))) - ;; SIZE fits MAP after transformations. Scale MAP before - ;; flip and rotate operations, since both need MAP to fit SIZE. - (image--scale-map map scale) + (when-let ((map (image-property image :original-map))) + (setq map (copy-tree map t)) + (let* ((size (image-size image t)) + ;; The image can be scaled for many reasons (:scale, + ;; :max-width, etc), so using `image--current-scaling' to + ;; calculate the current scaling is the correct method. But, + ;; since each call to `image_size' is expensive, the code is + ;; duplicated here to save the a call to `image-size'. + (scale (/ (float (car size)) + (car (image-size + (image--image-without-parameters image) t)))) + (rotation (image--compute-rotation image)) + ;; Image is flipped only if rotation is a multiple of 90, + ;; including 0. + (flip (and rotation (image-property image :flip)))) + ;; SIZE fits MAP after transformations. Scale MAP before flip and + ;; rotate operations, since both need MAP to fit SIZE. + (unless (= scale 1) + (image--scale-map map scale)) ;; In rendered images, rotation is always applied before flip. - (image--rotate-map - map rotation (if (or (= 90 rotation) (= 270 rotation)) + (when (memql rotation '(90 180 270)) + (image--rotate-map + map rotation (if (= rotation 180) + size ;; If rotated ±90°, swap width and height. - (cons height width) - size)) + (cons (cdr size) (car size))))) ;; After rotation, there's no need to swap width and height. - (image--flip-map map flip size)) + (when flip + (image--flip-map map size))) map)) (defun image--compute-original-map (image) "Return original map for IMAGE. If IMAGE lacks :map property, return nil. -When :rotation is not a multiple of 90, return copy of :map." - (when (image-property image :map) - (let* ((original-map (copy-tree (image-property image :map) t)) - (scale (or (image-property image :scale) 1)) - (rotation (or (image-property image :rotation) 0)) - (flip (image-property image :flip)) - (size (image-size image t))) - (when (and ; Handle only 90-degree rotations - (zerop (mod rotation 1)) - (zerop (% (truncate rotation) 90))) - ;; In rendered images, rotation is always applied before flip. - ;; To undo the transformation, flip before rotating. SIZE fits - ;; ORIGINAL-MAP before transformations are applied. Therefore, - ;; scale ORIGINAL-MAP after flip and rotate operations, since - ;; both need ORIGINAL-MAP to fit SIZE. - (image--flip-map original-map flip size) - (image--rotate-map original-map (- rotation) size) - (image--scale-map original-map (/ 1.0 scale))) - original-map))) +When there is no transformation, return copy of :map." + (when-let ((original-map (image-property image :map))) + (setq original-map (copy-tree original-map t)) + (let* ((size (image-size image t)) + ;; The image can be scaled for many reasons (:scale, + ;; :max-width, etc), so using `image--current-scaling' to + ;; calculate the current scaling is the correct method. But, + ;; since each call to `image_size' is expensive, the code is + ;; duplicated here to save the a call to `image-size'. + (scale (/ (float (car size)) + (car (image-size + (image--image-without-parameters image) t)))) + (rotation (image--compute-rotation image)) + ;; Image is flipped only if rotation is a multiple of 90 + ;; including 0. + (flip (and rotation (image-property image :flip)))) + ;; In rendered images, rotation is always applied before flip. + ;; To undo the transformation, flip before rotating. SIZE fits + ;; ORIGINAL-MAP before transformations are applied. Therefore, + ;; scale ORIGINAL-MAP after flip and rotate operations, since + ;; both need ORIGINAL-MAP to fit SIZE. + ;; In rendered images, rotation is always applied before flip. + (when flip + (image--flip-map original-map size)) + (when (memql rotation '(90 180 270)) + (image--rotate-map original-map (- rotation) size)) + (unless (= scale 1) + (image--scale-map original-map (/ 1.0 scale)))) + original-map)) (defun image--scale-map (map scale) "Scale MAP according to SCALE. Destructively modifies and returns MAP." - (unless (= 1 scale) - (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) - (pcase-exhaustive type - ('rect - (setf (caar coords) (round (* (caar coords) scale))) - (setf (cdar coords) (round (* (cdar coords) scale))) - (setf (cadr coords) (round (* (cadr coords) scale))) - (setf (cddr coords) (round (* (cddr coords) scale)))) - ('circle - (setf (caar coords) (round (* (caar coords) scale))) - (setf (cdar coords) (round (* (cdar coords) scale))) - (setcdr coords (round (* (cdr coords) scale)))) - ('poly - (dotimes (i (length coords)) - (aset coords i - (round (* (aref coords i) scale)))))))) + (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) + (pcase-exhaustive type + ('rect + (setf (caar coords) (round (* (caar coords) scale))) + (setf (cdar coords) (round (* (cdar coords) scale))) + (setf (cadr coords) (round (* (cadr coords) scale))) + (setf (cddr coords) (round (* (cddr coords) scale)))) + ('circle + (setf (caar coords) (round (* (caar coords) scale))) + (setf (cdar coords) (round (* (cdar coords) scale))) + (setcdr coords (round (* (cdr coords) scale)))) + ('poly + (dotimes (i (length coords)) + (aset coords i + (round (* (aref coords i) scale))))))) map) (defun image--rotate-map (map rotation size) "Rotate MAP according to ROTATION and SIZE. +ROTATION must be a non-zero multiple of 90. Destructively modifies and returns MAP." - (unless (zerop rotation) - (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) - (pcase-exhaustive type - ('rect - (let ( x0 y0 ; New upper left corner - x1 y1) ; New bottom right corner - (pcase (truncate (mod rotation 360)) ; Set new corners to... - (90 ; ...old bottom left and upper right - (setq x0 (caar coords) y0 (cddr coords) - x1 (cadr coords) y1 (cdar coords))) - (180 ; ...old bottom right and upper left - (setq x0 (cadr coords) y0 (cddr coords) - x1 (caar coords) y1 (cdar coords))) - (270 ; ...old upper right and bottom left - (setq x0 (cadr coords) y0 (cdar coords) - x1 (caar coords) y1 (cddr coords)))) - (setcar coords (image--rotate-coord x0 y0 rotation size)) - (setcdr coords (image--rotate-coord x1 y1 rotation size)))) - ('circle - (setcar coords (image--rotate-coord - (caar coords) (cdar coords) rotation size))) - ('poly - (dotimes (i (length coords)) - (when (= 0 (% i 2)) - (pcase-let ((`(,x . ,y) - (image--rotate-coord - (aref coords i) (aref coords (1+ i)) rotation size))) - (aset coords i x) - (aset coords (1+ i) y)))))))) + (setq rotation (mod rotation 360)) + (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) + (pcase-exhaustive type + ('rect + (let ( x0 y0 ; New upper left corner + x1 y1) ; New bottom right corner + (pcase rotation ; Set new corners to... + (90 ; ...old bottom left and upper right + (setq x0 (caar coords) y0 (cddr coords) + x1 (cadr coords) y1 (cdar coords))) + (180 ; ...old bottom right and upper left + (setq x0 (cadr coords) y0 (cddr coords) + x1 (caar coords) y1 (cdar coords))) + (270 ; ...old upper right and bottom left + (setq x0 (cadr coords) y0 (cdar coords) + x1 (caar coords) y1 (cddr coords)))) + (setcar coords (image--rotate-coord x0 y0 rotation size)) + (setcdr coords (image--rotate-coord x1 y1 rotation size)))) + ('circle + (setcar coords (image--rotate-coord + (caar coords) (cdar coords) rotation size))) + ('poly + (dotimes (i (length coords)) + (when (= 0 (% i 2)) + (pcase-let ((`(,x . ,y) + (image--rotate-coord + (aref coords i) (aref coords (1+ i)) rotation size))) + (aset coords i x) + (aset coords (1+ i) y))))))) map) (defun image--rotate-coord (x y angle size) "Rotate coordinates X and Y by ANGLE in image of SIZE. -ANGLE must be a multiple of 90. Returns a cons cell of rounded -coordinates (X1 Y1)." +ANGLE must be a multiple of 90 in [90 180 270]. Returns a cons cell of +rounded coordinates (X1 Y1)." (pcase-let* ((radian (* (/ angle 180.0) float-pi)) (`(,width . ,height) size) ;; y is positive, but we are in the bottom-right quadrant @@ -1552,25 +1579,24 @@ coordinates (X1 Y1)." (y1 (- y1))) (cons (round x1) (round y1)))) -(defun image--flip-map (map flip size) - "Horizontally flip MAP according to FLIP and SIZE. +(defun image--flip-map (map size) + "Horizontally flip MAP according to SIZE. Destructively modifies and returns MAP." - (when flip - (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) - (pcase-exhaustive type - ('rect - (let ((x0 (- (car size) (cadr coords))) - (y0 (cdar coords)) - (x1 (- (car size) (caar coords))) - (y1 (cddr coords))) - (setcar coords (cons x0 y0)) - (setcdr coords (cons x1 y1)))) - ('circle - (setf (caar coords) (- (car size) (caar coords)))) - ('poly - (dotimes (i (length coords)) - (when (= 0 (% i 2)) - (aset coords i (- (car size) (aref coords i))))))))) + (pcase-dolist (`(,`(,type . ,coords) ,_id ,_plist) map) + (pcase-exhaustive type + ('rect + (let ((x0 (- (car size) (cadr coords))) + (y0 (cdar coords)) + (x1 (- (car size) (caar coords))) + (y1 (cddr coords))) + (setcar coords (cons x0 y0)) + (setcdr coords (cons x1 y1)))) + ('circle + (setf (caar coords) (- (car size) (caar coords)))) + ('poly + (dotimes (i (length coords)) + (when (= 0 (% i 2)) + (aset coords i (- (car size) (aref coords i)))))))) map) (provide 'image) diff --git a/test/lisp/image-tests.el b/test/lisp/image-tests.el index 6a5f03e38a0..020781eff50 100644 --- a/test/lisp/image-tests.el +++ b/test/lisp/image-tests.el @@ -158,7 +158,7 @@ (ert-deftest image-create-image-with-map () "Test that `create-image' correctly adds :map and/or :original-map." (skip-unless (display-images-p)) - (let ((data "foo") + (let ((data "") (map '(((circle (1 . 1) . 1) a))) (original-map '(((circle (2 . 2) . 2) a))) (original-map-other '(((circle (3 . 3) . 3) a)))) @@ -282,7 +282,7 @@ corresponding coordinate in B. When nil, TOLERANCE defaults to 5." '(((circle (12 . 4) . 2) "circle") ((rect (7 . 3) 9 . 8) "rect") ((poly . [4 6 2 7 1 2]) "poly")))) - (should (equal (image--flip-map (copy-tree map t) t `(,width . ,height)) + (should (equal (image--flip-map (copy-tree map t) `(,width . ,height)) '(((circle (6 . 3) . 2) "circle") ((rect (2 . 6) 7 . 8) "rect") ((poly . [4 11 3 13 8 14]) "poly")))) @@ -291,7 +291,7 @@ corresponding coordinate in B. When nil, TOLERANCE defaults to 5." ;; Scale size because the map has been scaled. (image--rotate-map copy 90 `(,(* 2 width) . ,(* 2 height))) ;; Swap width and height because the map has been flipped. - (image--flip-map copy t `(,(* 2 height) . ,(* 2 width))) + (image--flip-map copy `(,(* 2 height) . ,(* 2 width))) (should (equal copy '(((circle (6 . 8) . 4) "circle") ((rect (12 . 6) 16 . 16) "rect") commit 77115be256d08c6524bc0c498d1d268686814090 Author: Tony Zorman Date: Thu Dec 21 17:51:09 2023 +0100 Add use-package-vc-prefer-newest user option * lisp/use-package/use-package-core.el (use-package-vc-prefer-newest): User option to prefer the latest commit (as opposed to the latest release) of a package. (use-package-normalize--vc-arg): Check for use-package-vc-prefer-newest. * doc/misc/use-package.texi (Install package): Document use-package-vc-prefer-newest. * etc/NEWS: Document use-package-vc-prefer-newest. diff --git a/doc/misc/use-package.texi b/doc/misc/use-package.texi index d834e1be754..c2b6404b68b 100644 --- a/doc/misc/use-package.texi +++ b/doc/misc/use-package.texi @@ -1639,8 +1639,12 @@ For example, would try -- by invoking @code{package-vc-install} -- to install the latest commit of the package @code{foo} from the specified remote. -This can also be used for local packages, by combining it with the -@code{:load-path} (@pxref{Load path}) keyword: +@vindex use-package-vc-prefer-newest +Alternatively, the @code{use-package-vc-prefer-newest} user option +exists to always prefer the latest commit. + +The @code{:vc} keyword can also be used for local packages, by +combining it with @code{:load-path} (@pxref{Load path}): @example @group diff --git a/etc/NEWS b/etc/NEWS index 6cefe11a2cc..feb128bc2de 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1218,6 +1218,11 @@ interactive Python interpreter specified by 'python-interpreter'. *** New ':vc' keyword. This keyword enables the user to install packages using 'package-vc'. ++++ +*** New user option 'use-package-vc-prefer-newest'. +This allows the user to always install the newest commit of a package +when using the ':vc' keyword. + ** Gnus *** The 'nnweb-type' option 'gmane' has been removed. diff --git a/lisp/use-package/use-package-core.el b/lisp/use-package/use-package-core.el index d9343e14839..ba2e93c97e9 100644 --- a/lisp/use-package/use-package-core.el +++ b/lisp/use-package/use-package-core.el @@ -346,6 +346,20 @@ undefined variables." :type 'boolean :group 'use-package) +(defcustom use-package-vc-prefer-newest nil + "Prefer the newest commit over the latest release. +By default, much like GNU ELPA and NonGNU ELPA, the `:vc' keyword +tracks the latest stable release of a package. If this option is +non-nil, the latest commit is preferred instead. This has the +same effect as specifying `:rev :newest' in every invocation of +`:vc'. + +Note that always tracking a package's latest commit might lead to +stability issues." + :type 'boolean + :version "30.1" + :group 'use-package) + (defvar use-package-statistics (make-hash-table)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1649,9 +1663,11 @@ indicating the latest commit) revision." (if (and s (stringp s)) (intern s) s)) (normalize (k v) (pcase k - (:rev (cond ((or (eq v :last-release) (not v)) :last-release) - ((eq v :newest) nil) - (t (ensure-string v)))) + (:rev (pcase v + ('nil (if use-package-vc-prefer-newest nil :last-release)) + (:last-release :last-release) + (:newest nil) + (_ (ensure-string v)))) (:vc-backend (ensure-symbol v)) (_ (ensure-string v))))) (pcase-let ((valid-kws '(:url :branch :lisp-dir :main-file :vc-backend :rev)) commit 87be53846bfbf5a6387cb5a40105bd0fc5b48b38 Merge: 966ece24756 946d4aad1df Author: Eli Zaretskii Date: Sat Mar 30 04:38:18 2024 -0400 Merge from origin/emacs-29 946d4aad1df Avoid errors in Info-search-case-sensitively in DIR buffers fbf68302999 Add test for previous change (bug#70023) bcf6dd6e266 Add typescript-ts-mode indentation for interface bodies (... commit 966ece247562086ef8a25ce9c985f583bb3c2822 Merge: b1e33b0b621 95d9e6eb6b4 Author: Eli Zaretskii Date: Sat Mar 30 04:38:18 2024 -0400 ; Merge from origin/emacs-29 The following commit was skipped: 95d9e6eb6b4 * Don't install unnecessary trampolines (bug#69573) (don'... commit b1e33b0b62115148909de4923b04eac49e1433d4 Merge: 1284aa2655b 38faacf353f Author: Eli Zaretskii Date: Sat Mar 30 04:38:17 2024 -0400 Merge from origin/emacs-29 38faacf353f Improve documentation of in user manual 9d3d77f12da Fix documentation of 'other-window-for-scrolling' commit 1284aa2655bfc21f973a1b6859138921c47ecc9e Merge: 826998e82f2 96fb7199424 Author: Eli Zaretskii Date: Sat Mar 30 04:38:17 2024 -0400 ; Merge from origin/emacs-29 The following commit was skipped: 96fb7199424 Bump Emacs version to 29.3.50 commit 826998e82f26be2a839e4a755f9be28a1f47b9af Merge: 706fda358bc ae8f815613c Author: Eli Zaretskii Date: Sat Mar 30 04:37:43 2024 -0400 Merge from origin/emacs-29 ae8f815613c Update files for Emacs 29.3 commit 706fda358bcb2c12eefa974d6a3b3f01ed1bd2ef Merge: cd60fa42f68 ff6cc3d2cf0 Author: Eli Zaretskii Date: Sat Mar 30 04:36:13 2024 -0400 ; Merge from origin/emacs-29 The following commits were skipped: ff6cc3d2cf0 * lisp/ldefs-boot.el: Regenerate. 0dab0c0d688 Bump Emacs version to 29.3 commit cd60fa42f6803d4d34dedfaea3d1ed5c16349670 Merge: 06882a2d768 7a5d7be52c5 Author: Eli Zaretskii Date: Sat Mar 30 04:35:24 2024 -0400 Merge from origin/emacs-29 7a5d7be52c5 org--confirm-resource-safe: Fix prompt when prompting in ... 2bc865ace05 org-file-contents: Consider all remote files unsafe 6f9ea396f49 org-latex-preview: Add protection when `untrusted-content... 937b9042ad7 * lisp/gnus/mm-view.el (mm-display-inline-fontify): Mark ... ccc188fcf98 * lisp/files.el (untrusted-content): New variable. befa9fcaae2 org-macro--set-templates: Prevent code evaluation 3221d8d4611 * admin/authors.el (authors-aliases): Add ignored authors. 8d8253f8991 * etc/NEWS: Update for Emacs 29.3 commit 06882a2d768241a814d7f9da24e1e5436207c0d8 Author: Eli Zaretskii Date: Sat Mar 30 10:52:00 2024 +0300 ; Fix last change in inclusion of byteswap.h header * src/w32uniscribe.c (w32hb_get_font_table): Avoid warning due to redefinition of 'bswap_32'. diff --git a/src/w32uniscribe.c b/src/w32uniscribe.c index 84d0d95b2ab..b3112912c76 100644 --- a/src/w32uniscribe.c +++ b/src/w32uniscribe.c @@ -33,11 +33,6 @@ along with GNU Emacs. If not, see . */ #ifdef HAVE_HARFBUZZ # include # include /* for hb_ot_font_set_funcs */ -# if GNUC_PREREQ (4, 3, 0) -# define bswap_32(v) __builtin_bswap32(v) -# else -# include -# endif #endif #include "lisp.h" @@ -1270,7 +1265,11 @@ w32hb_get_font_table (hb_face_t *face, hb_tag_t tag, void *data) HFONT old_font = SelectObject (context, (HFONT) data); char *font_data = NULL; DWORD font_data_size = 0, val; +#if GNUC_PREREQ (4, 3, 0) + DWORD table = __builtin_bswap32 (tag); +#else DWORD table = bswap_32 (tag); +#endif hb_blob_t *blob = NULL; val = GetFontData (context, table, 0, font_data, font_data_size); commit d422e985ff59906e5c8b93e583d084e6d1462539 Merge: c2d21bda618 86c4e5a2fb3 Author: Eli Zaretskii Date: Sat Mar 30 10:38:34 2024 +0300 Merge branch 'master' of git.savannah.gnu.org:/srv/git/emacs commit 86c4e5a2fb3fd6b7acb8a3fc10e1e7c2eb8012a9 Author: Andreas Schwab Date: Sat Mar 30 08:29:52 2024 +0100 Fix implicit declaration of bswap_{32,64} * src/data.c: Move include of ... * src/lisp.h: ... here. diff --git a/src/data.c b/src/data.c index a86f86c52f5..c4b9cff8ae0 100644 --- a/src/data.c +++ b/src/data.c @@ -23,7 +23,6 @@ along with GNU Emacs. If not, see . */ #include #include -#include #include #include #include diff --git a/src/lisp.h b/src/lisp.h index 6226ab33244..f066c876619 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -36,6 +36,7 @@ along with GNU Emacs. If not, see . */ #endif #include +#include #include #include #include commit c2d21bda6182511f453f7bea2cbff2e0640625c9 Author: Eli Zaretskii Date: Sat Mar 30 10:37:09 2024 +0300 Clean up removal of libjansson parser * src/json.c (json_has_suffix, json_has_prefix): Remove unused functions. (json_object_key_to_iter, json_array_get, json_loads) (json_load_callback, json_object_iter, json_object_iter_value) (json_string_value, json_object_size, json_object_iter_key) (json_object_iter_next, json_real_value, json_string_length) (json_integer_value) [WINDOWSNT]: Don't DEF_DLL_FN, don't LOAD_DLL_FN, and don't define a macro for unused libjansson functions. (JSON_HAS_ERROR_CODE): Remove: not used. diff --git a/src/json.c b/src/json.c index bdb9e4cdd58..afc48c59d5a 100644 --- a/src/json.c +++ b/src/json.c @@ -31,8 +31,6 @@ along with GNU Emacs. If not, see . */ #include "buffer.h" #include "coding.h" -#define JSON_HAS_ERROR_CODE (JANSSON_VERSION_HEX >= 0x020B00) - #ifdef WINDOWSNT # include # include "w32common.h" @@ -57,23 +55,7 @@ DEF_DLL_FN (char *, json_dumps, (const json_t *json, size_t flags)); DEF_DLL_FN (int, json_dump_callback, (const json_t *json, json_dump_callback_t callback, void *data, size_t flags)); -DEF_DLL_FN (json_int_t, json_integer_value, (const json_t *integer)); -DEF_DLL_FN (double, json_real_value, (const json_t *real)); -DEF_DLL_FN (const char *, json_string_value, (const json_t *string)); -DEF_DLL_FN (size_t, json_string_length, (const json_t *string)); -DEF_DLL_FN (json_t *, json_array_get, (const json_t *array, size_t index)); DEF_DLL_FN (json_t *, json_object_get, (const json_t *object, const char *key)); -DEF_DLL_FN (size_t, json_object_size, (const json_t *object)); -DEF_DLL_FN (const char *, json_object_iter_key, (void *iter)); -DEF_DLL_FN (void *, json_object_iter, (json_t *object)); -DEF_DLL_FN (json_t *, json_object_iter_value, (void *iter)); -DEF_DLL_FN (void *, json_object_key_to_iter, (const char *key)); -DEF_DLL_FN (void *, json_object_iter_next, (json_t *object, void *iter)); -DEF_DLL_FN (json_t *, json_loads, - (const char *input, size_t flags, json_error_t *error)); -DEF_DLL_FN (json_t *, json_load_callback, - (json_load_callback_t callback, void *data, size_t flags, - json_error_t *error)); /* This is called by json_decref, which is an inline function. */ void json_delete(json_t *json) @@ -106,20 +88,7 @@ init_json_functions (void) LOAD_DLL_FN (library, json_stringn); LOAD_DLL_FN (library, json_dumps); LOAD_DLL_FN (library, json_dump_callback); - LOAD_DLL_FN (library, json_integer_value); - LOAD_DLL_FN (library, json_real_value); - LOAD_DLL_FN (library, json_string_value); - LOAD_DLL_FN (library, json_string_length); - LOAD_DLL_FN (library, json_array_get); LOAD_DLL_FN (library, json_object_get); - LOAD_DLL_FN (library, json_object_size); - LOAD_DLL_FN (library, json_object_iter_key); - LOAD_DLL_FN (library, json_object_iter); - LOAD_DLL_FN (library, json_object_iter_value); - LOAD_DLL_FN (library, json_object_key_to_iter); - LOAD_DLL_FN (library, json_object_iter_next); - LOAD_DLL_FN (library, json_loads); - LOAD_DLL_FN (library, json_load_callback); init_json (); @@ -140,20 +109,7 @@ init_json_functions (void) #define json_stringn fn_json_stringn #define json_dumps fn_json_dumps #define json_dump_callback fn_json_dump_callback -#define json_integer_value fn_json_integer_value -#define json_real_value fn_json_real_value -#define json_string_value fn_json_string_value -#define json_string_length fn_json_string_length -#define json_array_get fn_json_array_get #define json_object_get fn_json_object_get -#define json_object_size fn_json_object_size -#define json_object_iter_key fn_json_object_iter_key -#define json_object_iter fn_json_object_iter -#define json_object_iter_value fn_json_object_iter_value -#define json_object_key_to_iter fn_json_object_key_to_iter -#define json_object_iter_next fn_json_object_iter_next -#define json_loads fn_json_loads -#define json_load_callback fn_json_load_callback #endif /* WINDOWSNT */ @@ -191,29 +147,6 @@ init_json (void) json_set_alloc_funcs (json_malloc, json_free); } -#if !JSON_HAS_ERROR_CODE - -/* Return whether STRING starts with PREFIX. */ - -static bool -json_has_prefix (const char *string, const char *prefix) -{ - return strncmp (string, prefix, strlen (prefix)) == 0; -} - -/* Return whether STRING ends with SUFFIX. */ - -static bool -json_has_suffix (const char *string, const char *suffix) -{ - size_t string_len = strlen (string); - size_t suffix_len = strlen (suffix); - return string_len >= suffix_len - && memcmp (string + string_len - suffix_len, suffix, suffix_len) == 0; -} - -#endif - /* Note that all callers of make_string_from_utf8 and build_string_from_utf8 below either pass only value UTF-8 strings or use the functionf for formatting error messages; in the latter case correctness isn't commit a5df4d92e37a176396577ac901b85025a6952376 Author: Géza Herman Date: Wed Mar 6 13:14:50 2024 +0100 Replace libjansson JSON parser with a custom one * src/json.c (json_parse_error, json_to_lisp) (json_read_buffer_callback): Remove functions. (struct json_parser): New struct. (json_signal_error, json_parser_init, json_parser_done) (json_make_object_workspace_for_slow_path) (json_make_object_workspace_for, json_byte_workspace_reset) (json_byte_workspace_put_slow_path, json_byte_workspace_put) (json_input_at_eof, json_input_switch_to_secondary) (json_input_get_slow_path, json_input_get) (json_input_get_if_possible, json_input_put_back) (json_skip_whitespace_internal, json_skip_whitespace) (json_skip_whitespace_if_possible, json_hex_value) (json_parse_unicode, json_handle_utf8_tail_bytes) (json_parse_string, json_create_integer, json_create_float) (json_parse_number, json_parse_array) (json_parse_object_member_value, json_parse_object) (json_is_token_char, json_parse_value, json_parse): New functions. (Fjson_parse_buffer, Fjson_parse_string): Adjust to changes in the parser. (syms_of_json): DEFSYM new symbols and define_error new errors. diff --git a/src/json.c b/src/json.c index e849ccaf722..bdb9e4cdd58 100644 --- a/src/json.c +++ b/src/json.c @@ -23,6 +23,7 @@ along with GNU Emacs. If not, see . */ #include #include #include +#include #include @@ -237,41 +238,6 @@ json_out_of_memory (void) xsignal0 (Qjson_out_of_memory); } -/* Signal a Lisp error corresponding to the JSON ERROR. */ - -static AVOID -json_parse_error (const json_error_t *error) -{ - Lisp_Object symbol; -#if JSON_HAS_ERROR_CODE - switch (json_error_code (error)) - { - case json_error_premature_end_of_input: - symbol = Qjson_end_of_file; - break; - case json_error_end_of_input_expected: - symbol = Qjson_trailing_content; - break; - default: - symbol = Qjson_parse_error; - break; - } -#else - if (json_has_suffix (error->text, "expected near end of file")) - symbol = Qjson_end_of_file; - else if (json_has_prefix (error->text, "end of file expected")) - symbol = Qjson_trailing_content; - else - symbol = Qjson_parse_error; -#endif - xsignal (symbol, - list5 (build_string_from_utf8 (error->text), - build_string_from_utf8 (error->source), - INT_TO_INTEGER (error->line), - INT_TO_INTEGER (error->column), - INT_TO_INTEGER (error->position))); -} - static void json_release_object (void *object) { @@ -794,145 +760,1087 @@ usage: (json-insert OBJECT &rest ARGS) */) return unbind_to (count, Qnil); } -/* Convert a JSON object to a Lisp object. */ +#define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64 +#define JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE 512 + +struct json_parser +{ + /* Because of a possible gap in the input (an emacs buffer can have + a gap), the input is described by [input_begin;input_end) and + [secondary_input_begin;secondary_input_end). If the input is + continuous, then secondary_input_begin and secondary_input_end + should be NULL */ + const unsigned char *input_current; + const unsigned char *input_begin; + const unsigned char *input_end; + + const unsigned char *secondary_input_begin; + const unsigned char *secondary_input_end; + + ptrdiff_t current_line; + ptrdiff_t current_column; + ptrdiff_t point_of_current_line; + + /* The parser has a maximum allowed depth. available_depth + decreases at each object/array begin. If reaches zero, then an + error is generated */ + int available_depth; + + struct json_configuration conf; + + size_t additional_bytes_count; + + /* Lisp_Objects are collected in this area during object/array + parsing. To avoid allocations, initially + internal_object_workspace is used. If it runs out of space then + we switch to allocated space. Important note: with this design, + GC must not run during JSON parsing, otherwise Lisp_Objects in + the workspace may get incorrectly collected. */ + Lisp_Object internal_object_workspace + [JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE]; + Lisp_Object *object_workspace; + size_t object_workspace_size; + size_t object_workspace_current; + + /* String and number parsing uses this workspace. The idea behind + internal_byte_workspace is the same as the idea behind + internal_object_workspace */ + unsigned char + internal_byte_workspace[JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE]; + unsigned char *byte_workspace; + unsigned char *byte_workspace_end; + unsigned char *byte_workspace_current; +}; + +static AVOID +json_signal_error (struct json_parser *parser, Lisp_Object error) +{ + xsignal3 (error, INT_TO_INTEGER (parser->current_line), + INT_TO_INTEGER (parser->current_column), + INT_TO_INTEGER (parser->point_of_current_line + + parser->current_column)); +} + +static void +json_parser_init (struct json_parser *parser, + struct json_configuration conf, + const unsigned char *input, + const unsigned char *input_end, + const unsigned char *secondary_input, + const unsigned char *secondary_input_end) +{ + if (secondary_input >= secondary_input_end) + { + secondary_input = NULL; + secondary_input_end = NULL; + } + + if (input < input_end) + { + parser->input_begin = input; + parser->input_end = input_end; + + parser->secondary_input_begin = secondary_input; + parser->secondary_input_end = secondary_input_end; + } + else + { + parser->input_begin = secondary_input; + parser->input_end = secondary_input_end; + + parser->secondary_input_begin = NULL; + parser->secondary_input_end = NULL; + } + + parser->input_current = parser->input_begin; + + parser->current_line = 1; + parser->current_column = 0; + parser->point_of_current_line = 0; + parser->available_depth = 10000; + parser->conf = conf; + + parser->additional_bytes_count = 0; + + parser->object_workspace = parser->internal_object_workspace; + parser->object_workspace_size + = JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE; + parser->object_workspace_current = 0; + + parser->byte_workspace = parser->internal_byte_workspace; + parser->byte_workspace_end + = (parser->byte_workspace + + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); +} + +static void +json_parser_done (void *parser) +{ + struct json_parser *p = (struct json_parser *) parser; + if (p->object_workspace != p->internal_object_workspace) + xfree (p->object_workspace); + if (p->byte_workspace != p->internal_byte_workspace) + xfree (p->byte_workspace); +} + +/* Makes sure that the object_workspace has 'size' available space for + Lisp_Objects */ +NO_INLINE static void +json_make_object_workspace_for_slow_path (struct json_parser *parser, + size_t size) +{ + size_t needed_workspace_size + = (parser->object_workspace_current + size); + size_t new_workspace_size = parser->object_workspace_size; + while (new_workspace_size < needed_workspace_size) + { + if (ckd_mul (&new_workspace_size, new_workspace_size, 2)) + { + json_signal_error (parser, Qjson_out_of_memory); + } + } + + Lisp_Object *new_workspace_ptr; + if (parser->object_workspace_size + == JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE) + { + new_workspace_ptr + = xnmalloc (new_workspace_size, sizeof (Lisp_Object)); + memcpy (new_workspace_ptr, parser->object_workspace, + (sizeof (Lisp_Object) + * parser->object_workspace_current)); + } + else + { + new_workspace_ptr + = xnrealloc (parser->object_workspace, new_workspace_size, + sizeof (Lisp_Object)); + } + + parser->object_workspace = new_workspace_ptr; + parser->object_workspace_size = new_workspace_size; +} + +INLINE void +json_make_object_workspace_for (struct json_parser *parser, + size_t size) +{ + if (parser->object_workspace_size - parser->object_workspace_current + < size) + { + json_make_object_workspace_for_slow_path (parser, size); + } +} + +static void +json_byte_workspace_reset (struct json_parser *parser) +{ + parser->byte_workspace_current = parser->byte_workspace; +} + +/* Puts 'value' into the byte_workspace. If there is no space + available, it allocates space */ +NO_INLINE static void +json_byte_workspace_put_slow_path (struct json_parser *parser, + unsigned char value) +{ + size_t new_workspace_size + = parser->byte_workspace_end - parser->byte_workspace; + if (ckd_mul (&new_workspace_size, new_workspace_size, 2)) + { + json_signal_error (parser, Qjson_out_of_memory); + } + + size_t offset + = parser->byte_workspace_current - parser->byte_workspace; + + if (parser->byte_workspace == parser->internal_byte_workspace) + { + parser->byte_workspace = xmalloc (new_workspace_size); + memcpy (parser->byte_workspace, parser->internal_byte_workspace, + offset); + } + else + { + parser->byte_workspace + = xrealloc (parser->byte_workspace, new_workspace_size); + } + parser->byte_workspace_end + = parser->byte_workspace + new_workspace_size; + parser->byte_workspace_current = parser->byte_workspace + offset; + *parser->byte_workspace_current++ = value; +} + +INLINE void +json_byte_workspace_put (struct json_parser *parser, + unsigned char value) +{ + if (parser->byte_workspace_current < parser->byte_workspace_end) + { + *parser->byte_workspace_current++ = value; + } + else + { + json_byte_workspace_put_slow_path (parser, value); + } +} + +static bool +json_input_at_eof (struct json_parser *parser) +{ + if (parser->input_current < parser->input_end) + return false; + return parser->secondary_input_end == NULL; +} + +/* If there is a secondary buffer, this switches to it */ +static int +json_input_switch_to_secondary (struct json_parser *parser) +{ + if (parser->secondary_input_begin < parser->secondary_input_end) + { + parser->additional_bytes_count + = parser->input_end - parser->input_begin; + parser->input_begin = parser->secondary_input_begin; + parser->input_end = parser->secondary_input_end; + parser->input_current = parser->secondary_input_begin; + parser->secondary_input_begin = NULL; + parser->secondary_input_end = NULL; + return 0; + } + else + return -1; +} + +/* Reads a byte from the JSON input stream */ +NO_INLINE static unsigned char +json_input_get_slow_path (struct json_parser *parser) +{ + if (json_input_switch_to_secondary (parser) < 0) + json_signal_error (parser, Qjson_end_of_file); + return *parser->input_current++; +} + +static unsigned char +json_input_get (struct json_parser *parser) +{ + if (parser->input_current < parser->input_end) + return *parser->input_current++; + return json_input_get_slow_path (parser); +} + +/* Reads a byte from the JSON input stream, if the stream is not at + * eof. At eof, returns -1 */ +static int +json_input_get_if_possible (struct json_parser *parser) +{ + if (parser->input_current >= parser->input_end + && json_input_switch_to_secondary (parser) < 0) + return -1; + return *parser->input_current++; +} + +/* Puts back the last read input byte. Only one byte can be put back, + because otherwise this code would need to handle switching from + the secondary buffer to the initial */ +static void +json_input_put_back (struct json_parser *parser) +{ + parser->input_current--; +} + +static bool +json_skip_whitespace_internal (struct json_parser *parser, int c) +{ + parser->current_column++; + if (c == 0x20 || c == 0x09 || c == 0x0d) + return false; + else if (c == 0x0a) + { + parser->current_line++; + parser->point_of_current_line += parser->current_column; + parser->current_column = 0; + return false; + } + else + return true; +} + +/* Skips JSON whitespace, and returns with the first non-whitespace + * character */ +static int +json_skip_whitespace (struct json_parser *parser) +{ + for (;;) + { + int c = json_input_get (parser); + if (json_skip_whitespace_internal (parser, c)) + return c; + } +} + +/* Skips JSON whitespace, and returns with the first non-whitespace + * character, if possible. If there is no non-whitespace character + * (because we reached the end), it returns -1 */ +static int +json_skip_whitespace_if_possible (struct json_parser *parser) +{ + for (;;) + { + int c = json_input_get_if_possible (parser); + if (c < 0) + return c; + if (json_skip_whitespace_internal (parser, c)) + return c; + } +} + +static int +json_hex_value (int c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + else + return -1; +} + +/* Parses the CCCC part of the unicode escape sequence \uCCCC */ +static int +json_parse_unicode (struct json_parser *parser) +{ + unsigned char v[4]; + for (int i = 0; i < 4; i++) + { + int c = json_hex_value (json_input_get (parser)); + parser->current_column++; + if (c < 0) + json_signal_error (parser, Qjson_escape_sequence_error); + v[i] = c; + } + + return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3]; +} + +/* Parses an utf-8 code-point encoding (except the first byte), and + returns the numeric value of the code-point (without considering + the first byte) */ +static int +json_handle_utf8_tail_bytes (struct json_parser *parser, int n) +{ + int v = 0; + for (int i = 0; i < n; i++) + { + int c = json_input_get (parser); + json_byte_workspace_put (parser, c); + if ((c & 0xc0) != 0x80) + json_signal_error (parser, Qjson_utf8_decode_error); + v = (v << 6) | (c & 0x3f); + } + return v; +} + +/* Reads a JSON string, and puts the result into the byte workspace */ +static void +json_parse_string (struct json_parser *parser) +{ + /* a single_uninteresting byte can be simply copied from the input + to output, it doesn't need any extra care. This means all the + characters between [0x20;0x7f], except the double quote and + the backslash */ + static const char is_single_uninteresting[256] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + for (;;) + { + /* This if is only here for a possible speedup. If there are 4 + bytes available, and all of them are single_uninteresting, + then we can just copy these 4 bytes to output */ + if (parser->input_end - parser->input_current >= 4) + { + int c0 = parser->input_current[0]; + int c1 = parser->input_current[1]; + int c2 = parser->input_current[2]; + int c3 = parser->input_current[3]; + bool v0 = is_single_uninteresting[c0]; + bool v1 = is_single_uninteresting[c1]; + bool v2 = is_single_uninteresting[c2]; + bool v3 = is_single_uninteresting[c3]; + if (v0 && v1 && v2 && v3) + { + json_byte_workspace_put (parser, c0); + json_byte_workspace_put (parser, c1); + json_byte_workspace_put (parser, c2); + json_byte_workspace_put (parser, c3); + parser->input_current += 4; + parser->current_column += 4; + continue; + } + } + + int c = json_input_get (parser); + parser->current_column++; + if (is_single_uninteresting[c]) + { + json_byte_workspace_put (parser, c); + continue; + } + + if (c == '"') + return; + else if (c & 0x80) + { + /* Handle utf-8 encoding */ + json_byte_workspace_put (parser, c); + if (c < 0xc0) + json_signal_error (parser, Qjson_utf8_decode_error); + else if (c < 0xe0) + { + int n = ((c & 0x1f) << 6 + | json_handle_utf8_tail_bytes (parser, 1)); + if (n < 0x80) + json_signal_error (parser, Qjson_utf8_decode_error); + } + else if (c < 0xf0) + { + int n = ((c & 0xf) << 12 + | json_handle_utf8_tail_bytes (parser, 2)); + if (n < 0x800 || (n >= 0xd800 && n < 0xe000)) + json_signal_error (parser, Qjson_utf8_decode_error); + } + else if (c < 0xf8) + { + int n = ((c & 0x7) << 18 + | json_handle_utf8_tail_bytes (parser, 3)); + if (n < 0x10000 || n > 0x10ffff) + json_signal_error (parser, Qjson_utf8_decode_error); + } + else + json_signal_error (parser, Qjson_utf8_decode_error); + } + else if (c == '\\') + { + /* Handle escape sequences */ + c = json_input_get (parser); + parser->current_column++; + if (c == '"') + json_byte_workspace_put (parser, '"'); + else if (c == '\\') + json_byte_workspace_put (parser, '\\'); + else if (c == '/') + json_byte_workspace_put (parser, '/'); + else if (c == 'b') + json_byte_workspace_put (parser, '\b'); + else if (c == 'f') + json_byte_workspace_put (parser, '\f'); + else if (c == 'n') + json_byte_workspace_put (parser, '\n'); + else if (c == 'r') + json_byte_workspace_put (parser, '\r'); + else if (c == 't') + json_byte_workspace_put (parser, '\t'); + else if (c == 'u') + { + int num = json_parse_unicode (parser); + /* is the first half of the surrogate pair */ + if (num >= 0xd800 && num < 0xdc00) + { + parser->current_column++; + if (json_input_get (parser) != '\\') + json_signal_error (parser, + Qjson_invalid_surrogate_error); + parser->current_column++; + if (json_input_get (parser) != 'u') + json_signal_error (parser, + Qjson_invalid_surrogate_error); + int num2 = json_parse_unicode (parser); + if (num2 < 0xdc00 || num2 >= 0xe000) + json_signal_error (parser, + Qjson_invalid_surrogate_error); + num = (0x10000 + + ((num - 0xd800) << 10 | (num2 - 0xdc00))); + } + else if (num >= 0xdc00 && num < 0xe000) + /* is the second half of the surrogate pair without + the first half */ + json_signal_error (parser, + Qjson_invalid_surrogate_error); + + /* utf-8 encode the code-point */ + if (num < 0x80) + json_byte_workspace_put (parser, num); + else if (num < 0x800) + { + json_byte_workspace_put (parser, 0xc0 | num >> 6); + json_byte_workspace_put (parser, + 0x80 | (num & 0x3f)); + } + else if (num < 0x10000) + { + json_byte_workspace_put (parser, 0xe0 | num >> 12); + json_byte_workspace_put (parser, + (0x80 + | ((num >> 6) & 0x3f))); + json_byte_workspace_put (parser, + 0x80 | (num & 0x3f)); + } + else + { + json_byte_workspace_put (parser, 0xf0 | num >> 18); + json_byte_workspace_put (parser, + (0x80 + | ((num >> 12) & 0x3f))); + json_byte_workspace_put (parser, + (0x80 + | ((num >> 6) & 0x3f))); + json_byte_workspace_put (parser, + 0x80 | (num & 0x3f)); + } + } + else + json_signal_error (parser, Qjson_escape_sequence_error); + } + else + json_signal_error (parser, Qjson_parse_error); + } +} + +/* If there was no integer overflow during parsing the integer, this + puts 'value' to the output. Otherwise this calls string_to_number + to parse integer on the byte workspace. This could just always + call string_to_number, but for performance reasons, during parsing + the code tries to calculate the value, so in most cases, we can + save call of string_to_number */ +static Lisp_Object +json_create_integer (struct json_parser *parser, + bool integer_overflow, bool negative, + EMACS_UINT value) +{ + if (!integer_overflow) + { + if (negative) + { + uintmax_t v = value; + if (v <= (uintmax_t) INTMAX_MAX + 1) + return INT_TO_INTEGER ((intmax_t) -v); + } + else + return INT_TO_INTEGER (value); + } + + json_byte_workspace_put (parser, 0); + ptrdiff_t len; + Lisp_Object result + = string_to_number ((const char *) parser->byte_workspace, 10, + &len); + if (len + != parser->byte_workspace_current - parser->byte_workspace - 1) + json_signal_error (parser, Qjson_error); + return result; +} + +/* Parses a float using the byte workspace */ +static Lisp_Object +json_create_float (struct json_parser *parser) +{ + json_byte_workspace_put (parser, 0); + errno = 0; + char *e; + double value = strtod ((const char *) parser->byte_workspace, &e); + bool out_of_range + = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL)); + if (out_of_range) + json_signal_error (parser, Qjson_number_out_of_range); + else if ((const unsigned char *) e + != parser->byte_workspace_current - 1) + json_signal_error (parser, Qjson_error); + else + return make_float (value); +} + +/* Parses a number. The first character is the input parameter 'c'. + */ +static Lisp_Object +json_parse_number (struct json_parser *parser, int c) +{ + json_byte_workspace_reset (parser); + json_byte_workspace_put (parser, c); + + bool negative = false; + if (c == '-') + { + negative = true; + c = json_input_get (parser); + json_byte_workspace_put (parser, c); + parser->current_column++; + } + if (c < '0' || c > '9') + json_signal_error (parser, Qjson_parse_error); + + /* The idea is that during finding the last character of the + number, the for loop below also tries to calculate the value. If + the parsed number is an integer which fits into unsigned long, + then the parser can use the value of 'integer' right away, + instead of having to re-parse the byte workspace later. + Ideally, this integer should have the same size as a CPU general + purpose register. */ + EMACS_UINT integer = c - '0'; + bool integer_overflow = false; + + if (integer == 0) + { + if (json_input_at_eof (parser)) + return INT_TO_INTEGER (0); + c = json_input_get (parser); + } + else + { + for (;;) + { + if (json_input_at_eof (parser)) + return json_create_integer (parser, integer_overflow, + negative, integer); + c = json_input_get (parser); + if (c < '0' || c > '9') + break; + json_byte_workspace_put (parser, c); + parser->current_column++; + + integer_overflow |= ckd_mul (&integer, integer, 10); + integer_overflow |= ckd_add (&integer, integer, c - '0'); + } + } + + bool is_float = false; + if (c == '.') + { + json_byte_workspace_put (parser, c); + parser->current_column++; + + is_float = true; + c = json_input_get (parser); + json_byte_workspace_put (parser, c); + parser->current_column++; + if (c < '0' || c > '9') + json_signal_error (parser, Qjson_parse_error); + for (;;) + { + if (json_input_at_eof (parser)) + return json_create_float (parser); + c = json_input_get (parser); + if (c < '0' || c > '9') + break; + json_byte_workspace_put (parser, c); + parser->current_column++; + } + } + if (c == 'e' || c == 'E') + { + json_byte_workspace_put (parser, c); + parser->current_column++; + + is_float = true; + c = json_input_get (parser); + json_byte_workspace_put (parser, c); + parser->current_column++; + if (c == '-' || c == '+') + { + c = json_input_get (parser); + json_byte_workspace_put (parser, c); + parser->current_column++; + } + if (c < '0' || c > '9') + json_signal_error (parser, Qjson_parse_error); + for (;;) + { + if (json_input_at_eof (parser)) + return json_create_float (parser); + c = json_input_get (parser); + if (c < '0' || c > '9') + break; + json_byte_workspace_put (parser, c); + parser->current_column++; + } + } + + /* 'c' contains a character which is not part of the number, + so it is need to be put back */ + json_input_put_back (parser); + + if (is_float) + return json_create_float (parser); + else + return json_create_integer (parser, integer_overflow, negative, + integer); +} + +static Lisp_Object json_parse_value (struct json_parser *parser, + int c); -static Lisp_Object ARG_NONNULL ((1)) -json_to_lisp (json_t *json, const struct json_configuration *conf) +/* Parses a JSON array. */ +static Lisp_Object +json_parse_array (struct json_parser *parser) { - switch (json_typeof (json)) + int c = json_skip_whitespace (parser); + + const size_t first = parser->object_workspace_current; + Lisp_Object result = Qnil; + + if (c != ']') + { + parser->available_depth--; + if (parser->available_depth < 0) + json_signal_error (parser, Qjson_object_too_deep); + + size_t number_of_elements = 0; + Lisp_Object *cdr = &result; + /* This loop collects the array elements in the object workspace + */ + for (;;) + { + Lisp_Object element = json_parse_value (parser, c); + switch (parser->conf.array_type) + { + case json_array_array: + json_make_object_workspace_for (parser, 1); + parser->object_workspace[parser->object_workspace_current] + = element; + parser->object_workspace_current++; + break; + case json_array_list: + { + Lisp_Object nc = Fcons (element, Qnil); + *cdr = nc; + cdr = xcdr_addr (nc); + break; + } + default: + emacs_abort (); + } + + c = json_skip_whitespace (parser); + + number_of_elements++; + if (c == ']') + { + parser->available_depth++; + break; + } + + if (c != ',') + json_signal_error (parser, Qjson_parse_error); + + c = json_skip_whitespace (parser); + } + } + + switch (parser->conf.array_type) { - case JSON_NULL: - return conf->null_object; - case JSON_FALSE: - return conf->false_object; - case JSON_TRUE: - return Qt; - case JSON_INTEGER: + case json_array_array: { - json_int_t i = json_integer_value (json); - return INT_TO_INTEGER (i); + size_t number_of_elements + = parser->object_workspace_current - first; + result = make_vector (number_of_elements, Qnil); + for (size_t i = 0; i < number_of_elements; i++) + { + rarely_quit (i); + ASET (result, i, parser->object_workspace[first + i]); + } + parser->object_workspace_current = first; + break; } - case JSON_REAL: - return make_float (json_real_value (json)); - case JSON_STRING: - return make_string_from_utf8 (json_string_value (json), - json_string_length (json)); - case JSON_ARRAY: + case json_array_list: + break; + default: + emacs_abort (); + } + + return result; +} + +/* Parses the ": value" part of a JSON object member. */ +static Lisp_Object +json_parse_object_member_value (struct json_parser *parser) +{ + int c = json_skip_whitespace (parser); + if (c != ':') + json_signal_error (parser, Qjson_parse_error); + + c = json_skip_whitespace (parser); + + return json_parse_value (parser, c); +} + +/* Parses a JSON object. */ +static Lisp_Object +json_parse_object (struct json_parser *parser) +{ + int c = json_skip_whitespace (parser); + + const size_t first = parser->object_workspace_current; + Lisp_Object result = Qnil; + + if (c != '}') + { + parser->available_depth--; + if (parser->available_depth < 0) + json_signal_error (parser, Qjson_object_too_deep); + + Lisp_Object *cdr = &result; + + /* This loop collects the object members (key/value pairs) in + * the object workspace */ + for (;;) + { + if (c != '"') + json_signal_error (parser, Qjson_parse_error); + + json_byte_workspace_reset (parser); + switch (parser->conf.object_type) + { + case json_object_hashtable: + { + json_parse_string (parser); + Lisp_Object key + = make_string_from_utf8 ((char *) + parser->byte_workspace, + (parser->byte_workspace_current + - parser->byte_workspace)); + Lisp_Object value + = json_parse_object_member_value (parser); + json_make_object_workspace_for (parser, 2); + parser->object_workspace[parser->object_workspace_current] + = key; + parser->object_workspace_current++; + parser->object_workspace[parser->object_workspace_current] + = value; + parser->object_workspace_current++; + break; + } + case json_object_alist: + { + json_parse_string (parser); + Lisp_Object key + = Fintern (make_string_from_utf8 ( + (char *) parser->byte_workspace, + (parser->byte_workspace_current + - parser->byte_workspace)), + Qnil); + Lisp_Object value + = json_parse_object_member_value (parser); + Lisp_Object nc = Fcons (Fcons (key, value), Qnil); + *cdr = nc; + cdr = xcdr_addr (nc); + break; + } + case json_object_plist: + { + json_byte_workspace_put (parser, ':'); + json_parse_string (parser); + Lisp_Object key + = intern_1 ((char *) parser->byte_workspace, + (parser->byte_workspace_current + - parser->byte_workspace)); + Lisp_Object value + = json_parse_object_member_value (parser); + Lisp_Object nc = Fcons (key, Qnil); + *cdr = nc; + cdr = xcdr_addr (nc); + + nc = Fcons (value, Qnil); + *cdr = nc; + cdr = xcdr_addr (nc); + break; + } + default: + emacs_abort (); + } + + c = json_skip_whitespace (parser); + + if (c == '}') + { + parser->available_depth++; + break; + } + + if (c != ',') + json_signal_error (parser, Qjson_parse_error); + + c = json_skip_whitespace (parser); + } + } + + switch (parser->conf.object_type) + { + case json_object_hashtable: { - if (++lisp_eval_depth > max_lisp_eval_depth) - xsignal0 (Qjson_object_too_deep); - size_t size = json_array_size (json); - if (PTRDIFF_MAX < size) - overflow_error (); - Lisp_Object result; - switch (conf->array_type) - { - case json_array_array: - { - result = make_vector (size, Qunbound); - for (ptrdiff_t i = 0; i < size; ++i) - { - rarely_quit (i); - ASET (result, i, - json_to_lisp (json_array_get (json, i), conf)); - } - break; - } - case json_array_list: - { - result = Qnil; - for (ptrdiff_t i = size - 1; i >= 0; --i) - { - rarely_quit (i); - result = Fcons (json_to_lisp (json_array_get (json, i), conf), - result); - } - break; - } - default: - /* Can't get here. */ - emacs_abort (); - } - --lisp_eval_depth; - return result; + result + = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, + make_fixed_natnum ( + (parser->object_workspace_current - first) / 2)); + struct Lisp_Hash_Table *h = XHASH_TABLE (result); + for (size_t i = first; i < parser->object_workspace_current; + i += 2) + { + hash_hash_t hash; + Lisp_Object key = parser->object_workspace[i]; + Lisp_Object value = parser->object_workspace[i + 1]; + ptrdiff_t i = hash_lookup_get_hash (h, key, &hash); + if (i < 0) + hash_put (h, key, value, hash); + else + set_hash_value_slot (h, i, value); + } + parser->object_workspace_current = first; + break; } - case JSON_OBJECT: + case json_object_alist: + case json_object_plist: + break; + default: + emacs_abort (); + } + + return result; +} + +/* Token-char is not a JSON terminology. When parsing + null/false/true, this function tells the character set that is need + to be considered as part of a token. For example, if the input is + "truesomething", then the parser shouldn't consider it as "true", + and an additional later "something" token. An additional example: + if the input is "truetrue", then calling (json-parse-buffer) twice + shouldn't produce two successful calls which return t, but a + parsing error */ +static bool +json_is_token_char (int c) +{ + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + || (c >= '0' && c <= '9') || (c == '-')); +} + +/* This is the entry point to the value parser, this parses a JSON + * value */ +Lisp_Object +json_parse_value (struct json_parser *parser, int c) +{ + if (c == '{') + return json_parse_object (parser); + else if (c == '[') + return json_parse_array (parser); + else if (c == '"') + { + json_byte_workspace_reset (parser); + json_parse_string (parser); + Lisp_Object result + = make_string_from_utf8 ((const char *) + parser->byte_workspace, + (parser->byte_workspace_current + - parser->byte_workspace)); + return result; + } + else if ((c >= '0' && c <= '9') || (c == '-')) + return json_parse_number (parser, c); + else + { + int c2 = json_input_get (parser); + int c3 = json_input_get (parser); + int c4 = json_input_get (parser); + int c5 = json_input_get_if_possible (parser); + + if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e' + && (c5 < 0 || !json_is_token_char (c5))) + { + if (c5 >= 0) + json_input_put_back (parser); + parser->current_column += 3; + return Qt; + } + if (c == 'n' && c2 == 'u' && c3 == 'l' && c4 == 'l' + && (c5 < 0 || !json_is_token_char (c5))) + { + if (c5 >= 0) + json_input_put_back (parser); + parser->current_column += 3; + return parser->conf.null_object; + } + if (c == 'f' && c2 == 'a' && c3 == 'l' && c4 == 's' + && c5 == 'e') + { + int c6 = json_input_get_if_possible (parser); + if (c6 < 0 || !json_is_token_char (c6)) + { + if (c6 >= 0) + json_input_put_back (parser); + parser->current_column += 4; + return parser->conf.false_object; + } + } + + json_signal_error (parser, Qjson_parse_error); + } +} + +enum ParseEndBehavior + { + PARSEENDBEHAVIOR_CheckForGarbage, + PARSEENDBEHAVIOR_MovePoint + }; + +static Lisp_Object +json_parse (struct json_parser *parser, + enum ParseEndBehavior parse_end_behavior) +{ + int c = json_skip_whitespace (parser); + + Lisp_Object result = json_parse_value (parser, c); + + switch (parse_end_behavior) + { + case PARSEENDBEHAVIOR_CheckForGarbage: + c = json_skip_whitespace_if_possible (parser); + if (c >= 0) + json_signal_error (parser, Qjson_trailing_content); + break; + case PARSEENDBEHAVIOR_MovePoint: { - if (++lisp_eval_depth > max_lisp_eval_depth) - xsignal0 (Qjson_object_too_deep); - Lisp_Object result; - switch (conf->object_type) - { - case json_object_hashtable: - { - size_t size = json_object_size (json); - if (FIXNUM_OVERFLOW_P (size)) - overflow_error (); - result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, - make_fixed_natnum (size)); - struct Lisp_Hash_Table *h = XHASH_TABLE (result); - const char *key_str; - json_t *value; - json_object_foreach (json, key_str, value) - { - Lisp_Object key = build_string_from_utf8 (key_str); - hash_hash_t hash; - ptrdiff_t i = hash_lookup_get_hash (h, key, &hash); - /* Keys in JSON objects are unique, so the key can't - be present yet. */ - eassert (i < 0); - hash_put (h, key, json_to_lisp (value, conf), hash); - } - break; - } - case json_object_alist: - { - result = Qnil; - const char *key_str; - json_t *value; - json_object_foreach (json, key_str, value) - { - Lisp_Object key - = Fintern (build_string_from_utf8 (key_str), Qnil); - result - = Fcons (Fcons (key, json_to_lisp (value, conf)), - result); - } - result = Fnreverse (result); - break; - } - case json_object_plist: - { - result = Qnil; - const char *key_str; - json_t *value; - json_object_foreach (json, key_str, value) - { - USE_SAFE_ALLOCA; - ptrdiff_t key_str_len = strlen (key_str); - char *keyword_key_str = SAFE_ALLOCA (1 + key_str_len + 1); - keyword_key_str[0] = ':'; - strcpy (&keyword_key_str[1], key_str); - Lisp_Object key = intern_1 (keyword_key_str, key_str_len + 1); - /* Build the plist as value-key since we're going to - reverse it in the end.*/ - result = Fcons (key, result); - result = Fcons (json_to_lisp (value, conf), result); - SAFE_FREE (); - } - result = Fnreverse (result); - break; - } - default: - /* Can't get here. */ - emacs_abort (); - } - --lisp_eval_depth; - return result; + ptrdiff_t byte + = (PT_BYTE + parser->input_current - parser->input_begin + + parser->additional_bytes_count); + ptrdiff_t position; + if (NILP (BVAR (current_buffer, enable_multibyte_characters))) + position = byte; + else + position + = PT + parser->point_of_current_line + parser->current_column; + + SET_PT_BOTH (position, byte); + break; } } - /* Can't get here. */ - emacs_abort (); + + return result; } DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY, @@ -950,7 +1858,9 @@ The arguments ARGS are a list of keyword/argument pairs: The keyword argument `:object-type' specifies which Lisp type is used to represent objects; it can be `hash-table', `alist' or `plist'. It -defaults to `hash-table'. +defaults to `hash-table'. If an object has members with the same +key, `hash-table' keeps only the last value of such keys, while +`alist' and `plist' keep all the members. The keyword argument `:array-type' specifies which Lisp type is used to represent arrays; it can be `array' (the default) or `list'. @@ -961,62 +1871,27 @@ to represent a JSON null value. It defaults to `:null'. The keyword argument `:false-object' specifies which object to use to represent a JSON false value. It defaults to `:false'. usage: (json-parse-string STRING &rest ARGS) */) - (ptrdiff_t nargs, Lisp_Object *args) +(ptrdiff_t nargs, Lisp_Object *args) { specpdl_ref count = SPECPDL_INDEX (); -#ifdef WINDOWSNT - ensure_json_available (); -#endif - Lisp_Object string = args[0]; CHECK_STRING (string); Lisp_Object encoded = json_encode (string); - check_string_without_embedded_nulls (encoded); - struct json_configuration conf = - {json_object_hashtable, json_array_array, QCnull, QCfalse}; + struct json_configuration conf + = { json_object_hashtable, json_array_array, QCnull, QCfalse }; json_parse_args (nargs - 1, args + 1, &conf, true); - json_error_t error; - json_t *object - = json_loads (SSDATA (encoded), JSON_DECODE_ANY | JSON_ALLOW_NUL, &error); - if (object == NULL) - json_parse_error (&error); + struct json_parser p; + const unsigned char *begin + = (const unsigned char *) SSDATA (encoded); + json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL, + NULL); + record_unwind_protect_ptr (json_parser_done, &p); - /* Avoid leaking the object in case of further errors. */ - if (object != NULL) - record_unwind_protect_ptr (json_release_object, object); - - return unbind_to (count, json_to_lisp (object, &conf)); -} - -struct json_read_buffer_data -{ - /* Byte position of position to read the next chunk from. */ - ptrdiff_t point; -}; - -/* Callback for json_load_callback that reads from the current buffer. - DATA must point to a structure of type json_read_buffer_data. - data->point must point to the byte position to read from; after - reading, data->point is advanced accordingly. The buffer point - itself is ignored. This function may not exit nonlocally. */ - -static size_t -json_read_buffer_callback (void *buffer, size_t buflen, void *data) -{ - struct json_read_buffer_data *d = data; - - /* First, parse from point to the gap or the end of the accessible - portion, whatever is closer. */ - ptrdiff_t point = d->point; - ptrdiff_t end = BUFFER_CEILING_OF (point) + 1; - ptrdiff_t count = end - point; - if (buflen < count) - count = buflen; - memcpy (buffer, BYTE_POS_ADDR (point), count); - d->point += count; - return count; + return unbind_to (count, + json_parse (&p, + PARSEENDBEHAVIOR_CheckForGarbage)); } DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer, @@ -1038,7 +1913,9 @@ The arguments ARGS are a list of keyword/argument pairs: The keyword argument `:object-type' specifies which Lisp type is used to represent objects; it can be `hash-table', `alist' or `plist'. It -defaults to `hash-table'. +defaults to `hash-table'. If an object has members with the same +key, `hash-table' keeps only the last value of such keys, while +`alist' and `plist' keep all the members. The keyword argument `:array-type' specifies which Lisp type is used to represent arrays; it can be `array' (the default) or `list'. @@ -1049,42 +1926,33 @@ to represent a JSON null value. It defaults to `:null'. The keyword argument `:false-object' specifies which object to use to represent a JSON false value. It defaults to `:false'. usage: (json-parse-buffer &rest args) */) - (ptrdiff_t nargs, Lisp_Object *args) +(ptrdiff_t nargs, Lisp_Object *args) { specpdl_ref count = SPECPDL_INDEX (); -#ifdef WINDOWSNT - ensure_json_available (); -#endif - - struct json_configuration conf = - {json_object_hashtable, json_array_array, QCnull, QCfalse}; + struct json_configuration conf + = { json_object_hashtable, json_array_array, QCnull, QCfalse }; json_parse_args (nargs, args, &conf, true); - ptrdiff_t point = PT_BYTE; - struct json_read_buffer_data data = {.point = point}; - json_error_t error; - json_t *object - = json_load_callback (json_read_buffer_callback, &data, - JSON_DECODE_ANY - | JSON_DISABLE_EOF_CHECK - | JSON_ALLOW_NUL, - &error); - - if (object == NULL) - json_parse_error (&error); - - /* Avoid leaking the object in case of further errors. */ - record_unwind_protect_ptr (json_release_object, object); - - /* Convert and then move point only if everything succeeded. */ - Lisp_Object lisp = json_to_lisp (object, &conf); + struct json_parser p; + unsigned char *begin = PT_ADDR; + unsigned char *end = GPT_ADDR; + unsigned char *secondary_begin = NULL; + unsigned char *secondary_end = NULL; + if (GPT_ADDR < Z_ADDR) + { + secondary_begin = GAP_END_ADDR; + if (secondary_begin < PT_ADDR) + secondary_begin = PT_ADDR; + secondary_end = Z_ADDR; + } - /* Adjust point by how much we just read. */ - point += error.position; - SET_PT_BOTH (BYTE_TO_CHAR (point), point); + json_parser_init (&p, conf, begin, end, secondary_begin, + secondary_end); + record_unwind_protect_ptr (json_parser_done, &p); - return unbind_to (count, lisp); + return unbind_to (count, + json_parse (&p, PARSEENDBEHAVIOR_MovePoint)); } void @@ -1102,6 +1970,10 @@ syms_of_json (void) DEFSYM (Qjson_end_of_file, "json-end-of-file"); DEFSYM (Qjson_trailing_content, "json-trailing-content"); DEFSYM (Qjson_object_too_deep, "json-object-too-deep"); + DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error") + DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error") + DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error") + DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error") DEFSYM (Qjson_unavailable, "json-unavailable"); define_error (Qjson_error, "generic JSON error", Qerror); define_error (Qjson_out_of_memory, @@ -1113,6 +1985,14 @@ syms_of_json (void) Qjson_parse_error); define_error (Qjson_object_too_deep, "object cyclic or Lisp evaluation too deep", Qjson_error); + define_error (Qjson_utf8_decode_error, + "invalid utf-8 encoding", Qjson_error); + define_error (Qjson_invalid_surrogate_error, + "invalid surrogate pair", Qjson_error); + define_error (Qjson_number_out_of_range, + "number out of range", Qjson_error); + define_error (Qjson_escape_sequence_error, + "invalid escape sequence", Qjson_parse_error); DEFSYM (Qpure, "pure"); DEFSYM (Qside_effect_free, "side-effect-free"); commit 946d4aad1dfb244352dfd0845a8bc3078fe9bca4 (refs/remotes/origin/emacs-29) Author: Eli Zaretskii Date: Sat Mar 30 10:00:02 2024 +0300 Avoid errors in Info-search-case-sensitively in DIR buffers * lisp/info.el (Info-search): Don't run the "try other subfiles" code if there are no subfiles. This happens, for example, in DIR files. (Bug#70058) diff --git a/lisp/info.el b/lisp/info.el index 1c6df9a6ee5..5817737ca92 100644 --- a/lisp/info.el +++ b/lisp/info.el @@ -2056,7 +2056,7 @@ If DIRECTION is `backward', search in the reverse direction." (re-search-forward regexp nil t)) (signal 'user-search-failed (list regexp)))))) - (if (and bound (not found)) + (if (and (or bound (not Info-current-subfile)) (not found)) (signal 'user-search-failed (list regexp))) (unless (or found bound) commit fbf6830299998a1e99b99c69cb90b637a3d26f12 Author: Theodor Thornhill Date: Thu Mar 28 19:02:09 2024 +0100 Add test for previous change (bug#70023) * test/lisp/progmodes/typescript-ts-mode-resources/indent.erts: Add test. diff --git a/test/lisp/progmodes/typescript-ts-mode-resources/indent.erts b/test/lisp/progmodes/typescript-ts-mode-resources/indent.erts index 7b6185e0386..bec96ad82e0 100644 --- a/test/lisp/progmodes/typescript-ts-mode-resources/indent.erts +++ b/test/lisp/progmodes/typescript-ts-mode-resources/indent.erts @@ -110,3 +110,17 @@ const foo = (props) => { ); } =-=-= + +Name: Interface body fields are indented + +=-= +interface Foo { +foo: string; +bar?: boolean; +} +=-= +interface Foo { + foo: string; + bar?: boolean; +} +=-=-= commit bcf6dd6e266222a293e359430afdf3a2dc18369c Author: Noah Peart Date: Tue Mar 26 22:44:48 2024 -0700 Add typescript-ts-mode indentation for interface bodies (bug#70023) * lisp/progmodes/typescript-ts-mode.el (typescript-ts-mode--indent-rules): Add indentation rule for interface bodies. diff --git a/lisp/progmodes/typescript-ts-mode.el b/lisp/progmodes/typescript-ts-mode.el index 7021f012dcd..9c4c388c6b1 100644 --- a/lisp/progmodes/typescript-ts-mode.el +++ b/lisp/progmodes/typescript-ts-mode.el @@ -124,6 +124,7 @@ Argument LANGUAGE is either `typescript' or `tsx'." ((parent-is "object_type") parent-bol typescript-ts-mode-indent-offset) ((parent-is "enum_body") parent-bol typescript-ts-mode-indent-offset) ((parent-is "class_body") parent-bol typescript-ts-mode-indent-offset) + ((parent-is "interface_body") parent-bol typescript-ts-mode-indent-offset) ((parent-is "arrow_function") parent-bol typescript-ts-mode-indent-offset) ((parent-is "parenthesized_expression") parent-bol typescript-ts-mode-indent-offset) ((parent-is "binary_expression") parent-bol typescript-ts-mode-indent-offset) commit 95d9e6eb6b48b9b51a0b9d7de4a0c4eeed6c7a70 Author: Andrea Corallo Date: Tue Mar 26 08:20:54 2024 +0100 * Don't install unnecessary trampolines (bug#69573) (don't merge) * lisp/emacs-lisp/comp.el (comp-subr-trampoline-install): Check that subr-name actually matches the target subr. diff --git a/lisp/emacs-lisp/comp.el b/lisp/emacs-lisp/comp.el index 6b65a375ea0..a3c6bb59469 100644 --- a/lisp/emacs-lisp/comp.el +++ b/lisp/emacs-lisp/comp.el @@ -714,13 +714,15 @@ This are essential for the trampoline machinery to work properly.") (when (memq subr-name comp-warn-primitives) (warn "Redefining `%s' might break native compilation of trampolines." subr-name)) - (unless (or (null native-comp-enable-subr-trampolines) - (memq subr-name native-comp-never-optimize-functions) - (gethash subr-name comp-installed-trampolines-h)) - (cl-assert (subr-primitive-p (symbol-function subr-name))) - (when-let ((trampoline (or (comp-trampoline-search subr-name) - (comp-trampoline-compile subr-name)))) - (comp--install-trampoline subr-name trampoline)))) + (let ((subr (symbol-function subr-name))) + (unless (or (not (string= subr-name (subr-name subr))) ;; (bug#69573) + (null native-comp-enable-subr-trampolines) + (memq subr-name native-comp-never-optimize-functions) + (gethash subr-name comp-installed-trampolines-h)) + (cl-assert (subr-primitive-p subr)) + (when-let ((trampoline (or (comp-trampoline-search subr-name) + (comp-trampoline-compile subr-name)))) + (comp--install-trampoline subr-name trampoline))))) (cl-defstruct (comp-vec (:copier nil)) commit 38faacf353fb4c8efb027019a4619a386edfe62c Author: Eli Zaretskii Date: Mon Mar 25 21:49:55 2024 +0200 Improve documentation of in user manual * doc/emacs/basic.texi (Erasing): Document that deletes entire grapheme clusters. diff --git a/doc/emacs/basic.texi b/doc/emacs/basic.texi index f64b3995d25..abdfcb1ab8a 100644 --- a/doc/emacs/basic.texi +++ b/doc/emacs/basic.texi @@ -406,8 +406,8 @@ Delete the character before point, or the region if it is active (@code{delete-backward-char}). @item @key{Delete} -Delete the character after point, or the region if it is active -(@code{delete-forward-char}). +Delete the character or grapheme cluster after point, or the region if +it is active (@code{delete-forward-char}). @item C-d Delete the character after point (@code{delete-char}). @@ -438,11 +438,18 @@ with the @key{Delete} key; we will discuss @key{Delete} momentarily.) On some text terminals, Emacs may not recognize the @key{DEL} key properly. @xref{DEL Does Not Delete}, if you encounter this problem. +@cindex grapheme cluster, deletion +@cindex delete entire grapheme cluster The @key{Delete} (@code{delete-forward-char}) command deletes in the opposite direction: it deletes the character after point, i.e., the character under the cursor. If point was at the end of a line, this joins the following line onto this one. Like @kbd{@key{DEL}}, it deletes the text in the region if the region is active (@pxref{Mark}). +If the character after point is composed with following characters and +displayed as a single display unit, a so-called @dfn{grapheme cluster} +representing the entire sequence, @key{Delete} deletes the entire +sequence in one go. This is in contrast to @key{DEL} which always +deletes a single character, even if the character is composed. @kbd{C-d} (@code{delete-char}) deletes the character after point, similar to @key{Delete}, but regardless of whether the region is commit 9d3d77f12dac21c633cf10f111b0e4e574036b30 Author: Eli Zaretskii Date: Mon Mar 25 15:12:42 2024 +0200 Fix documentation of 'other-window-for-scrolling' * src/window.c (Fother_window_for_scrolling): More accurate documentation of how "the other" window is looked for. Suggested by Karthik Chikmagalur . diff --git a/src/window.c b/src/window.c index 8d4bde8d6db..3a8f864ec69 100644 --- a/src/window.c +++ b/src/window.c @@ -6452,13 +6452,16 @@ When calling from a program, supply as argument a number, nil, or `-'. */) } DEFUN ("other-window-for-scrolling", Fother_window_for_scrolling, Sother_window_for_scrolling, 0, 0, 0, - doc: /* Return the other window for \"other window scroll\" commands. -If in the minibuffer, `minibuffer-scroll-window' if non-nil -specifies the window. -Otherwise, if `other-window-scroll-buffer' is non-nil, a window -showing that buffer is used, popping the buffer up if necessary. -Finally, look for a neighboring window on the selected frame, -followed by all visible frames on the current terminal. */) + doc: /* Return \"the other\" window for \"other window scroll\" commands. +If in the minibuffer, and `minibuffer-scroll-window' is non-nil, +it specifies the window to use. +Otherwise, if `other-window-scroll-buffer' is a buffer, a window +showing that buffer is the window to use, popping it up if necessary. +Otherwise, if `other-window-scroll-default' is a function, call it, +and the window it returns is the window to use. +Finally, the function looks for a neighboring window on the selected +frame, followed by windows on all the visible frames on the current +terminal. */) (void) { Lisp_Object window; commit 96fb71994246508f9bcaae95395d44042941f02d Author: Eli Zaretskii Date: Sun Mar 24 10:38:01 2024 -0400 Bump Emacs version to 29.3.50 * README: * configure.ac: * nt/README.W32: * msdos/sed2v2.inp: * etc/NEWS: Bump Emacs version to 29.3.50. diff --git a/README b/README index 877ebb3c642..b972a53e9f3 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ Copyright (C) 2001-2024 Free Software Foundation, Inc. See the end of the file for license conditions. -This directory tree holds version 29.3 of GNU Emacs, the extensible, +This directory tree holds version 29.3.50 of GNU Emacs, the extensible, customizable, self-documenting real-time display editor. The file INSTALL in this directory says how to build and install GNU diff --git a/configure.ac b/configure.ac index 473ae06833c..f2a7463dfe8 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ dnl along with GNU Emacs. If not, see . AC_PREREQ([2.65]) dnl Note this is parsed by (at least) make-dist and lisp/cedet/ede/emacs.el. -AC_INIT([GNU Emacs], [29.3], [bug-gnu-emacs@gnu.org], [], +AC_INIT([GNU Emacs], [29.3.50], [bug-gnu-emacs@gnu.org], [], [https://www.gnu.org/software/emacs/]) dnl Set emacs_config_options to the options of 'configure', quoted for the shell, diff --git a/etc/NEWS b/etc/NEWS index 3f94b0d4634..4695bcc5334 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -15,6 +15,33 @@ in older Emacs versions. You can narrow news to a specific version by calling 'view-emacs-news' with a prefix argument or by typing 'C-u C-h C-n'. + +* Installation Changes in Emacs 29.4 + + +* Startup Changes in Emacs 29.4 + + +* Changes in Emacs 29.4 + + +* Editing Changes in Emacs 29.4 + + +* Changes in Specialized Modes and Packages in Emacs 29.4 + + +* New Modes and Packages in Emacs 29.4 + + +* Incompatible Lisp Changes in Emacs 29.4 + + +* Lisp Changes in Emacs 29.4 + + +* Changes in Emacs 29.4 on Non-Free Operating Systems + * Changes in Emacs 29.3 Emacs 29.3 is an emergency bugfix release intended to fix several diff --git a/msdos/sed2v2.inp b/msdos/sed2v2.inp index 8e6f42ebee4..34b382df8fe 100644 --- a/msdos/sed2v2.inp +++ b/msdos/sed2v2.inp @@ -67,7 +67,7 @@ /^#undef PACKAGE_NAME/s/^.*$/#define PACKAGE_NAME ""/ /^#undef PACKAGE_STRING/s/^.*$/#define PACKAGE_STRING ""/ /^#undef PACKAGE_TARNAME/s/^.*$/#define PACKAGE_TARNAME ""/ -/^#undef PACKAGE_VERSION/s/^.*$/#define PACKAGE_VERSION "29.3"/ +/^#undef PACKAGE_VERSION/s/^.*$/#define PACKAGE_VERSION "29.3.50"/ /^#undef SYSTEM_TYPE/s/^.*$/#define SYSTEM_TYPE "ms-dos"/ /^#undef HAVE_DECL_GETENV/s/^.*$/#define HAVE_DECL_GETENV 1/ /^#undef SYS_SIGLIST_DECLARED/s/^.*$/#define SYS_SIGLIST_DECLARED 1/ diff --git a/nt/README.W32 b/nt/README.W32 index 83ef00b8eba..a1838f66988 100644 --- a/nt/README.W32 +++ b/nt/README.W32 @@ -1,7 +1,7 @@ Copyright (C) 2001-2024 Free Software Foundation, Inc. See the end of the file for license conditions. - Emacs version 29.3 for MS-Windows + Emacs version 29.3.50 for MS-Windows This README file describes how to set up and run a precompiled distribution of the latest version of GNU Emacs for MS-Windows. You commit ae8f815613c2e072e92aa8fe7b4bcf2fdabc7408 (tag: refs/tags/emacs-29.3) Author: Eli Zaretskii Date: Sun Mar 24 09:37:03 2024 -0400 Update files for Emacs 29.3 * ChangeLog.4: * etc/AUTHORS: * etc/HISTORY: Update for Emacs 29.3. diff --git a/ChangeLog.4 b/ChangeLog.4 index 74d6887376b..4b806c21124 100644 --- a/ChangeLog.4 +++ b/ChangeLog.4 @@ -1,3 +1,595 @@ +2024-03-24 Ihor Radchenko + + org--confirm-resource-safe: Fix prompt when prompting in non-file Org buffers + + * lisp/org/org.el (org--confirm-resource-safe): When called from + non-file buffer, do not put stray "f" in the prompt. + + org-file-contents: Consider all remote files unsafe + + * lisp/org/org.el (org-file-contents): When loading files, consider all + remote files (like TRAMP-fetched files) unsafe, in addition to URLs. + + org-latex-preview: Add protection when `untrusted-content' is non-nil + + * lisp/org/org.el (org--latex-preview-when-risky): New variable + controlling how to handle LaTeX previews in Org files from untrusted + origin. + (org-latex-preview): Consult `org--latex-preview-when-risky' before + generating previews. + This patch adds a layer of protection when LaTeX preview is requested + for an email attachment, where `untrusted-content' is set to non-nil. + + * lisp/gnus/mm-view.el (mm-display-inline-fontify): Mark contents untrusted. + * lisp/files.el (untrusted-content): New variable. + + The new variable is to be used when buffer contents comes from untrusted + source. + + org-macro--set-templates: Prevent code evaluation + + * lisp/org/org-macro.el (org-macro--set-templates): Get rid of any + risk to evaluate code when `org-macro--set-templates' is called as a + part of major mode initialization. This way, no code evaluation is + ever triggered when user merely opens the file or when + `mm-display-org-inline' invokes Org major mode to fontify mime part + preview in email messages. + +2024-03-24 Eli Zaretskii + + * admin/authors.el (authors-aliases): Add ignored authors. + + * etc/NEWS: Update for Emacs 29.3 + +2024-03-21 Andrea Corallo + + * Fix missing `comp-files-queue' update (bug#63415). + + * lisp/emacs-lisp/comp.el (native--compile-async): Update + `comp-files-queue' for real. + +2024-03-21 Basil L. Contovounesios + + Clarify description of format-spec truncation + + * doc/lispref/strings.texi (Custom Format Strings): Mention that + precision specifier affects both '<' and '>' truncation (bug#69822). + * lisp/format-spec.el (format-spec, format-spec--do-flags): Use same + terminology as 'format', especially when referring to its behavior. + +2024-03-21 Eli Zaretskii + + More accurate documentation of 'rmail-mail-new-frame' + + * doc/emacs/rmail.texi (Rmail Reply): More accurate documentation + of the effects of 'rmail-mail-new-frame'. (Bug#69738) + +2024-03-20 Eli Zaretskii + + Fix documentation of M-SPC in user manual + + * doc/emacs/killing.texi (Deletion): Fix documentation of + 'cycle-spacing'. (Bug#69905) + +2024-03-17 Michael Albinus + + * admin/notes/bugtracker: Minor copyedit. + +2024-03-16 Theodor Thornhill + + Tweak regexp for object initializers in csharp-mode (bug#69571) + + * lisp/progmodes/csharp-mode.el (csharp-guess-basic-syntax): Add + handling to not consider ended statements as object init openers. + * test/lisp/progmodes/csharp-mode-resources/indent.erts: New test + resources. + * test/lisp/progmodes/csharp-mode-tests.el: Add test for this particular + issue. + +2024-03-16 Konstantin Kharlamov + + `term-mode': mention the keymap to add keybindings to + + A user typically expects a keymap for mode `foo' to be called + `foo-mode-map'. term-mode has `term-mode-map' too, but for + user-defined bindings to have effect they have to be put to + `term-raw-map' instead. So let's mention that. + * lisp/term.el (term-mode) (term-mode-map) (term-raw-map): Mention + the keymaps to add keybindings to for `term-mode'. (Bug#69786) + +2024-03-16 Eli Zaretskii + + Fix 'shortdoc-copy-function-as-kill' + + * lisp/emacs-lisp/shortdoc.el (shortdoc-copy-function-as-kill): + Fix handling of functions with no arguments. (Bug#69720) + +2024-03-16 Eli Zaretskii + + Improve documentation of 'edebug-print-*' variables + + * lisp/emacs-lisp/edebug.el (edebug-print-length) + (edebug-print-level): Fix doc strings and customization labels. + Suggested by Matt Trzcinski . (Bug#69745) + +2024-03-11 F. Jason Park + + Fix 'with-sqlite-transaction' + + * lisp/sqlite.el (with-sqlite-transaction): Tuck misplaced body + of else form back into feature-test control structure whence it + escaped. (Bug#67142) + + * test/lisp/sqlite-tests.el: New file to accompany + test/src/sqlite-tests.el. + +2024-03-01 Dan Jacobson (tiny change) + + Fix typos in vnvni.el. + + * lisp/leim/quail/vnvni.el ("vietnamese-vni"): Fix typos. (Bug#69485) + +2024-02-27 Eli Zaretskii + + Avoid assertion violations in bidi.c + + * src/bidi.c (bidi_resolve_brackets): Move assertion about + 'resolved_level' to where it belongs. This avoids unnecessary + aborts when the character is not a bracket type and doesn't need + BPA resolution. (Bug#69421) + +2024-02-25 Stefan Monnier + + * lisp/files.el (hack-one-local-variable): Use `set-auto-mode-0` + + This fixes bug#69373. + +2024-02-24 Eli Zaretskii + + Fix infinite recursion in gdb-mi.el + + * lisp/progmodes/gdb-mi.el: (gdb-clear-partial-output) + (gdb-clear-inferior-io): Set inhibit-read-only, to avoid + signaling errors in process filter. (Bug#69327) + +2024-02-24 Eli Zaretskii + + Fix 'help-quick-toggle' + + * lisp/help.el (help-quick-sections): Fix "kill-region" command. + Add a doc string. (Bug#69345) + +2024-02-21 Juri Linkov + + * doc/lispref/modes.texi (Tabulated List Mode): Update. + + In the description of 'tabulated-list-format' document + the missing value 'props' that was added long ago. + +2024-02-21 Michael Albinus + + * lisp/net/tramp.el (tramp-methods): Fix typo in docstring. (Bug#69294) + +2024-02-17 Dmitry Gutov + + java-ts-mode: Indentation for opening brace on a separate line + + * lisp/progmodes/java-ts-mode.el (java-ts-mode--indent-rules): + Support putting the opening brace on a separate line (bug#67556). + + * test/lisp/progmodes/java-ts-mode-resources/indent.erts: + Add a test. + +2024-02-17 Philip Kaludercic + + Removed decommissioned PGP keyservers + + * lisp/epa-ks.el (epa-keyserver): Update the user option type of + `epa-keyserver'. + + See https://mail.gnu.org/archive/html/emacs-devel/2023-11/msg00857.html. + +2024-02-17 Ihor Radchenko + + org: Fix security prompt for downloading remote resource + + * lisp/org/org.el (org--confirm-resource-safe): Do not assume that + resource is safe when user replies "n" (do not download). + + Reported-by: Max Nikulin + Link: https://orgmode.org/list/upj6uk$b7o$1@ciao.gmane.io + +2024-02-17 Eli Zaretskii + + Revert "Update to Org 9.6.19" + + This reverts commit 07a392f445eb21c5e4681027eee9d981300a4309. + It was installed by mistake. + +2024-02-17 Kyle Meyer + + Update to Org 9.6.19 + +2024-02-15 Philipp Stephani + + Remove references to phst@google.com. + + I don't work for Google any more, so I'll use my private address going + forward. + + * .mailmap: Remove references to phst@google.com. + +2024-02-14 Stefan Kangas + + * BUGS: Note how to report critical security issues. + +2024-02-14 Stefan Kangas + + Add cross-reference to ELisp manual Caveats + + * doc/lispref/intro.texi (Caveats): Add cross-reference to Emacs manual. + Talking about "contributing code" makes little sense in a section about + reporting mistakes in the ELisp manual, so skip that part. + +2024-02-14 Joseph Turner + + Improve directory prompt used by package-vc-checkout + + * lisp/emacs-lisp/package-vc.el (package-vc--read-package-name): Use + read-directory-name instead of read-file-name. (Bug#66114) + +2024-02-14 Michael Albinus + + Minor Tramp doc adaption + + * doc/misc/tramp.texi (Frequently Asked Questions): Be more + precise with FIDO2 keys. + + * lisp/net/tramp.el: Adapt comments. + +2024-02-12 Daniel Martín + + ;; Fix typo in the Tramp documentation + +2024-02-11 Andrea Corallo + + * Improve reproducibility of inferred values by native comp + + * lisp/emacs-lisp/comp-cstr.el (comp-normalize-valset): Do not try to + reorder conses using 'sxhash-equal' as its behavior is not reproducible + over different sessions. + +2024-02-10 Loïc Lemaître (tiny change) + + Handle typescript ts grammar breaking change for function_expression + + Starting from version 0.20.4 of the typescript/tsx grammar, "function" + becomes "function_expression". The right expression is used depending + on the grammar version. + + * lisp/progmodes/typescript-ts-mode.el + (tsx-ts-mode--font-lock-compatibility-function-expression): + New function (bug#69024). + (typescript-ts-mode--font-lock-settings): Use it. + +2024-02-10 Eli Zaretskii + + Don't quote 't' in doc strings + + * lisp/outline.el (outline-minor-mode-use-buttons): Doc fix. + Patch by Arash Esbati . (Bug#69012) + +2024-02-09 Michael Albinus + + Tramp: Handle PIN requests from security keys (don't merge) + + * doc/misc/tramp.texi (Frequently Asked Questions): Clarify FIDO entry. + + * lisp/net/tramp-sh.el (tramp-actions-before-shell) + (tramp-actions-copy-out-of-band): + Use `tramp-security-key-pin-regexp'. + + * lisp/net/tramp.el (tramp-security-key-pin-regexp): New defcustom. + (tramp-action-otp-password, tramp-read-passwd): Trim password prompt. + (tramp-action-show-and-confirm-message): Expand for PIN requests. + +2024-02-08 Stefan Kangas + + * admin/notes/kind-communication: New file. + +2024-02-08 Eli Zaretskii + + Don't skip links to "." and ".." in Dired when marking files + + * lisp/dired.el (dired-mark): Skip "." and "..", but not symlinks + to those two. (Bug#38729) (Bug#68814) + +2024-02-06 Joseph Turner + + Pass unquoted filename to user-supplied MUSTMATCH predicate + + * lisp/minibuffer.el (read-file-name-default): Pass REQUIRE-MATCH + argument through substitute-in-file-name. + * lisp/minibuffer.el (read-file-name): Update docstring. + + Resolves bug#68815. + +2024-02-04 Juri Linkov + + * doc/lispref/parsing.texi (Retrieving Nodes): Improve documentation. + + Update optional arguments 'predicate' and 'include-node' + of 'treesit-node-top-level'. + +2024-02-03 Vincenzo Pupillo + + Fix incompatibility with tree-sitter-javascript >= 0.20.2 + + Starting from version 0.20.2 the grammar's primary expression + "function" has been renamed to "function_expression". A new + function checks if the new primary expression is available, + and if so, it returns the correct rules. + * lisp/progmodes/js.el + (js--treesit-font-lock-compatibility-definition-feature): New + function. + (js--treesit-font-lock-settings): Use it. (Bug#68879) + +2024-02-03 Eli Zaretskii + + Avoid signaling errors from 'pixel-fill-region' + + * lisp/textmodes/pixel-fill.el (pixel-fill-region): Make sure the + selected window displays the current buffer. This is important + when this function is called inside 'with-current-buffer' or + similar forms which temporarily change the buffer displayed in the + selected window. (Bug#67791) + +2024-02-02 nibon7 + + eglot: Add nushell language server + + * lisp/progmodes/eglot.el (eglot-server-programs): Add nushell + language server. (Bug#68823) + +2024-02-02 Piotr Kwiecinski (tiny change) + + eglot: Add php-ts-mode to eglot-server-programs + + * lisp/progmodes/eglot.el (eglot-server-programs): Add + php-ts-mode. (Bug#68870) + +2024-02-02 dalu (tiny change) + + Support kotlin-ts-mode in Eglot + + * lisp/progmodes/eglot.el (eglot-server-programs): Support + kotlin-ts-mode. (Bug#68865) + +2024-02-01 Michael Albinus + + Fix stale cache in Tramp (do not merge with master) + + * lisp/net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band): + Flush file properties when needed. (Bug#68805) + +2024-02-01 Ulrich Müller + + * configure.ac: Include X11/Xlib.h for XOpenDisplay. (Bug#68842) + + Do not merge to master. + +2024-02-01 Stefan Kangas + + Improve `desktop-save-mode` docstring + + * lisp/desktop.el (desktop-save-mode): Improve docstring. + +2024-01-28 Joseph Turner + + Fix completing-read functional REQUIRE-MATCH behavior + + * lisp/minibuffer.el (completion--complete-and-exit): If + minibuffer-completion-confirm is a function which returns nil, + immediately fail to complete. + + See bug#66187. + +2024-01-28 Eli Zaretskii + + Fix "emacs -nw" on MS-Windows + + * src/w32term.c (w32_flip_buffers_if_dirty): Do nothing if F is + not a GUI frame. This avoids rare crashes in "emacs -nw". + * src/w32console.c (initialize_w32_display): Set the + ENABLE_EXTENDED_FLAGS bit in 'prev_console_mode'. + + (cherry picked from commit e1970c99f097715fc5bb3b88154799bfe13de90f) + +2024-01-28 Michael Albinus + + Handle wrong login program in Tramp + + * lisp/net/tramp-sh.el (tramp-maybe-open-connection): Exit remote + shell when login fails. + +2024-01-27 Jim Porter + + * doc/lispref/package.texi (Multi-file Packages): Document ".elpaignore". + + (cherry picked from commit 744a10a4d722a361bc21561b4162045e4ec97ed6) + +2024-01-27 Eshel Yaron + + Avoid signaling errors in emoji.el on empty input + + * lisp/international/emoji.el (emoji--read-emoji): Signal + user-error on empty input. (Bug#68671) + + Do not merge to master. + +2024-01-27 Eli Zaretskii + + Fix description of when "\xNNN" is considered a unibyte character + + * doc/lispref/objects.texi (Non-ASCII in Strings): More accurate + description of when a hexadecimal escape sequence yields a unibyte + character. (Bug#68751) + +2024-01-26 Randy Taylor + + Simplify imenu setup for {cmake,dockerfile}-ts-modes + + * lisp/progmodes/cmake-ts-mode.el (treesit-induce-sparse-tree, + treesit-node-child, treesit-node-start, cmake-ts-mode--imenu, + cmake-ts-mode--imenu-1): Remove. + (treesit-search-subtree): Declare. + (cmake-ts-mode--function-name): New function. + (cmake-ts-mode): Use it. + + * lisp/progmodes/dockerfile-ts-mode.el (treesit-induce-sparse-tree, + treesit-node-start, dockerfile-ts-mode--imenu, + dockerfile-ts-mode--imenu-1): Remove. + (dockerfile-ts-mode--stage-name): New function. + (dockerfile-ts-mode): Use it. + +2024-01-24 Eli Zaretskii + + Improve documentation of profiler commands + + * doc/lispref/debugging.texi (Profiling): Document more commands. + Improve indexing. (Bug#68693) + +2024-01-23 Basil L. Contovounesios + + Fix broken links to Freedesktop notifications spec + + * doc/lispref/os.texi (Desktop Notifications): + * lisp/notifications.el: Replace broken developer.gnome.org links + with specifications.freedesktop.org (bug#67939). + +2024-01-22 Michael Albinus + + Fix nasty cut'n'waste error in Tramp + + * lisp/net/tramp.el (tramp-parse-passwd): Use `tramp-parse-passwd-group'. + Reported by Tim Landscheidt . + +2024-01-21 Stefan Kangas + + Fix image-dired-tags-db-file void variable error + + * lisp/image/image-dired-tags.el (image-dired-sane-db-file): + Require 'image-dired'. (Bug#68636) + +2024-01-21 Matthew Smith (tiny change) + + typescript-ts-mode: Skip test if tsx grammar missing + + typescript-ts-mode-test-indentation depends on both the tree-sitter + typescript grammar, and the tree-sitter tsx grammar. If only the + typescript is installed, the tests will run and then fail unexpectedly + after tsx fails to load. + + * test/lisp/progmodes/typescript-ts-mode-tests.el + (typescript-ts-mode-test-indentation): Skip test if tsx grammar is + missing. + +2024-01-20 Stefan Kangas + + * admin/README: Document the run-codespell script. + + * admin/README: Fix entry on coccinelle subdirectory. + +2024-01-20 Stefan Kangas + + Add script admin/run-codespell and supporting files + + * admin/codespell/README: + * admin/codespell/codespell.dictionary: + * admin/codespell/codespell.exclude: + * admin/codespell/codespell.ignore: + * admin/codespell/codespell.rc: + * admin/run-codespell: New files. + +2024-01-20 Michael Albinus + + Sync with Tramp 2.6.3-pre (don't merge with master) + + * doc/misc/tramp.texi (Obtaining @value{tramp}): Mention the ELPA + Tramp manual. + (Remote processes): Adapt index. + + * doc/misc/trampver.texi: + * lisp/net/trampver.el (tramp-version): Set to "2.6.3-pre". + + * lisp/net/tramp.el (tramp-local-host-regexp): Extend. Adapt :version. + (tramp-signal-process): PROCESS can also be a string. + (tramp-skeleton-directory-files): + * lisp/net/tramp-cache.el (with-tramp-saved-file-property) + (with-tramp-saved-file-properties) + (with-tramp-saved-connection-property) + (with-tramp-saved-connection-properties): Use `setf' but `setq' in macro. + + * lisp/net/tramp-compat.el (tramp-compat-funcall): Declare debug. + + * lisp/net/tramp-crypt.el (tramp-crypt-file-name-p): Exclude lock files. + (tramp-crypt-file-name-handler-alist): Use `identity' for + `abbreviate-file-name'. + (tramp-crypt-add-directory, tramp-crypt-remove-directory): + Adapt docstrings. + (tramp-crypt-cleanup-connection): New defun. Add it to + `tramp-cleanup-connection-hook' + + * lisp/net/tramp.el (tramp-skeleton-file-name-all-completions): + Handle "." and "..". + + * lisp/net/tramp-adb.el (tramp-adb-handle-file-name-all-completions): + * lisp/net/tramp-fuse.el (tramp-fuse-handle-file-name-all-completions): + * lisp/net/tramp-gvfs.el (tramp-gvfs-handle-file-name-all-completions): + Remove special handling of "." an "..". + + * lisp/net/tramp-sh.el (tramp-pipe-stty-settings): New defcustom. + (tramp-sh-handle-make-process): Use it. (Bug#62093) + + * test/lisp/net/tramp-tests.el (tramp-test18-file-attributes): + Adapt test. + (tramp-test31-signal-process): Extend. + +2024-01-20 Eli Zaretskii (tiny change) + + Update Polish translation of tutorial + + * etc/tutorials/TUTORIAL.pl: Update text about scroll bar. New + text by Christopher Yeleighton . + (Bug#68599) + +2024-01-19 Michael Albinus + + * doc/misc/gnus.texi (Summary Mail Commands): Fix command name. + +2024-01-18 Eli Zaretskii + + Bump Emacs version to 29.2.50. + + * README: + * configure.ac: + * nt/README.W32: + * msdos/sed2v2.inp: + * etc/NEWS: Bump Emacs version to 29.2.50. + +2024-01-18 Eli Zaretskii + + * Update etc/HISTORY and ChangeLog.4 for 29.2 release. + +2024-01-18 Eli Zaretskii + + Bump Emacs version to 29.2 + + * README: + * configure.ac: + * nt/README.W32: + * msdos/sed2v2.inp: Bump Emacs version to 29.2. + 2024-01-18 Eli Zaretskii * Version 29.2 released. @@ -120914,7 +121506,7 @@ This file records repository revisions from commit f2ae39829812098d8269eafbc0fcb98959ee5bb7 (exclusive) to -commit 92a7132bd6c76a43860fa01ca3363857d8dfc8f3 (inclusive). +commit 8d8253f89915f1d9b45791d46cf974c6bdcc1457 (inclusive). See ChangeLog.3 for earlier changes. ;; Local Variables: diff --git a/etc/AUTHORS b/etc/AUTHORS index 193a3db6760..8a541e8a7e2 100644 --- a/etc/AUTHORS +++ b/etc/AUTHORS @@ -585,7 +585,7 @@ Bartosz Duszel: changed allout.el bib-mode.el cc-cmds.el hexl.el icon.el Basil L. Contovounesios: changed simple.el subr.el message.el eww.el modes.texi custom.el text.texi bibtex.el gnus-sum.el internals.texi js.el customize.texi display.texi files.texi gnus-group.el gnus-win.el - gnus.texi gravatar.el json.el map.el shr.el and 345 other files + gnus.texi gravatar.el json.el map.el shr.el and 346 other files Bastian Beischer: changed semantic/complete.el calc-yank.el include.el mru-bookmark.el refs.el senator.el @@ -1246,6 +1246,8 @@ Dani Moncayo: changed msys-to-w32 Makefile.in configure.ac buffers.texi dired.texi display.texi emacs-lisp-intro.texi files.texi killing.texi make-dist mark.texi msysconfig.sh simple.el text.texi version.el +Dan Jacobson: changed vnvni.el + Dan Nicolaescu: wrote iris-ansi.el romanian.el vc-dir.el and co-wrote hideshow.el and changed vc.el configure.ac vc-hg.el vc-git.el src/Makefile.in @@ -1576,7 +1578,7 @@ and changed xref.el ruby-mode.el project.el vc-git.el ruby-ts-mode.el elisp-mode.el js.el etags.el ruby-mode-tests.el vc.el package.el vc-hg.el symref/grep.el treesit.el dired-aux.el progmodes/python.el ruby-ts-mode-tests.el simple.el typescript-ts-mode.el log-edit.el - ruby-ts.rb and 158 other files + ruby-ts.rb and 159 other files Dmitry Kurochkin: changed isearch.el @@ -1675,7 +1677,7 @@ Eli Zaretskii: wrote [bidirectional display in xdisp.c] chartab-tests.el coding-tests.el etags-tests.el rxvt.el tty-colors.el and co-wrote help-tests.el and changed xdisp.c display.texi w32.c msdos.c simple.el w32fns.c - files.el fileio.c keyboard.c emacs.c text.texi configure.ac w32term.c + files.el fileio.c keyboard.c emacs.c configure.ac text.texi w32term.c dispnew.c frames.texi w32proc.c files.texi xfaces.c window.c dispextern.h lisp.h and 1341 other files @@ -1823,7 +1825,7 @@ Ernesto Alfonso: changed simple.el E Sabof: changed hi-lock.el image-dired.el -Eshel Yaron: changed eglot.el emacs.texi eww.el indent.texi +Eshel Yaron: changed eglot.el emacs.texi emoji.el eww.el indent.texi Espen Skoglund: wrote pascal.el @@ -1935,7 +1937,7 @@ F. Jason Park: changed erc.el erc-backend.el erc-tests.el foonet.eld barnet.eld erc-scenarios-misc.el erc-services.el erc-common.el erc-networks-tests.el erc-scenarios-base-reconnect.el erc-scenarios-common.el socks-tests.el auth-source-pass-tests.el - auth-source-pass.el erc-join.el erc-sasl-tests.el and 104 other files + auth-source-pass.el erc-join.el erc-sasl-tests.el and 106 other files Flemming Hoejstrup Hansen: changed forms.el @@ -2356,7 +2358,7 @@ Igor Saprykin: changed ftfont.c Ihor Radchenko: wrote org-fold-core.el org-fold.el org-persist.el and changed ox.el fns.c emacsclient.desktop help-mode.el oc.el - org-element.el + org-element.el org.el Iku Iwasa: changed auth-source-pass-tests.el auth-source-pass.el @@ -2780,7 +2782,7 @@ Jim Porter: changed eshell.texi esh-cmd.el esh-var-tests.el esh-util.el eshell-tests-helpers.el em-pred.el esh-arg.el esh-cmd-tests.el tramp.el em-pred-tests.el em-dirs-tests.el server.el em-basic.el em-extpipe-tests.el esh-opt-tests.el esh-opt.el - and 93 other files + and 94 other files Jim Radford: changed gnus-start.el @@ -3059,7 +3061,7 @@ and changed xterm.c xfns.c keyboard.c screen.c dispnew.c xdisp.c window.c Joseph M. Kelsey: changed fileio.c skeleton.el -Joseph Turner: changed package-vc.el subr.el +Joseph Turner: changed package-vc.el minibuffer.el subr.el Josh Elsasser: changed eglot.el README.md configure.ac @@ -3435,7 +3437,7 @@ Konstantin Kharlamov: changed smerge-mode.el diff-mode.el files.el ada-mode.el autorevert.el calc-aent.el calc-ext.el calc-lang.el cc-mode.el cperl-mode.el css-mode.el cua-rect.el dnd.el ebnf-abn.el ebnf-dtd.el ebnf-ebx.el emacs-module-tests.el epg.el faces.el - gnus-art.el gtkutil.c and 27 other files + gnus-art.el gtkutil.c and 28 other files Konstantin Kliakhandler: changed org-agenda.el @@ -3611,6 +3613,8 @@ Lluís Vilanova: changed ede/linux.el Logan Perkins: changed keyboard.c +Loïc Lemaître: changed typescript-ts-mode.el + Luca Capello: changed mm-encode.el Lucas Werkmeister: changed emacs.c emacs.service nxml-mode.el @@ -3950,6 +3954,8 @@ Matthew Mundell: changed calendar.texi diary-lib.el files.texi Matthew Newton: changed imenu.el +Matthew Smith: changed typescript-ts-mode-tests.el + Matthew Tromp: changed ielm.el Matthew White: changed buffer.c bookmark-tests.el bookmark.el @@ -4275,10 +4281,8 @@ Mohsin Kaleem: changed eglot.el Mon Key: changed animate.el imap.el syntax.el -Morgan J. Smith: changed gnus-group-tests.el url-vars.el - -Morgan Smith: changed image-dired.el doc-view.el minibuffer-tests.el - minibuffer.el vc-git.el window.el +Morgan Smith: changed image-dired.el doc-view.el gnus-group-tests.el + minibuffer-tests.el minibuffer.el url-vars.el vc-git.el window.el Morten Welinder: wrote [many MS-DOS files] arc-mode.el desktop.el dosfns.c internal.el msdos.h pc-win.el @@ -4291,6 +4295,8 @@ Mosur Mohan: changed etags.c Motorola: changed buff-menu.el +Mou Tong: changed eglot.el + Muchenxuan Tong: changed org-agenda.el org-mobile.el org-timer.el Murata Shuuichirou: changed coding.c @@ -4823,7 +4829,7 @@ and changed emacs-module.c emacs-module-tests.el configure.ac json.c process.c eval.c internals.texi json-tests.el process-tests.el pdumper.c alloc.c emacs-module.h.in emacs.c lread.c nsterm.m bytecomp.el lisp.h seccomp-filter.c callproc.c cl-macs.el gtkutil.c - and 188 other files + and 189 other files Phillip Dixon: changed eglot.el @@ -4872,6 +4878,8 @@ Piet van Oostrum: changed data.c fileio.c flyspell.el smtpmail.el Pinku Surana: changed sql.el +Piotr Kwiecinski: changed eglot.el + Piotr Trojanek: changed gnutls.c process.c Piotr Zieliński: wrote org-mouse.el @@ -4967,8 +4975,8 @@ Randall Smith: changed dired.el Randal Schwartz: wrote pp.el -Randy Taylor: changed build.sh eglot.el batch.sh dockerfile-ts-mode.el - rust-ts-mode.el go-ts-mode.el c-ts-mode.el cmake-ts-mode.el +Randy Taylor: changed build.sh dockerfile-ts-mode.el eglot.el batch.sh + rust-ts-mode.el cmake-ts-mode.el go-ts-mode.el c-ts-mode.el cus-theme.el font-lock.el java-ts-mode.el js.el json-ts-mode.el modes.texi progmodes/python.el project.el sh-script.el typescript-ts-mode.el yaml-ts-mode.el @@ -5550,7 +5558,7 @@ and co-wrote help-tests.el keymap-tests.el and changed image-dired.el efaq.texi package.el cperl-mode.el help.el subr.el checkdoc.el bookmark.el simple.el dired.el files.el gnus.texi dired-x.el keymap.c image-mode.el erc.el ediff-util.el speedbar.el - woman.el browse-url.el bytecomp-tests.el and 1683 other files + woman.el browse-url.el bytecomp-tests.el and 1690 other files Stefan Merten: co-wrote rst.el @@ -5815,9 +5823,9 @@ Theodore Jump: changed makefile.nt makefile.def w32-win.el w32faces.c Theodor Thornhill: changed typescript-ts-mode.el java-ts-mode.el c-ts-mode.el eglot.el csharp-mode.el js.el css-mode.el project.el indent.erts json-ts-mode.el treesit.el c-ts-common.el eglot-tests.el - EGLOT-NEWS README.md c-ts-mode-tests.el compile-tests.el go-ts-mode.el - indent-bsd.erts java-ts-mode-tests.el maintaining.texi - and 8 other files + EGLOT-NEWS README.md c-ts-mode-tests.el compile-tests.el + csharp-mode-tests.el go-ts-mode.el indent-bsd.erts + java-ts-mode-tests.el and 9 other files Theresa O'Connor: wrote json.el and changed erc.el erc-viper.el erc-log.el erc-track.el viper.el @@ -6185,7 +6193,7 @@ Vincent Bernat: changed gnus-int.el nnimap.el xsettings.c Vincent Del Vecchio: changed info.el mh-utils.el -Vincenzo Pupillo: changed cmake-ts-mode.el js.el typescript-ts-mode.el +Vincenzo Pupillo: changed js.el cmake-ts-mode.el typescript-ts-mode.el java-ts-mode.el Vince Salvino: changed msdos.texi w32.c w32fns.c diff --git a/etc/HISTORY b/etc/HISTORY index afa14cb2350..cfd4f1f6873 100644 --- a/etc/HISTORY +++ b/etc/HISTORY @@ -235,6 +235,8 @@ GNU Emacs 29.1 (2023-07-30) emacs-29.1 GNU Emacs 29.2 (2024-01-18) emacs-29.2 +GNU Emacs 29.3 (2024-03-24) emacs-29.3 + ---------------------------------------------------------------------- This file is part of GNU Emacs. commit ff6cc3d2cf0e17fc44ac7dfd259c74f96eafa1c4 Author: Eli Zaretskii Date: Sun Mar 24 09:36:24 2024 -0400 * lisp/ldefs-boot.el: Regenerate. diff --git a/lisp/ldefs-boot.el b/lisp/ldefs-boot.el index 16a9df2c92e..60e7f6811bc 100644 --- a/lisp/ldefs-boot.el +++ b/lisp/ldefs-boot.el @@ -6961,13 +6961,22 @@ or call the function `desktop-save-mode'.") (autoload 'desktop-save-mode "desktop" "\ Toggle desktop saving (Desktop Save mode). -When Desktop Save mode is enabled, the state of Emacs is saved from -one session to another. In particular, Emacs will save the desktop when -it exits (this may prompt you; see the option `desktop-save'). The next -time Emacs starts, if this mode is active it will restore the desktop. +When Desktop Save mode is enabled, the state of Emacs is saved from one +session to another. The saved Emacs \"desktop configuration\" includes the +buffers, their file names, major modes, buffer positions, window and frame +configuration, and some important global variables. -To manually save the desktop at any time, use the command `\\[desktop-save]'. -To load it, use `\\[desktop-read]'. +To enable this feature for future sessions, customize `desktop-save-mode' +to t, or add this line in your init file: + + (desktop-save-mode 1) + +When this mode is enabled, Emacs will save the desktop when it exits +(this may prompt you, see the option `desktop-save'). The next time +Emacs starts, if this mode is active it will restore the desktop. + +To manually save the desktop at any time, use the command \\[desktop-save]. +To load it, use \\[desktop-read]. Once a desktop file exists, Emacs will auto-save it according to the option `desktop-auto-save-timeout'. @@ -13023,7 +13032,7 @@ For instance: (?l . \"ls\"))) Each %-spec may contain optional flag, width, and precision -modifiers, as follows: +specifiers, as follows: %character @@ -13036,7 +13045,7 @@ The following flags are allowed: * ^: Convert to upper case. * _: Convert to lower case. -The width and truncation modifiers behave like the corresponding +The width and precision specifiers behave like the corresponding ones in `format' when applied to %s. For example, \"%<010b\" means \"substitute into the output the @@ -17261,9 +17270,13 @@ use its file extension as image type. Optional DATA-P non-nil means FILE-OR-DATA is a string containing image data. Optional PROPS are additional image attributes to assign to the image, -like, e.g. `:mask MASK'. If the property `:scale' is not given and the -display has a high resolution (more exactly, when the average width of a -character in the default font is more than 10 pixels), the image is +like, e.g. `:mask MASK'. See Info node `(elisp)Image Descriptors' for +the list of supported properties; see the nodes following that node +for properties specific to certain image types. + +If the property `:scale' is not given and the display has a high +resolution (more exactly, when the average width of a character +in the default font is more than 10 pixels), the image is automatically scaled up in proportion to the default font. Value is the image created, or nil if images of type TYPE are not supported. @@ -23870,8 +23883,8 @@ As with `pcase-let', BINDINGS are of the form (PATTERN EXP), but the EXP in each binding in BINDINGS can use the results of the destructuring bindings that precede it in BINDINGS' order. -Each EXP should match (i.e. be of compatible structure) to its -respective PATTERN; a mismatch may signal an error or may go +Each EXP should match its respective PATTERN (i.e. be of structure +compatible to PATTERN); a mismatch may signal an error or may go undetected, binding variables to arbitrary values, such as nil. (fn BINDINGS &rest BODY)" nil t) @@ -23884,8 +23897,8 @@ All EXPs are evaluated first, and then used to perform destructuring bindings by matching each EXP against its respective PATTERN. Then BODY is evaluated with those bindings in effect. -Each EXP should match (i.e. be of compatible structure) to its -respective PATTERN; a mismatch may signal an error or may go +Each EXP should match its respective PATTERN (i.e. be of structure +compatible to PATTERN); a mismatch may signal an error or may go undetected, binding variables to arbitrary values, such as nil. (fn BINDINGS &rest BODY)" nil t) @@ -32878,7 +32891,7 @@ Add archive file name handler to `file-name-handler-alist'." (when (and tramp-ar ;;; Generated autoloads from net/trampver.el -(push (purecopy '(tramp 2 6 2 29 2)) package--builtin-versions) +(push (purecopy '(tramp 2 6 3 -1)) package--builtin-versions) (register-definition-prefixes "trampver" '("tramp-")) commit 0dab0c0d688faf22adf48a429702bf906a38697b Author: Eli Zaretskii Date: Sun Mar 24 09:05:17 2024 -0400 Bump Emacs version to 29.3 * README: * configure.ac: * nt/README.W32: * msdos/sed2v2.inp: Bump Emacs version to 29.3. diff --git a/README b/README index a968b29f71c..877ebb3c642 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ Copyright (C) 2001-2024 Free Software Foundation, Inc. See the end of the file for license conditions. -This directory tree holds version 29.2.50 of GNU Emacs, the extensible, +This directory tree holds version 29.3 of GNU Emacs, the extensible, customizable, self-documenting real-time display editor. The file INSTALL in this directory says how to build and install GNU diff --git a/configure.ac b/configure.ac index 34a5a89bea9..473ae06833c 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ dnl along with GNU Emacs. If not, see . AC_PREREQ([2.65]) dnl Note this is parsed by (at least) make-dist and lisp/cedet/ede/emacs.el. -AC_INIT([GNU Emacs], [29.2.50], [bug-gnu-emacs@gnu.org], [], +AC_INIT([GNU Emacs], [29.3], [bug-gnu-emacs@gnu.org], [], [https://www.gnu.org/software/emacs/]) dnl Set emacs_config_options to the options of 'configure', quoted for the shell, diff --git a/msdos/sed2v2.inp b/msdos/sed2v2.inp index 8ca5bbf74d9..8e6f42ebee4 100644 --- a/msdos/sed2v2.inp +++ b/msdos/sed2v2.inp @@ -67,7 +67,7 @@ /^#undef PACKAGE_NAME/s/^.*$/#define PACKAGE_NAME ""/ /^#undef PACKAGE_STRING/s/^.*$/#define PACKAGE_STRING ""/ /^#undef PACKAGE_TARNAME/s/^.*$/#define PACKAGE_TARNAME ""/ -/^#undef PACKAGE_VERSION/s/^.*$/#define PACKAGE_VERSION "29.2.50"/ +/^#undef PACKAGE_VERSION/s/^.*$/#define PACKAGE_VERSION "29.3"/ /^#undef SYSTEM_TYPE/s/^.*$/#define SYSTEM_TYPE "ms-dos"/ /^#undef HAVE_DECL_GETENV/s/^.*$/#define HAVE_DECL_GETENV 1/ /^#undef SYS_SIGLIST_DECLARED/s/^.*$/#define SYS_SIGLIST_DECLARED 1/ diff --git a/nt/README.W32 b/nt/README.W32 index a450c2e84f0..83ef00b8eba 100644 --- a/nt/README.W32 +++ b/nt/README.W32 @@ -1,7 +1,7 @@ Copyright (C) 2001-2024 Free Software Foundation, Inc. See the end of the file for license conditions. - Emacs version 29.2.50 for MS-Windows + Emacs version 29.3 for MS-Windows This README file describes how to set up and run a precompiled distribution of the latest version of GNU Emacs for MS-Windows. You commit 7a5d7be52c5f0690ee47f30bfad973827261abf2 Author: Ihor Radchenko Date: Fri Feb 23 12:56:58 2024 +0300 org--confirm-resource-safe: Fix prompt when prompting in non-file Org buffers * lisp/org/org.el (org--confirm-resource-safe): When called from non-file buffer, do not put stray "f" in the prompt. diff --git a/lisp/org/org.el b/lisp/org/org.el index 76559c91cd3..154388f79c6 100644 --- a/lisp/org/org.el +++ b/lisp/org/org.el @@ -4671,9 +4671,9 @@ returns non-nil if any of them match." (propertize domain 'face '(:inherit org-link :weight normal)) ") as safe.\n ") "") - (propertize "f" 'face 'success) (if current-file (concat + (propertize "f" 'face 'success) " to download this resource, and permanently mark all resources in " (propertize current-file 'face 'underline) " as safe.\n ") commit 2bc865ace050ff118db43f01457f95f95112b877 Author: Ihor Radchenko Date: Tue Feb 20 14:59:20 2024 +0300 org-file-contents: Consider all remote files unsafe * lisp/org/org.el (org-file-contents): When loading files, consider all remote files (like TRAMP-fetched files) unsafe, in addition to URLs. diff --git a/lisp/org/org.el b/lisp/org/org.el index 0f5d17deee2..76559c91cd3 100644 --- a/lisp/org/org.el +++ b/lisp/org/org.el @@ -4576,12 +4576,16 @@ from file or URL, and return nil. If NOCACHE is non-nil, do a fresh fetch of FILE even if cached version is available. This option applies only if FILE is a URL." (let* ((is-url (org-url-p file)) + (is-remote (condition-case nil + (file-remote-p file) + ;; In case of error, be safe. + (t t))) (cache (and is-url (not nocache) (gethash file org--file-cache)))) (cond (cache) - (is-url + ((or is-url is-remote) (if (org--should-fetch-remote-resource-p file) (condition-case error (with-current-buffer (url-retrieve-synchronously file) commit 6f9ea396f49cbe38c2173e0a72ba6af3e03b271c Author: Ihor Radchenko Date: Tue Feb 20 12:47:24 2024 +0300 org-latex-preview: Add protection when `untrusted-content' is non-nil * lisp/org/org.el (org--latex-preview-when-risky): New variable controlling how to handle LaTeX previews in Org files from untrusted origin. (org-latex-preview): Consult `org--latex-preview-when-risky' before generating previews. This patch adds a layer of protection when LaTeX preview is requested for an email attachment, where `untrusted-content' is set to non-nil. diff --git a/lisp/org/org.el b/lisp/org/org.el index c75afbf5a67..0f5d17deee2 100644 --- a/lisp/org/org.el +++ b/lisp/org/org.el @@ -1140,6 +1140,24 @@ the following lines anywhere in the buffer: :package-version '(Org . "8.0") :type 'boolean) +(defvar untrusted-content) ; defined in files.el +(defvar org--latex-preview-when-risky nil + "If non-nil, enable LaTeX preview in Org buffers from unsafe source. + +Some specially designed LaTeX code may generate huge pdf or log files +that may exhaust disk space. + +This variable controls how to handle LaTeX preview when rendering LaTeX +fragments that originate from incoming email messages. It has no effect +when Org mode is unable to determine the origin of the Org buffer. + +An Org buffer is considered to be from unsafe source when the +variable `untrusted-content' has a non-nil value in the buffer. + +If this variable is non-nil, LaTeX previews are rendered unconditionally. + +This variable may be renamed or changed in the future.") + (defcustom org-insert-mode-line-in-empty-file nil "Non-nil means insert the first line setting Org mode in empty files. When the function `org-mode' is called interactively in an empty file, this @@ -15695,6 +15713,7 @@ fragments in the buffer." (interactive "P") (cond ((not (display-graphic-p)) nil) + ((and untrusted-content (not org--latex-preview-when-risky)) nil) ;; Clear whole buffer. ((equal arg '(64)) (org-clear-latex-preview (point-min) (point-max)) commit 937b9042ad7426acdcca33e3d931d8f495bdd804 Author: Ihor Radchenko Date: Tue Feb 20 12:44:30 2024 +0300 * lisp/gnus/mm-view.el (mm-display-inline-fontify): Mark contents untrusted. diff --git a/lisp/gnus/mm-view.el b/lisp/gnus/mm-view.el index 2e1261c4c9c..5f234e5c006 100644 --- a/lisp/gnus/mm-view.el +++ b/lisp/gnus/mm-view.el @@ -504,6 +504,7 @@ If MODE is not set, try to find mode automatically." (setq coding-system (mm-find-buffer-file-coding-system))) (setq text (buffer-string)))) (with-temp-buffer + (setq untrusted-content t) (insert (cond ((eq charset 'gnus-decoded) (with-current-buffer (mm-handle-buffer handle) (buffer-string))) commit ccc188fcf98ad9166ee551fac9d94b2603c3a51b Author: Ihor Radchenko Date: Tue Feb 20 12:43:51 2024 +0300 * lisp/files.el (untrusted-content): New variable. The new variable is to be used when buffer contents comes from untrusted source. diff --git a/lisp/files.el b/lisp/files.el index c0d26b2343c..5536af014f6 100644 --- a/lisp/files.el +++ b/lisp/files.el @@ -695,6 +695,14 @@ Also see the `permanently-enabled-local-variables' variable." Some modes may wish to set this to nil to prevent directory-local settings being applied, but still respect file-local ones.") +(defvar-local untrusted-content nil + "Non-nil means that current buffer originated from an untrusted source. +Email clients and some other modes may set this non-nil to mark the +buffer contents as untrusted. + +This variable might be subject to change without notice.") +(put 'untrusted-content 'permanent-local t) + ;; This is an odd variable IMO. ;; You might wonder why it is needed, when we could just do: ;; (setq-local enable-local-variables nil) commit befa9fcaae29a6c9a283ba371c3c5234c7f644eb Author: Ihor Radchenko Date: Tue Feb 20 12:19:46 2024 +0300 org-macro--set-templates: Prevent code evaluation * lisp/org/org-macro.el (org-macro--set-templates): Get rid of any risk to evaluate code when `org-macro--set-templates' is called as a part of major mode initialization. This way, no code evaluation is ever triggered when user merely opens the file or when `mm-display-org-inline' invokes Org major mode to fontify mime part preview in email messages. diff --git a/lisp/org/org-macro.el b/lisp/org/org-macro.el index 776d162be12..0be51eec512 100644 --- a/lisp/org/org-macro.el +++ b/lisp/org/org-macro.el @@ -109,6 +109,13 @@ previous one, unless VALUE is nil. Return the updated list." (let ((new-templates nil)) (pcase-dolist (`(,name . ,value) templates) (let ((old-definition (assoc name new-templates))) + ;; This code can be evaluated unconditionally, as a part of + ;; loading Org mode. We *must not* evaluate any code present + ;; inside the Org buffer while loading. Org buffers may come + ;; from various sources, like received email messages from + ;; potentially malicious senders. Org mode might be used to + ;; preview such messages and no code evaluation from inside the + ;; received Org text should ever happen without user consent. (when (and (stringp value) (string-match-p "\\`(eval\\>" value)) ;; Pre-process the evaluation form for faster macro expansion. (let* ((args (org-macro--makeargs value)) @@ -121,7 +128,7 @@ previous one, unless VALUE is nil. Return the updated list." (cadr (read value)) (error (user-error "Invalid definition for macro %S" name))))) - (setq value (eval (macroexpand-all `(lambda ,args ,body)) t)))) + (setq value `(lambda ,args ,body)))) (cond ((and value old-definition) (setcdr old-definition value)) (old-definition) (t (push (cons name (or value "")) new-templates))))) commit 3221d8d46116fdefb19742be916d0e352dfab761 Author: Eli Zaretskii Date: Sun Mar 24 08:36:44 2024 -0400 * admin/authors.el (authors-aliases): Add ignored authors. diff --git a/admin/authors.el b/admin/authors.el index 3764c16adf0..88c01f14120 100644 --- a/admin/authors.el +++ b/admin/authors.el @@ -198,8 +198,10 @@ files.") ("Miha Rihtaršič" "Miha Rihtarsic" "miha@kamnitnik\\.top" "miha") ("Mikio Nakajima" "Nakajima Mikio") (nil "montag451@laposte\\.net") - (nil "na@aisrntairetnraoitn") ("Morgan Smith" "Morgan J\\. Smith") + ("Mou Tong" "mou\\.tong@outlook\\.com") + (nil "na@aisrntairetnraoitn") + (nil "nibon7@163\\.com") ("Nelson Jose dos Santos Ferreira" "Nelson Ferreira") ("Noah Peart" "noah\\.v\\.peart@gmail\\.com") ("Noorul Islam" "Noorul Islam K M") commit 8d8253f89915f1d9b45791d46cf974c6bdcc1457 Author: Eli Zaretskii Date: Sun Mar 24 08:19:29 2024 -0400 * etc/NEWS: Update for Emacs 29.3 diff --git a/etc/NEWS b/etc/NEWS index 06086e9bdfb..3f94b0d4634 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -15,32 +15,28 @@ in older Emacs versions. You can narrow news to a specific version by calling 'view-emacs-news' with a prefix argument or by typing 'C-u C-h C-n'. - -* Installation Changes in Emacs 29.3 - - -* Startup Changes in Emacs 29.3 - * Changes in Emacs 29.3 +Emacs 29.3 is an emergency bugfix release intended to fix several +security vulnerabilities described below. - -* Editing Changes in Emacs 29.3 +** Arbitrary Lisp code is no longer evaluated as part of turning on Org mode. +This is for security reasons, to avoid evaluating malicious Lisp code. - -* Changes in Specialized Modes and Packages in Emacs 29.3 +** New buffer-local variable 'untrusted-content'. +When this is non-nil, Lisp programs should treat buffer contents with +extra caution. - -* New Modes and Packages in Emacs 29.3 +** Gnus now treats inline MIME contents as untrusted. +To get back previous insecure behavior, 'untrusted-content' should be +reset to nil in the buffer. - -* Incompatible Lisp Changes in Emacs 29.3 +** LaTeX preview is now by default disabled for email attachments. +To get back previous insecure behavior, set the variable +'org--latex-preview-when-risky' to a non-nil value. - -* Lisp Changes in Emacs 29.3 - - -* Changes in Emacs 29.3 on Non-Free Operating Systems +** Org mode now considers contents of remote files to be untrusted. +Remote files are recognized by calling 'file-remote-p'. * Installation Changes in Emacs 29.2