commit bd1c7ca67e7429e07f78d4ff49163fd7a67a6765 (HEAD, refs/remotes/origin/master)
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Thu Apr 21 19:26:34 2016 -0700

    Improve character name escapes
    
    * doc/lispref/nonascii.texi (Character Properties):
    Avoid duplication of Unicode names.  Reformat examples to fit in
    narrow pages.
    * doc/lispref/objects.texi (General Escape Syntax):
    Simplify and better-organize explanation of \N{...} escapes.
    * src/character.h (CHAR_SURROGATE_PAIR_P): Remove; unused.
    (char_surrogate_p): New inline function.
    * src/lread.c: Do not include string.h; no longer needed.
    (invalid_character_name, check_scalar_value): Remove; the ideas
    behind these functions are now bundled into character_name_to_code.
    (character_name_to_code): Remove undocumented support for "CJK
    IDEOGRAPH-XXXX" names, as "U+XXXX" suffices.  Reject monstrosities
    like "\N{U+-0}" and null bytes in \N escapes.  Reject floating
    point in \N escapes instead of returning garbage.  Use
    AUTO_STRING_WITH_LEN to lessen pressure on the garbage collector.
    * test/src/lread-tests.el (lread-char-number, lread-char-name)
    (lread-string-char-number, lread-string-char-name):
    Test runtime behavior, not compile-time, as the test framework
    is not set up to test compile-time.
    (lread-char-surrogate-1, lread-char-surrogate-2)
    (lread-char-surrogate-3, lread-char-surrogate-4)
    (lread-string-char-number-2, lread-string-char-number-3):
    New tests.
    (lread-string-char-number-1): Rename from lread-string-char-number.

diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi
index 66ad9ac..0e4aa86 100644
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -622,18 +622,21 @@ This function returns the value of @var{char}'s @var{propname} property.
      @result{} Nd
 @end group
 @group
-;; U+2084 SUBSCRIPT FOUR
-(get-char-code-property ?\u2084 'digit-value)
+;; U+2084
+(get-char-code-property ?\N@{SUBSCRIPT FOUR@}
+                        'digit-value)
      @result{} 4
 @end group
 @group
-;; U+2155 VULGAR FRACTION ONE FIFTH
-(get-char-code-property ?\u2155 'numeric-value)
+;; U+2155
+(get-char-code-property ?\N@{VULGAR FRACTION ONE FIFTH@}
+                        'numeric-value)
      @result{} 0.2
 @end group
 @group
-;; U+2163 ROMAN NUMERAL FOUR
-(get-char-code-property ?\N@{ROMAN NUMERAL FOUR@} 'numeric-value)
+;; U+2163
+(get-char-code-property ?\N@{ROMAN NUMERAL FOUR@}
+                        'numeric-value)
      @result{} 4
 @end group
 @group
diff --git a/doc/lispref/objects.texi b/doc/lispref/objects.texi
index 96b334d..54894b8 100644
--- a/doc/lispref/objects.texi
+++ b/doc/lispref/objects.texi
@@ -353,25 +353,32 @@ following text.)
 control characters, Emacs provides several types of escape syntax that
 you can use to specify non-@acronym{ASCII} text characters.
 
+@enumerate
+@item
 @cindex @samp{\} in character constant
 @cindex backslash in character constants
 @cindex unicode character escape
-  Firstly, you can specify characters by their Unicode values.
-@code{?\u@var{nnnn}} represents a character with Unicode code point
-@samp{U+@var{nnnn}}, where @var{nnnn} is (by convention) a hexadecimal
-number with exactly four digits.  The backslash indicates that the
-subsequent characters form an escape sequence, and the @samp{u}
-specifies a Unicode escape sequence.
-
-  There is a slightly different syntax for specifying Unicode
-characters with code points higher than @code{U+@var{ffff}}:
-@code{?\U00@var{nnnnnn}} represents the character with code point
-@samp{U+@var{nnnnnn}}, where @var{nnnnnn} is a six-digit hexadecimal
-number.  The Unicode Standard only defines code points up to
-@samp{U+@var{10ffff}}, so if you specify a code point higher than
-that, Emacs signals an error.
-
-  Secondly, you can specify characters by their hexadecimal character
+You can specify characters by their Unicode names, if any.
+@code{?\N@{@var{NAME}@}} represents the Unicode character named
+@var{NAME}.  Thus, @samp{?\N@{LATIN SMALL LETTER A WITH GRAVE@}} is
+equivalent to @code{?à} and denotes the Unicode character U+00E0.  To
+simplify entering multi-line strings, you can replace spaces in the
+names by non-empty sequences of whitespace (e.g., newlines).
+
+@item
+You can specify characters by their Unicode values.
+@code{?\N@{U+@var{X}@}} represents a character with Unicode code point
+@var{X}, where @var{X} is a hexadecimal number.  Also,
+@code{?\u@var{xxxx}} and @code{?\U@var{xxxxxxxx}} represent code
+points @var{xxxx} and @var{xxxxxxxx}, respectively, where each @var{x}
+is a single hexadecimal digit.  For example, @code{?\N@{U+E0@}},
+@code{?\u00e0} and @code{?\U000000E0} are all equivalent to @code{?à}
+and to @samp{?\N@{LATIN SMALL LETTER A WITH GRAVE@}}.  The Unicode
+Standard defines code points only up to @samp{U+@var{10ffff}}, so if
+you specify a code point higher than that, Emacs signals an error.
+
+@item
+You can specify characters by their hexadecimal character
 codes.  A hexadecimal escape sequence consists of a backslash,
 @samp{x}, and the hexadecimal character code.  Thus, @samp{?\x41} is
 the character @kbd{A}, @samp{?\x1} is the character @kbd{C-a}, and
@@ -379,23 +386,16 @@ the character @kbd{A}, @samp{?\x1} is the character @kbd{C-a}, and
 You can use any number of hex digits, so you can represent any
 character code in this way.
 
+@item
 @cindex octal character code
-  Thirdly, you can specify characters by their character code in
+You can specify characters by their character code in
 octal.  An octal escape sequence consists of a backslash followed by
 up to three octal digits; thus, @samp{?\101} for the character
 @kbd{A}, @samp{?\001} for the character @kbd{C-a}, and @code{?\002}
 for the character @kbd{C-b}.  Only characters up to octal code 777 can
 be specified this way.
 
-  Fourthly, you can specify characters by their name.  A character
-name escape sequence consists of a backslash, @samp{N@{}, the Unicode
-character name, and @samp{@}}.  Alternatively, you can also put the
-numeric code point value between the braces, using the syntax
-@samp{\N@{U+nnnn@}}, where @samp{nnnn} denotes between one and eight
-hexadecimal digits.  Thus, @samp{?\N@{LATIN CAPITAL LETTER A@}} and
-@samp{?\N@{U+41@}} both denote the character @kbd{A}.  To simplify
-entering multi-line strings, you can replace spaces in the character
-names by arbitrary non-empty sequence of whitespace (e.g., newlines).
+@end enumerate
 
   These escape sequences may also be used in strings.  @xref{Non-ASCII
 in Strings}.
diff --git a/src/character.h b/src/character.h
index bc3e155..586f330 100644
--- a/src/character.h
+++ b/src/character.h
@@ -612,14 +612,13 @@ sanitize_char_width (EMACS_INT width)
    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17	\
    : 0)
 
-/* If C is a high surrogate, return 1.  If C is a low surrogate,
-   return 2.  Otherwise, return 0.  */
+/* Return true if C is a surrogate.  */
 
-#define CHAR_SURROGATE_PAIR_P(c)	\
-  ((c) < 0xD800 ? 0			\
-   : (c) <= 0xDBFF ? 1			\
-   : (c) <= 0xDFFF ? 2			\
-   : 0)
+INLINE bool
+char_surrogate_p (int c)
+{
+  return 0xD800 <= c && c <= 0xDFFF;
+}
 
 /* Data type for Unicode general category.
 
diff --git a/src/lread.c b/src/lread.c
index c3b6bd7..a42c1f6 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -44,7 +44,6 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include "termhooks.h"
 #include "blockinput.h"
 #include <c-ctype.h>
-#include <string.h>
 
 #ifdef MSDOS
 #include "msdos.h"
@@ -2151,88 +2150,42 @@ grow_read_buffer (void)
 			 MAX_MULTIBYTE_LENGTH, -1, 1);
 }
 
-/* Signal an invalid-read-syntax error indicating that the character
-   name in an \N{…} literal is invalid.  */
-static _Noreturn void
-invalid_character_name (Lisp_Object name)
-{
-  AUTO_STRING (format, "\\N{%s}");
-  xsignal1 (Qinvalid_read_syntax, CALLN (Fformat, format, name));
-}
-
-/* Check that CODE is a valid Unicode scalar value, and return its
-   value.  CODE should be parsed from the character name given by
-   NAME.  NAME is used for error messages.  */
+/* Return the scalar value that has the Unicode character name NAME.
+   Raise 'invalid-read-syntax' if there is no such character.  */
 static int
-check_scalar_value (Lisp_Object code, Lisp_Object name)
+character_name_to_code (char const *name, ptrdiff_t name_len)
 {
-  if (! NUMBERP (code))
-    invalid_character_name (name);
-  EMACS_INT i = XINT (code);
-  if (! (0 <= i && i <= MAX_UNICODE_CHAR)
-      /* Don't allow surrogates.  */
-      || (0xD800 <= code && code <= 0xDFFF))
-    invalid_character_name (name);
-  return i;
-}
+  Lisp_Object code;
 
-/* If NAME starts with PREFIX, interpret the rest as a hexadecimal
-   number and return its value.  Raise invalid-read-syntax if the
-   number is not a valid scalar value.  Return −1 if NAME doesn’t
-   start with PREFIX.  */
-static int
-parse_code_after_prefix (Lisp_Object name, const char *prefix)
-{
-  ptrdiff_t name_len = SBYTES (name);
-  ptrdiff_t prefix_len = strlen (prefix);
-  /* Allow between one and eight hexadecimal digits after the
-     prefix.  */
-  if (prefix_len < name_len && name_len <= prefix_len + 8
-      && memcmp (SDATA (name), prefix, prefix_len) == 0)
+  /* Code point as U+XXXX....  */
+  if (name[0] == 'U' && name[1] == '+')
     {
-      Lisp_Object code = string_to_number (SDATA (name) + prefix_len, 16, false);
-      if (NUMBERP (code))
-        return check_scalar_value (code, name);
+      /* Pass the leading '+' to string_to_number, so that it
+	 rejects monstrosities such as negative values.  */
+      code = string_to_number (name + 1, 16, false);
+    }
+  else
+    {
+      /* Look up the name in the table returned by 'ucs-names'.  */
+      AUTO_STRING_WITH_LEN (namestr, name, name_len);
+      Lisp_Object names = call0 (Qucs_names);
+      code = CDR (Fassoc (namestr, names));
     }
-  return -1;
-}
 
-/* Returns the scalar value that has the Unicode character name NAME.
-   Raises `invalid-read-syntax' if there is no such character.  */
-static int
-character_name_to_code (Lisp_Object name)
-{
-  /* Code point as U+N, where N is between 1 and 8 hexadecimal
-     digits.  */
-  int code = parse_code_after_prefix (name, "U+");
-  if (code >= 0)
-    return code;
-
-  /* CJK ideographs are not contained in the association list returned
-     by `ucs-names'.  But they follow a predictable naming pattern: a
-     fixed prefix plus the hexadecimal codepoint value.  */
-  code = parse_code_after_prefix (name, "CJK IDEOGRAPH-");
-  if (code >= 0)
+  if (! (INTEGERP (code)
+	 && 0 <= XINT (code) && XINT (code) <= MAX_UNICODE_CHAR
+	 && ! char_surrogate_p (XINT (code))))
     {
-      /* Various ranges of CJK characters; see UnicodeData.txt.  */
-      if ((0x3400 <= code && code <= 0x4DB5)
-          || (0x4E00 <= code && code <= 0x9FD5)
-          || (0x20000 <= code && code <= 0x2A6D6)
-          || (0x2A700 <= code && code <= 0x2B734)
-          || (0x2B740 <= code && code <= 0x2B81D)
-          || (0x2B820 <= code && code <= 0x2CEA1))
-        return code;
-      else
-        invalid_character_name (name);
+      AUTO_STRING (format, "\\N{%s}");
+      AUTO_STRING_WITH_LEN (namestr, name, name_len);
+      xsignal1 (Qinvalid_read_syntax, CALLN (Fformat, format, namestr));
     }
 
-  /* Look up the name in the table returned by `ucs-names'.  */
-  Lisp_Object names = call0 (Qucs_names);
-  return check_scalar_value (CDR (Fassoc (name, names)), name);
+  return XINT (code);
 }
 
 /* Bound on the length of a Unicode character name.  As of
-   Unicode 9.0.0 the maximum is 83, so this should be safe. */
+   Unicode 9.0.0 the maximum is 83, so this should be safe.  */
 enum { UNICODE_CHARACTER_NAME_LENGTH_BOUND = 200 };
 
 /* Read a \-escape sequence, assuming we already read the `\'.
@@ -2458,14 +2411,14 @@ read_escape (Lisp_Object readcharfun, bool stringp)
               end_of_file_error ();
             if (c == '}')
               break;
-            if (! c_isascii (c))
+            if (! (0 < c && c < 0x80))
               {
                 AUTO_STRING (format,
-                             "Non-ASCII character U+%04X in character name");
+                             "Invalid character U+%04X in character name");
                 xsignal1 (Qinvalid_read_syntax,
                           CALLN (Fformat, format, make_natnum (c)));
               }
-            /* We treat multiple adjacent whitespace characters as a
+            /* Treat multiple adjacent whitespace characters as a
                single space character.  This makes it easier to use
                character names in e.g. multi-line strings.  */
             if (c_isspace (c))
@@ -2483,7 +2436,8 @@ read_escape (Lisp_Object readcharfun, bool stringp)
           }
         if (length == 0)
           invalid_syntax ("Empty character name");
-        return character_name_to_code (make_unibyte_string (name, length));
+	name[length] = '\0';
+	return character_name_to_code (name, length);
       }
 
     default:
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index ff5d0f6..2ebaf49 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -1,6 +1,6 @@
 ;;; lread-tests.el --- tests for lread.c -*- lexical-binding: t; -*-
 
-;; Copyright (C) 2016  Google Inc.
+;; Copyright (C) 2016 Free Software Foundation, Inc.
 
 ;; Author: Philipp Stephani <phst@google.com>
 
@@ -26,11 +26,10 @@
 ;;; Code:
 
 (ert-deftest lread-char-number ()
-  (should (equal ?\N{U+A817} #xA817)))
+  (should (equal (read "?\\N{U+A817}") #xA817)))
 
 (ert-deftest lread-char-name ()
-  (should (equal ?\N{SYLOTI  NAGRI LETTER
-                 DHO}
+  (should (equal (read "?\\N{SYLOTI  NAGRI LETTER \n DHO}")
                  #xA817)))
 
 (ert-deftest lread-char-invalid-number ()
@@ -46,16 +45,23 @@
 (ert-deftest lread-char-empty-name ()
   (should-error (read "?\\N{}") :type 'invalid-read-syntax))
 
-(ert-deftest lread-char-cjk-name ()
-  (should (equal ?\N{CJK IDEOGRAPH-2B734} #x2B734)))
-
-(ert-deftest lread-char-invalid-cjk-name ()
-  (should-error (read "?\\N{CJK IDEOGRAPH-2B735}") :type 'invalid-read-syntax))
-
-(ert-deftest lread-string-char-number ()
-  (should (equal "a\N{U+A817}b" "a\uA817b")))
+(ert-deftest lread-char-surrogate-1 ()
+  (should-error (read "?\\N{U+D800}") :type 'invalid-read-syntax))
+(ert-deftest lread-char-surrogate-2 ()
+  (should-error (read "?\\N{U+D801}") :type 'invalid-read-syntax))
+(ert-deftest lread-char-surrogate-3 ()
+  (should-error (read "?\\N{U+Dffe}") :type 'invalid-read-syntax))
+(ert-deftest lread-char-surrogate-4 ()
+  (should-error (read "?\\N{U+DFFF}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-string-char-number-1 ()
+  (should (equal (read "\"a\\N{U+A817}b\"") "a\uA817b")))
+(ert-deftest lread-string-char-number-2 ()
+  (should-error (read "?\\N{0.5}") :type 'invalid-read-syntax))
+(ert-deftest lread-string-char-number-3 ()
+  (should-error (read "?\\N{U+-0}") :type 'invalid-read-syntax))
 
 (ert-deftest lread-string-char-name ()
-  (should (equal "a\N{SYLOTI NAGRI  LETTER DHO}b" "a\uA817b")))
+  (should (equal (read "\"a\\N{SYLOTI NAGRI  LETTER DHO}b\"") "a\uA817b")))
 
 ;;; lread-tests.el ends here

commit e7cb38edc946ff60c1c878b30b068376d6ef56d2
Author: Philipp Stephani <phst@google.com>
Date:   Thu Apr 21 14:51:30 2016 -0700

    Use 'ucs-names' for character name escapes
    
    * lread.c (invalid_character_name, check_scalar_value)
    (parse_code_after_prefix, character_name_to_code): New helper
    functions that use 'ucs-names' and parsing for CJK ideographs.
    (read_escape): Use helper functions.
    (syms_of_lread): New symbol 'ucs-names'.
    * test/src/lread-tests.el: New tests; fix a couple of bugs in
    existing tests.

diff --git a/src/lread.c b/src/lread.c
index dbe51bb..c3b6bd7 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -44,6 +44,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include "termhooks.h"
 #include "blockinput.h"
 #include <c-ctype.h>
+#include <string.h>
 
 #ifdef MSDOS
 #include "msdos.h"
@@ -2150,36 +2151,90 @@ grow_read_buffer (void)
 			 MAX_MULTIBYTE_LENGTH, -1, 1);
 }
 
-/* Hash table that maps Unicode character names to code points.  */
-static Lisp_Object character_names;
+/* Signal an invalid-read-syntax error indicating that the character
+   name in an \N{…} literal is invalid.  */
+static _Noreturn void
+invalid_character_name (Lisp_Object name)
+{
+  AUTO_STRING (format, "\\N{%s}");
+  xsignal1 (Qinvalid_read_syntax, CALLN (Fformat, format, name));
+}
 
-/* Length of the longest Unicode character name, in bytes. */
-static ptrdiff_t max_character_name_length;
+/* Check that CODE is a valid Unicode scalar value, and return its
+   value.  CODE should be parsed from the character name given by
+   NAME.  NAME is used for error messages.  */
+static int
+check_scalar_value (Lisp_Object code, Lisp_Object name)
+{
+  if (! NUMBERP (code))
+    invalid_character_name (name);
+  EMACS_INT i = XINT (code);
+  if (! (0 <= i && i <= MAX_UNICODE_CHAR)
+      /* Don't allow surrogates.  */
+      || (0xD800 <= code && code <= 0xDFFF))
+    invalid_character_name (name);
+  return i;
+}
 
-/* Initializes `character_names' and `max_character_name_length'.
-   Called by `read_escape'.  */
-void init_character_names (void)
+/* If NAME starts with PREFIX, interpret the rest as a hexadecimal
+   number and return its value.  Raise invalid-read-syntax if the
+   number is not a valid scalar value.  Return −1 if NAME doesn’t
+   start with PREFIX.  */
+static int
+parse_code_after_prefix (Lisp_Object name, const char *prefix)
 {
-  character_names = CALLN (Fmake_hash_table,
-                           QCtest, Qequal,
-                           /* Currently around 100,000 Unicode
-                              characters are defined.  */
-                           QCsize, make_natnum (100000));
-  Lisp_Object get_property =
-    Fsymbol_function (intern_c_string ("get-char-code-property"));
-  ptrdiff_t length = 0;
-  for (int i = 0; i <= MAX_UNICODE_CHAR; ++i)
+  ptrdiff_t name_len = SBYTES (name);
+  ptrdiff_t prefix_len = strlen (prefix);
+  /* Allow between one and eight hexadecimal digits after the
+     prefix.  */
+  if (prefix_len < name_len && name_len <= prefix_len + 8
+      && memcmp (SDATA (name), prefix, prefix_len) == 0)
     {
-      Lisp_Object code = make_natnum (i);
-      Lisp_Object name = call2 (get_property, code, Qname);
-      if (NILP (name)) continue;
-      CHECK_STRING (name);
-      length = max (length, SBYTES (name));
-      Fputhash (name, code, character_names);
+      Lisp_Object code = string_to_number (SDATA (name) + prefix_len, 16, false);
+      if (NUMBERP (code))
+        return check_scalar_value (code, name);
+    }
+  return -1;
+}
+
+/* Returns the scalar value that has the Unicode character name NAME.
+   Raises `invalid-read-syntax' if there is no such character.  */
+static int
+character_name_to_code (Lisp_Object name)
+{
+  /* Code point as U+N, where N is between 1 and 8 hexadecimal
+     digits.  */
+  int code = parse_code_after_prefix (name, "U+");
+  if (code >= 0)
+    return code;
+
+  /* CJK ideographs are not contained in the association list returned
+     by `ucs-names'.  But they follow a predictable naming pattern: a
+     fixed prefix plus the hexadecimal codepoint value.  */
+  code = parse_code_after_prefix (name, "CJK IDEOGRAPH-");
+  if (code >= 0)
+    {
+      /* Various ranges of CJK characters; see UnicodeData.txt.  */
+      if ((0x3400 <= code && code <= 0x4DB5)
+          || (0x4E00 <= code && code <= 0x9FD5)
+          || (0x20000 <= code && code <= 0x2A6D6)
+          || (0x2A700 <= code && code <= 0x2B734)
+          || (0x2B740 <= code && code <= 0x2B81D)
+          || (0x2B820 <= code && code <= 0x2CEA1))
+        return code;
+      else
+        invalid_character_name (name);
     }
-  max_character_name_length = length;
+
+  /* Look up the name in the table returned by `ucs-names'.  */
+  Lisp_Object names = call0 (Qucs_names);
+  return check_scalar_value (CDR (Fassoc (name, names)), name);
 }
 
+/* Bound on the length of a Unicode character name.  As of
+   Unicode 9.0.0 the maximum is 83, so this should be safe. */
+enum { UNICODE_CHARACTER_NAME_LENGTH_BOUND = 200 };
+
 /* Read a \-escape sequence, assuming we already read the `\'.
    If the escape sequence forces unibyte, return eight-bit char.  */
 
@@ -2393,10 +2448,7 @@ read_escape (Lisp_Object readcharfun, bool stringp)
         c = READCHAR;
         if (c != '{')
           invalid_syntax ("Expected opening brace after \\N");
-        if (NILP (character_names))
-          init_character_names ();
-        USE_SAFE_ALLOCA;
-        char *name = SAFE_ALLOCA (max_character_name_length + 1);
+        char name[UNICODE_CHARACTER_NAME_LENGTH_BOUND + 1];
         bool whitespace = false;
         ptrdiff_t length = 0;
         while (true)
@@ -2407,11 +2459,12 @@ read_escape (Lisp_Object readcharfun, bool stringp)
             if (c == '}')
               break;
             if (! c_isascii (c))
-              xsignal1 (Qinvalid_read_syntax,
-                        CALLN (Fformat,
-                               build_pure_c_string ("Non-ASCII character U+%04X"
-                                                    " in character name"),
-                               make_natnum (c)));
+              {
+                AUTO_STRING (format,
+                             "Non-ASCII character U+%04X in character name");
+                xsignal1 (Qinvalid_read_syntax,
+                          CALLN (Fformat, format, make_natnum (c)));
+              }
             /* We treat multiple adjacent whitespace characters as a
                single space character.  This makes it easier to use
                character names in e.g. multi-line strings.  */
@@ -2425,25 +2478,12 @@ read_escape (Lisp_Object readcharfun, bool stringp)
             else
               whitespace = false;
             name[length++] = c;
-            if (length >= max_character_name_length)
+            if (length >= sizeof name)
               invalid_syntax ("Character name too long");
           }
         if (length == 0)
           invalid_syntax ("Empty character name");
-        name[length] = 0;
-        Lisp_Object lisp_name = make_unibyte_string (name, length);
-        Lisp_Object code =
-          (length >= 3 && length <= 10 && name[0] == 'U' && name[1] == '+') ?
-          /* Code point as U+N, where N is between 1 and 8 hexadecimal
-             digits.  */
-          string_to_number (name + 2, 16, false) :
-          Fgethash (lisp_name, character_names, Qnil);
-        SAFE_FREE ();
-        if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR))
-          xsignal1 (Qinvalid_read_syntax,
-                    CALLN (Fformat,
-                           build_pure_c_string ("\\N{%s}"), lisp_name));
-        return XINT (code);
+        return character_name_to_code (make_unibyte_string (name, length));
       }
 
     default:
@@ -4835,6 +4875,5 @@ that are loaded before your customizations are read!  */);
   DEFSYM (Qrehash_size, "rehash-size");
   DEFSYM (Qrehash_threshold, "rehash-threshold");
 
-  character_names = Qnil;
-  staticpro (&character_names);
+  DEFSYM (Qucs_names, "ucs-names");
 }
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index 1f87334..ff5d0f6 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -40,10 +40,17 @@
   (should-error (read "?\\N{DOES NOT EXIST}")) :type 'invalid-read-syntax)
 
 (ert-deftest lread-char-non-ascii-name ()
-  (should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")) 'invalid-read-syntax)
+  (should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")
+                :type 'invalid-read-syntax))
 
 (ert-deftest lread-char-empty-name ()
-  (should-error (read "?\\N{}")) 'invalid-read-syntax)
+  (should-error (read "?\\N{}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-char-cjk-name ()
+  (should (equal ?\N{CJK IDEOGRAPH-2B734} #x2B734)))
+
+(ert-deftest lread-char-invalid-cjk-name ()
+  (should-error (read "?\\N{CJK IDEOGRAPH-2B735}") :type 'invalid-read-syntax))
 
 (ert-deftest lread-string-char-number ()
   (should (equal "a\N{U+A817}b" "a\uA817b")))

commit 753c875714f708c0257a2d352635c5616be66fdc
Author: Philipp Stephani <phst@google.com>
Date:   Thu Apr 21 14:47:46 2016 -0700

    Minor cleanups for character name escapes
    
    * src/lread.c (init_character_names): Add missing 'void'.
    Remove top-level 'const'.
    (read_escape): Simplify loop a bit.  Remove top-level 'const'.

diff --git a/src/lread.c b/src/lread.c
index 9fa46a8..dbe51bb 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2158,20 +2158,20 @@ static ptrdiff_t max_character_name_length;
 
 /* Initializes `character_names' and `max_character_name_length'.
    Called by `read_escape'.  */
-void init_character_names ()
+void init_character_names (void)
 {
   character_names = CALLN (Fmake_hash_table,
                            QCtest, Qequal,
                            /* Currently around 100,000 Unicode
                               characters are defined.  */
                            QCsize, make_natnum (100000));
-  const Lisp_Object get_property =
+  Lisp_Object get_property =
     Fsymbol_function (intern_c_string ("get-char-code-property"));
   ptrdiff_t length = 0;
   for (int i = 0; i <= MAX_UNICODE_CHAR; ++i)
     {
-      const Lisp_Object code = make_natnum (i);
-      const Lisp_Object name = call2 (get_property, code, Qname);
+      Lisp_Object code = make_natnum (i);
+      Lisp_Object name = call2 (get_property, code, Qname);
       if (NILP (name)) continue;
       CHECK_STRING (name);
       length = max (length, SBYTES (name));
@@ -2417,25 +2417,22 @@ read_escape (Lisp_Object readcharfun, bool stringp)
                character names in e.g. multi-line strings.  */
             if (c_isspace (c))
               {
-                if (! whitespace)
-                  {
-                    whitespace = true;
-                    name[length++] = ' ';
-                  }
+                if (whitespace)
+                  continue;
+                c = ' ';
+                whitespace = true;
               }
             else
-              {
-                whitespace = false;
-                name[length++] = c;
-              }
+              whitespace = false;
+            name[length++] = c;
             if (length >= max_character_name_length)
               invalid_syntax ("Character name too long");
           }
         if (length == 0)
           invalid_syntax ("Empty character name");
         name[length] = 0;
-        const Lisp_Object lisp_name = make_unibyte_string (name, length);
-        const Lisp_Object code =
+        Lisp_Object lisp_name = make_unibyte_string (name, length);
+        Lisp_Object code =
           (length >= 3 && length <= 10 && name[0] == 'U' && name[1] == '+') ?
           /* Code point as U+N, where N is between 1 and 8 hexadecimal
              digits.  */

commit a58d4e3c0f513294b9aebacb539542ad1b87be19
Author: Philipp Stephani <phst@google.com>
Date:   Thu Apr 21 14:47:05 2016 -0700

    Add documentation for character name escapes

diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi
index 9cf3b57..66ad9ac 100644
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -633,7 +633,7 @@ This function returns the value of @var{char}'s @var{propname} property.
 @end group
 @group
 ;; U+2163 ROMAN NUMERAL FOUR
-(get-char-code-property ?\u2163 'numeric-value)
+(get-char-code-property ?\N@{ROMAN NUMERAL FOUR@} 'numeric-value)
      @result{} 4
 @end group
 @group
diff --git a/doc/lispref/objects.texi b/doc/lispref/objects.texi
index 3245930..96b334d 100644
--- a/doc/lispref/objects.texi
+++ b/doc/lispref/objects.texi
@@ -387,6 +387,16 @@ up to three octal digits; thus, @samp{?\101} for the character
 for the character @kbd{C-b}.  Only characters up to octal code 777 can
 be specified this way.
 
+  Fourthly, you can specify characters by their name.  A character
+name escape sequence consists of a backslash, @samp{N@{}, the Unicode
+character name, and @samp{@}}.  Alternatively, you can also put the
+numeric code point value between the braces, using the syntax
+@samp{\N@{U+nnnn@}}, where @samp{nnnn} denotes between one and eight
+hexadecimal digits.  Thus, @samp{?\N@{LATIN CAPITAL LETTER A@}} and
+@samp{?\N@{U+41@}} both denote the character @kbd{A}.  To simplify
+entering multi-line strings, you can replace spaces in the character
+names by arbitrary non-empty sequence of whitespace (e.g., newlines).
+
   These escape sequences may also be used in strings.  @xref{Non-ASCII
 in Strings}.
 
diff --git a/etc/NEWS b/etc/NEWS
index 0411f19..a9b3922 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -166,6 +166,11 @@ different group ID.
 *** Support for completing attribute values and bang-rules using the
 'completion-at-point' command.
 
++++
+** Emacs now supports character name escape sequences in character and
+string literals.  The syntax variants \N{character name} and
+\N{U+code} are supported.
+
 
 * New Modes and Packages in Emacs 25.2
 

commit de7d5f36e0f3261a7300fa3a3d87ae3b758b8a73
Author: Philipp Stephani <phst@google.com>
Date:   Thu Apr 21 14:45:22 2016 -0700

    Implement named character escapes, similar to Perl
    
    * lread.c (init_character_names): New function.
    (read_escape): Read Perl-style named character escape sequences.
    (syms_of_lread): Initialize new variable 'character_names'.
    * test/src/lread-tests.el (lread-char-empty-name): Add test file
    for src/lread.c.

diff --git a/src/lread.c b/src/lread.c
index fedfcb8..9fa46a8 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -43,6 +43,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include "systime.h"
 #include "termhooks.h"
 #include "blockinput.h"
+#include <c-ctype.h>
 
 #ifdef MSDOS
 #include "msdos.h"
@@ -2149,6 +2150,36 @@ grow_read_buffer (void)
 			 MAX_MULTIBYTE_LENGTH, -1, 1);
 }
 
+/* Hash table that maps Unicode character names to code points.  */
+static Lisp_Object character_names;
+
+/* Length of the longest Unicode character name, in bytes. */
+static ptrdiff_t max_character_name_length;
+
+/* Initializes `character_names' and `max_character_name_length'.
+   Called by `read_escape'.  */
+void init_character_names ()
+{
+  character_names = CALLN (Fmake_hash_table,
+                           QCtest, Qequal,
+                           /* Currently around 100,000 Unicode
+                              characters are defined.  */
+                           QCsize, make_natnum (100000));
+  const Lisp_Object get_property =
+    Fsymbol_function (intern_c_string ("get-char-code-property"));
+  ptrdiff_t length = 0;
+  for (int i = 0; i <= MAX_UNICODE_CHAR; ++i)
+    {
+      const Lisp_Object code = make_natnum (i);
+      const Lisp_Object name = call2 (get_property, code, Qname);
+      if (NILP (name)) continue;
+      CHECK_STRING (name);
+      length = max (length, SBYTES (name));
+      Fputhash (name, code, character_names);
+    }
+  max_character_name_length = length;
+}
+
 /* Read a \-escape sequence, assuming we already read the `\'.
    If the escape sequence forces unibyte, return eight-bit char.  */
 
@@ -2356,6 +2387,68 @@ read_escape (Lisp_Object readcharfun, bool stringp)
 	return i;
       }
 
+    case 'N':
+      /* Named character: \N{NAME}, or \N{U+X} with X a hex code point.  */
+      {
+        c = READCHAR;
+        if (c != '{')
+          invalid_syntax ("Expected opening brace after \\N");
+        if (NILP (character_names))
+          init_character_names ();
+        USE_SAFE_ALLOCA;
+        char *name = SAFE_ALLOCA (max_character_name_length + 1);
+        bool whitespace = false;
+        ptrdiff_t length = 0;
+        while (true)
+          {
+            c = READCHAR;
+            if (c < 0)
+              end_of_file_error ();
+            if (c == '}')
+              break;
+            if (! c_isascii (c))
+              xsignal1 (Qinvalid_read_syntax,
+                        CALLN (Fformat,
+                               build_pure_c_string ("Non-ASCII character U+%04X"
+                                                    " in character name"),
+                               make_natnum (c)));
+            /* Collapse each run of whitespace into one space, so that a
+               name may be wrapped, e.g. inside a multi-line string.  */
+            if (! c_isspace (c))
+              {
+                whitespace = false;
+                name[length++] = c;
+              }
+            else if (! whitespace)
+              {
+                whitespace = true;
+                name[length++] = ' ';
+              }
+            /* NAME has room for max_character_name_length bytes plus the
+               terminator, so a name of exactly that length is valid.  */
+            if (length > max_character_name_length)
+              invalid_syntax ("Character name too long");
+          }
+        if (length == 0)
+          invalid_syntax ("Empty character name");
+        name[length] = 0;
+        const Lisp_Object lisp_name = make_unibyte_string (name, length);
+        /* Take U+X as a code point only if X is 1 to 8 hexadecimal digits;
+           a sign or other junk goes to the name lookup, which fails.  */
+        bool hex = (length >= 3 && length <= 10
+                    && name[0] == 'U' && name[1] == '+');
+        for (ptrdiff_t i = 2; hex && i < length; i++)
+          hex = c_isxdigit (name[i]);
+        Lisp_Object code = (hex ? string_to_number (name + 2, 16, false)
+                            : Fgethash (lisp_name, character_names, Qnil));
+        SAFE_FREE ();
+        if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR))
+          xsignal1 (Qinvalid_read_syntax,
+                    CALLN (Fformat,
+                           build_pure_c_string ("\\N{%s}"), lisp_name));
+        return XINT (code);
+      }
+
     default:
       return c;
     }
@@ -4744,4 +4837,7 @@ that are loaded before your customizations are read!  */);
   DEFSYM (Qweakness, "weakness");
   DEFSYM (Qrehash_size, "rehash-size");
   DEFSYM (Qrehash_threshold, "rehash-threshold");
+
+  character_names = Qnil;
+  staticpro (&character_names);
 }
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
new file mode 100644
index 0000000..1f87334
--- /dev/null
+++ b/test/src/lread-tests.el
@@ -0,0 +1,54 @@
+;;; lread-tests.el --- tests for lread.c -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2016  Google Inc.
+
+;; Author: Philipp Stephani <phst@google.com>
+
+;; This file is part of GNU Emacs.
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; Unit tests for code in src/lread.c.
+
+;;; Code:
+
+(ert-deftest lread-char-number ()      ; \N{U+X} read as a character
+  (should (equal #xA817 ?\N{U+A817})))
+
+(ert-deftest lread-char-name ()
+  ;; The doubled space and the line break inside the name are deliberate.
+  (should (equal #xA817 ?\N{SYLOTI  NAGRI LETTER
+                        DHO})))
+
+(ert-deftest lread-char-invalid-number () ; U+110000 is past the last code point
+  (should-error (read-from-string "?\\N{U+110000}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-char-invalid-name ()  ; an unknown name is a read error
+  (should-error (read "?\\N{DOES NOT EXIST}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-char-non-ascii-name () ; names must be pure ASCII
+  (should-error (read "?\\N{LATIN CAPITAL LETTER Ø}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-char-empty-name ()  ; \N{} is a read error
+  (should-error (read "?\\N{}") :type 'invalid-read-syntax))
+
+(ert-deftest lread-string-char-number () ; \N{U+X} inside a string literal
+  (should (equal "a\uA817b" "a\N{U+A817}b")))
+
+(ert-deftest lread-string-char-name () ; the doubled space is deliberate
+  (should (equal "a\uA817b" "a\N{SYLOTI NAGRI  LETTER DHO}b")))
+
+;;; lread-tests.el ends here

commit 7621a521452d988b27e761c76ad8e667e932192e
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Thu Apr 21 14:44:31 2016 -0700

    ; Spelling fixes

diff --git a/etc/NEWS b/etc/NEWS
index 2373347..0411f19 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -26,8 +26,8 @@ otherwise leave it unmarked.
 * Installation Changes in Emacs 25.2
 
 ** The new option 'configure --enable-gcc-warnings=warn-only' causes
-GCC to issue warnings without stopping the bild.  This behavior is now
-the default in developer builds.  As before, use
+GCC to issue warnings without stopping the build.  This behavior is
+now the default in developer builds.  As before, use
 '--disable-gcc-warnings' to suppress GCC's warnings, and
 '--enable-gcc-warnings' to stop the build if GCC issues warnings.
 
diff --git a/lisp/gnus/ChangeLog.2 b/lisp/gnus/ChangeLog.2
index 73f5f09..ed0e81f 100644
--- a/lisp/gnus/ChangeLog.2
+++ b/lisp/gnus/ChangeLog.2
@@ -4705,7 +4705,7 @@
 	illegible and invisible text.
 
 	* gnus-util.el (gnus-multiple-choice): Separate choices with
-	", ".  Suggested by Dan Jacobson <jidanni@dman.ddts.net>.
+	", ".  Suggested by Dan Jacobson <jidanni@dman.ddts.net>.
 
 2003-02-18  Jesper Harder  <harder@ifa.au.dk>
 
diff --git a/lisp/gnus/ChangeLog.3 b/lisp/gnus/ChangeLog.3
index e6cbe04..f734e6e 100644
--- a/lisp/gnus/ChangeLog.3
+++ b/lisp/gnus/ChangeLog.3
@@ -9090,7 +9090,7 @@
 	(shr-kinsoku-shorten): New internal variable.
 	(shr-find-fill-point): Make kinsoku shorten text line if
 	shr-kinsoku-shorten is bound to non-nil.
-	(shr-tag-table): Bild shr-kinsoku-shorten to t; refer to
+	(shr-tag-table): Bind shr-kinsoku-shorten to t; refer to
 	shr-indentation too when testing if table is wider than frame width.
 	(shr-insert-table): Use `string-width' instead of `length' to measure
 	text width.
diff --git a/lisp/org/ChangeLog.1 b/lisp/org/ChangeLog.1
index ccc849d..9b6702f 100644
--- a/lisp/org/ChangeLog.1
+++ b/lisp/org/ChangeLog.1
@@ -9043,7 +9043,7 @@
 	(pcomplete/org-mode/file-option/email)
 	(pcomplete/org-mode/file-option/date): Use the new macro to offer
 	completion over default values for #+OPTIONS, #+TITLE, #+AUTHOR,
-	#+EMAIL and #+DATE.
+	#+EMAIL and #+DATE.
 
 	* org-agenda.el (org-agenda-write): Fix bug when writing agenda to
 	an external file while `org-agenda-sticky' is non-nil.
diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el
index 7a8213b..6489199 100644
--- a/lisp/progmodes/cc-langs.el
+++ b/lisp/progmodes/cc-langs.el
@@ -3069,7 +3069,7 @@ is in effect or not."
 
 (c-lang-defconst c-special-brace-lists
 "List of open- and close-chars that makes up a pike-style brace list,
-i.e. for a ([ ]) list there should be a cons (?\\[ . ?\\]) in this
+i.e., for a ([ ]) list there should be a cons (?\\[ . ?\\]) in this
 list."
   t    nil
   pike '((?{ . ?}) (?\[ . ?\]) (?< . ?>)))
diff --git a/test/lisp/erc/erc-track-tests.el b/test/lisp/erc/erc-track-tests.el
index 8e39e1b..2e60b0d 100644
--- a/test/lisp/erc/erc-track-tests.el
+++ b/test/lisp/erc/erc-track-tests.el
@@ -3,7 +3,7 @@
 ;; Copyright © 2016 Free Software Foundation, Inc.
 
 ;; Author: Mario Lang <mlang@delysid.org>
-;; Author: Vivek Das Mohapatra <vivek@etla.org>
+;; Author: Vivek Dasmohapatra <vivek@etla.org>
 
 ;; This file is part of GNU Emacs.
 
diff --git a/test/lisp/vc/vc-tests.el b/test/lisp/vc/vc-tests.el
index 2b3445a..1a3e8e0 100644
--- a/test/lisp/vc/vc-tests.el
+++ b/test/lisp/vc/vc-tests.el
@@ -218,7 +218,7 @@ For backends which dont support it, `vc-not-supported' is signalled."
 
 (defun vc-test--register (backend)
   "Register and unregister a file.
-This checks also `vc-backend' and `vc-reponsible-backend'."
+This checks also `vc-backend' and `vc-responsible-backend'."
 
   (let ((vc-handled-backends `(,backend))
 	(default-directory

commit 798caa12af4260f5cd26cac06e5438eedff47994
Author: Fabrice Popineau <fabrice.popineau@gmail.com>
Date:   Thu Apr 21 19:23:00 2016 +0300

    Avoid run-time dependency on libwinpthread DLL on MS-Windows
    
    * nt/mingw-cfg.site (ac_cv_search_clock_gettime)
    (ac_cv_func_clock_gettime, ac_cv_func_clock_settime): Force to not
    present, so that MinGW64 builds don't depend on libwinpthread.
    (Bug#22959)

diff --git a/nt/mingw-cfg.site b/nt/mingw-cfg.site
index ff9df60..9d63008 100644
--- a/nt/mingw-cfg.site
+++ b/nt/mingw-cfg.site
@@ -40,6 +40,12 @@ gl_cv_sys_struct_timespec_in_pthread_h=no
 # Or at all...
 ac_cv_header_pthread_h=no
 
+# We don't want to check for these functions
+# because they are implemented in libwinpthread.
+ac_cv_search_clock_gettime="none required"
+ac_cv_func_clock_gettime=no
+ac_cv_func_clock_settime=no
+
 # ACL functions are implemented in w32.c
 ac_cv_search_acl_get_file="none required"
 ac_cv_func_acl_get_file=yes

commit f24004906e68597d4940f31a570d0e2bd7a9afe5
Author: Matthew Leach <matthew@mattleach.net>
Date:   Thu Apr 21 19:16:41 2016 +0300

    Add LIBSYSTEMD to the list of supported features
    
    * configure.ac: Add LIBSYSTEMD to EMACS_CONFIG_FEATURES and print a
    message at the end of configure stating whether Emacs will be built
    with libsystemd support.

diff --git a/configure.ac b/configure.ac
index 1cd9017..5a6a72a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5207,7 +5207,7 @@ emacs_config_features=
 for opt in XAW3D XPM JPEG TIFF GIF PNG RSVG CAIRO IMAGEMAGICK SOUND GPM DBUS \
   GCONF GSETTINGS NOTIFY ACL LIBSELINUX GNUTLS LIBXML2 FREETYPE M17N_FLT \
   LIBOTF XFT ZLIB TOOLKIT_SCROLL_BARS X_TOOLKIT X11 NS MODULES \
-  XWIDGETS; do
+  XWIDGETS LIBSYSTEMD; do
 
     case $opt in
       NOTIFY|ACL) eval val=\${${opt}_SUMMARY} ;;
@@ -5254,6 +5254,7 @@ AS_ECHO(["  Does Emacs use -lXaw3d?                                 ${HAVE_XAW3D
   Does Emacs use -lm17n-flt?                              ${HAVE_M17N_FLT}
   Does Emacs use -lotf?                                   ${HAVE_LIBOTF}
   Does Emacs use -lxft?                                   ${HAVE_XFT}
+  Does Emacs use -lsystemd?                               ${HAVE_LIBSYSTEMD}
   Does Emacs directly use zlib?                           ${HAVE_ZLIB}
   Does Emacs have dynamic modules support?                ${HAVE_MODULES}
   Does Emacs use toolkit scroll bars?                     ${USE_TOOLKIT_SCROLL_BARS}

commit caa31fcaa2fc7b05e411c8155d7641e7660a735a
Author: Vasilij Schneidermann <v.schneidermann@gmail.com>
Date:   Thu Apr 21 18:59:37 2016 +0300

    Support absolute column movement in 'ansi-term'
    
    * lisp/term.el (term-handle-ansi-escape): Handle the "\E[G" sequence
    for absolute column movement.  (Bug#23303)

diff --git a/lisp/term.el b/lisp/term.el
index 3520a39..2d5d3e9 100644
--- a/lisp/term.el
+++ b/lisp/term.el
@@ -3260,6 +3260,10 @@ See `term-prompt-regexp'."
    ;; \E[D - cursor left (terminfo: cub)
    ((eq char ?D)
     (term-move-columns (- (max 1 term-terminal-parameter))))
+   ;; \E[G - cursor motion to absolute column (terminfo: hpa)
+   ((eq char ?G)
+    (term-move-columns (- (max 0 (min term-width term-terminal-parameter))
+                          (term-current-column))))
    ;; \E[J - clear to end of screen (terminfo: ed, clear)
    ((eq char ?J)
     (term-erase-in-display term-terminal-parameter))