commit 51c1fdcccf785df94878cec12d6a9910f2fdcb03 (HEAD, refs/remotes/origin/master) Author: Paul Eggert Date: Sat May 19 10:01:23 2018 -0700 * admin/notes/unicode: HELLO is again UTF-8. diff --git a/admin/notes/unicode b/admin/notes/unicode index ff0de8aeff..b3a962deee 100644 --- a/admin/notes/unicode +++ b/admin/notes/unicode @@ -232,10 +232,6 @@ nontrivial changes to the build process. * iso-2022-7bit - This file switches between CJK charsets, which is not encoded in UTF-8. - - etc/HELLO - Each of these files contains just one CJK charset, but Emacs currently has no easy way to specify set-charset-priority on a per-file basis, so converting any of these files to UTF-8 might commit 3589c966b743850c29b89b83710783d4ea026770 Author: Eli Zaretskii Date: Sat May 19 18:17:38 2018 +0300 Use Enriched mode in etc/HELLO to keep charset information This allows to encode HELLO in UTF-8, thus supporting the entire repertory of Unicode, while still keeping the charset info where that is important. Suggested by Michael Welsh Duggan . * lisp/textmodes/enriched.el (enriched-translations): Add translations for 'charset'. (enriched-decode-charset, enriched-handle-charset-prop): New functions. * lisp/facemenu.el (facemenu-special-menu): Add sub-menu for 'charset' property. (facemenu-set-charset): New function. (facemenu-remove-special): Remove the 'charset' property as well. * etc/NEWS: Announce the new feature of Enriched mode. * etc/HELLO: Recode in UTF-8 and place under Enriched mode. * doc/emacs/text.texi (Enriched Properties): Mention the support for 'charset'. diff --git a/doc/emacs/text.texi b/doc/emacs/text.texi index 6a5fc7c6f6..1e96163105 100644 --- a/doc/emacs/text.texi +++ b/doc/emacs/text.texi @@ -2416,11 +2416,13 @@ to the commands above. 
@subsection Setting Other Text Properties The Special Properties submenu of Text Properties has entries for -adding or removing three other text properties: @code{read-only}, +adding or removing four other text properties: @code{read-only} (which disallows alteration of the text), @code{invisible} (which -hides text), and @code{intangible} (which disallows moving point -within the text). The @samp{Remove Special} menu item removes all of -these special properties from the text in the region. +hides text), @code{intangible} (which disallows moving point within +the text), and @code{charset} (which is important for selecting a +proper font to display a character). The @samp{Remove Special} menu +item removes all of these special properties from the text in the +region. The @code{invisible} and @code{intangible} properties are not saved. diff --git a/etc/HELLO b/etc/HELLO index 2c95e21136..f415ec088b 100644 --- a/etc/HELLO +++ b/etc/HELLO @@ -1,99 +1,114 @@ +Content-Type: text/enriched +Text-Width: 70 + This is a list of ways to say hello in various languages. It is not intended to be comprehensive, but to demonstrate some of the character sets that Emacs supports. 
+ Non-ASCII examples: - Europe: ,A!(BHola!, Gr,A|_(B Gott, Hyv,Add(B p,Ad(Biv,Add(B, Tere ,Au(Bhtust, Bon,Cu(Bu - Cze,B6f(B!, Dobr,B}(B den, ,L7T`PRabRcYbU(B!, ,FCei\(B ,Fsar(B, $,1J2J0J;J0J@JOJ=J1J0(B - Africa: $(3!A!,!>(B - Middle/Near East: ,Hylem(B, $,1-g.$-s.1.$-g.%(B $,1-y.$.*.#.%(B - South Asia: $,19h9n9x:-9d:'(B, $,15h5n5x6-5d6'(B, $,1?(?.?8?M>u?>?0(B, $,1@H@N@X@m@5@^@P@"(B, $,1;6;A;#;?;,;G(B, - $,1AFAzB4AvB=B AqB*(B, $,1-=U=~=p=B(B, $(7"7"!#C!;"E"S"G!;"7"2"[!;"D"["#"G!>(B - South East Asia: $,1\'\f\:\V\4\?\]\:(B, (1JP:R-4U(B, $,1H9H$HZHYH"HH3gGO<latin-iso8859-1¡Hola!, Grüß Gott, Hyvää päivää, Tere õhtust, Bonlatin-iso8859-3ġu + Czelatin-iso8859-2ść!, Dobrý den, cyrillic-iso8859-5Здравствуйте!, greek-iso8859-7Γειά σας, mule-unicode-0100-24ffგამარჯობა + Africa: ethiopicሠላም + Middle/Near East: hebrew-iso8859-8שלום, mule-unicode-0100-24ffالسّلام عليكم + South Asia: નમસ્તે, नमस्ते, ನಮಸ್ಕಾರ, നമസ്കാരം, ଶୁଣିବେ, + ආයුබෝවන්, வணக்கம், నమస్కారం, tibetanབཀྲ་ཤིས་བདེ་ལེགས༎ + South East Asia: mule-unicode-0100-24ffជំរាបសួរ, laoສະບາຍດີ, mule-unicode-0100-24ffမင်္ဂလာပါ, thai-tis620สวัสดีครับ, Chvietnamese-viscii-lowerào bạn + East Asia: chinese-gb2312你好, chinese-big5-1早晨, japanese-jisx0208こんにちは, korean-ksc5601안녕하세요 + Misc: Elatin-iso8859-3ĥoŝanĝo ĉiuĵaŭde, mule-unicode-2500-33ff⠓⠑⠇⠇⠕, mule-unicode-0100-24ff∀ p ∈ world • hello p mule-unicode-2500-33ff□ + CJK variety: GB(chinese-gb2312元气,开发), BIG5(chinese-big5-1元氣,開發), JIS(japanese-jisx0208元気,開発), KSC(korean-ksc5601元氣,開發) + Unicode charset: Emule-unicode-0100-24ffĥoŝanĝo ĉiuĵaŭde, Γειά σας, שלום, Здравствуйте! 
+ LANGUAGE (NATIVE NAME) HELLO ---------------------- ----- -Amharic ($,1O M[MmN{(B) $,1M`MKM](B -Arabic ($,1-g.$-y-q-h.*.1-i(B) $,1-g.$-s.1.$-g.%(B $,1-y.$.*.#.%(B -Armenian ($,1+p+a+u+e, +e+v(B) $,1+2+a, ,'(B $,1+q+e+f(B -Bengali ($,17,7>6b727>(B) $,17(7.787M6u7>70(B -Braille $,2(3(1('('(5(B -Burmese ($,1H9H\H4HZH9HL(B) $,1H9H$HZHYH"Hmule-unicode-2500-33ff⠓⠑⠇⠇⠕ +Burmese (mule-unicode-0100-24ffမြန်မာ) မင်္ဂလာပါ C printf ("Hello, world!\n"); -Czech (,Bh(Be,B9(Btina) Dobr,A}(B den -Danish (dansk) Hej / Goddag / Hall,Ax(Bj +Czech (latin-iso8859-2čeština) Dobrlatin-iso8859-1ý den +Danish (dansk) Hej / Goddag / Halløj Dutch (Nederlands) Hallo / Dag Emacs emacs --no-splash -f view-hello-file -English /$(O+S,0!,D?$(O*y(Bl,0!$(O*h(B/ Hello -Esperanto Saluton (E,C6(Bo,C~(Ban,Cx(Bo ,Cf(Biu,C<(Ba,C}(Bde) -Estonian (eesti keel) Tere p,Ad(Bevast / Tere ,Au(Bhtust -Finnish (suomi) Hei / Hyv,Add(B p,Ad(Biv,Add(B -French (fran,Ag(Bais) Bonjour / Salut -Georgian ($,1JEJ0J@J7J5J4J:J8(B) $,1J2J0J;J0J@JOJ=J1J0(B -German (Deutsch) Guten Tag / Gr,A|_(B Gott -Greek (,Fekkgmij\(B) ,FCei\(B ,Fsar(B -Greek, ancient ($,1p1,Fkkgmij^(B) ,FO$,1pv,Fk](B ,Fte(B ,Fja$,1q6(B ,Fl]ca(B ,Fwa$,1r6,Fqe(B -Gujarati ($,19W:!9\9p9~9d: (B) $,19h9n9x:-9d:'(B -Hebrew ($,1-",q-(,y-*(B) ,Hylem(B -Hungarian (magyar) Sz,Bi(Bp j,Bs(B napot! 
-Hindi ($,15y55B5f6 (B) $,15h5n5x6-5d6'(B / $,15h5n5x6-5U5~5p(B $,16D(B + +Emoji unicode👋 +latin-iso8859-1English /japanese-jisx0213-1ˈipaɪlatin-iso8859-4ŋjapanese-jisx0213-1ɡlipaɪjapanese-jisx0213-1ʃ/ Hello +Esperanto Saluton (Elatin-iso8859-3ĥoŝanĝo ĉiuĵaŭde) +Estonian (eesti keel) Tere platin-iso8859-1äevast / Tere õhtust +Finnish (suomi) Hei / Hyvää päivää +French (français) Bonjour / Salut +Georgian (mule-unicode-0100-24ffქართველი) გამარჯობა +German (Deutsch) Guten Tag / Grlatin-iso8859-1üß Gott +Greek (greek-iso8859-7ελληνικά) Γειά σας +Greek, ancient (mule-unicode-0100-24ffἑgreek-iso8859-7λληνική) Οmule-unicode-0100-24ffὖgreek-iso8859-7λέ τε καmule-unicode-0100-24ffὶ greek-iso8859-7μέγα χαmule-unicode-0100-24ffῖgreek-iso8859-7ρε +Gujarati (mule-unicode-0100-24ffગુજરાતી) નમસ્તે +Hebrew hebrew-iso8859-8(עברית)mule-unicode-0100-24ff hebrew-iso8859-8שלום +Hungarian (magyar) Szlatin-iso8859-2ép jó napot! +Hindi (mule-unicode-0100-24ffहिंदी) नमस्ते / नमस्कार । Italian (italiano) Ciao / Buon giorno Javanese (Jawa) System.out.println("Sugeng siang!"); -Kannada ($,1>u?(?M?(?!(B) $,1?(?.?8?M>u?>?0(B -Khmer ($,1\7\V\?\V\!\r\8\b\:(B) $,1\'\f\:\V\4\?\]\:(B -Lao ((1>RJRERG(B) (1JP:R-4U(B / (1"mcKib*!4U(B -Malayalam ($,1@N@R@O@^@S@"(B) $,1@H@N@X@m@5@^@P@"(B -Maltese (il-Malti) Bon,Cu(Bu / Sa,C11(Ba -Mathematics $,1x (B p $,1x((B world $,1s"(B hello p $,2!a(B -Mongolian (,L\^]S^[(B ,Lem[(B) ,LAPY](B ,LQPY]P(B ,Lcc(B? +Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ +Khmer (ភាសាខ្មែរ) ជំរាបសួរ +Lao (laoພາສາລາວ) ສະບາຍດີ / ຂໍໃຫ້ໂຊກດີ +Malayalam (mule-unicode-0100-24ffമലയാളം) നമസ്കാരം +Maltese (il-Malti) Bonlatin-iso8859-3ġu / Saħħa +Mathematics mule-unicode-0100-24ff∀ p ∈ world • hello p mule-unicode-2500-33ff□ +Mongolian (cyrillic-iso8859-5монгол хэл) Сайн байна уу? Norwegian (norsk) Hei / God dag -Oriya ($,1:s;\;?:f(B) $,1;6;A;#;?;,;G(B -Polish (j,Bj(Bzyk polski) Dzie,Bq(B dobry! / Cze,B6f(B! -Russian (,L`caaZXY(B) ,L7T`P$(O+Z,LRabRcYbU(B! 
-Sinhala ($,1B#B2ABB$A}(B) $,1AFAzB4AvB=B AqB*(B -Slovak (sloven,Bh(Bina) Dobr,A}(B de,Br(B -Slovenian (sloven,B9h(Bina) Pozdravljeni! -Spanish (espa,Aq(Bol) ,A!(BHola! -Swedish (svenska) Hej / Goddag / Hall,Ae(B -Tamil ($,1&=r>!=W>!(B) $,1=h=n=x>-=U=~=p=B(B -Thai (,T@RIRd7B(B) ,TJGQJ4U$CQ:(B / ,TJGQJ4U$hP(B -Tibetan ($(7"7"]"2!;"G#!"2!;(B) $(7"7"!#C!;"E"S"G!;"7"2"[!;"D"["#"G!>(B -Tigrigna ($,1NUP-MmN{(B) $,1MpMKM[NU(B -Turkish (T,A|(Brk,Ag(Be) Merhaba -Ukrainian (,LcZ`Pw]alZP(B) ,L2vbPn(B -Vietnamese (ti,1*(Bng Vi,1.(Bt) Ch,A`(Bo b,1U(Bn - -Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B / (I:]FAJ(B -Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B -Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B -Korean ($(CGQ1[(B) $(C>H3gGO<H3gGO=J4O1n(B - - +Oriya (mule-unicode-0100-24ffଓଡ଼ିଆ) ଶୁଣିବେ +Polish (jlatin-iso8859-2ęzyk polski) Dzień dobry! / Cześć! +Russian (cyrillic-iso8859-5русский) Здраjapanese-jisx0213-1́cyrillic-iso8859-5вствуйте! +Sinhala (mule-unicode-0100-24ffසිංහල) ආයුබෝවන් +Slovak (slovenlatin-iso8859-2čina) Dobrlatin-iso8859-1ý delatin-iso8859-2ň +Slovenian (slovenščina) Pozdravljeni! +Spanish (espalatin-iso8859-1ñol) ¡Hola! +Swedish (svenska) Hej / Goddag / Hallå +Tamil (mule-unicode-0100-24ffதமிழ்) வணக்கம் +Telugu (తెలుగు) నమస్కారం +Thai (thai-tis620ภาษาไทย) สวัสดีครับ / สวัสดีค่ะ +Tibetan (tibetanབོད་སྐད་) བཀྲ་ཤིས་བདེ་ལེགས༎ +Tigrigna (mule-unicode-0100-24ffትግርኛ) ሰላማት +Turkish (Tlatin-iso8859-1ürkçe) Merhaba +Ukrainian (cyrillic-iso8859-5українська) Вітаю +Vietnamese (tivietnamese-viscii-lowerếng Việt) Chlatin-iso8859-1ào bvietnamese-viscii-lowerạn + + +Japanese (japanese-jisx0208日本語) こんにちは / katakana-jisx0201コンニチハ +Chinese (chinese-gb2312中文,普通话,汉语) 你好 +Cantonese (chinese-big5-1粵語,廣東話) 早晨, 你好 +Korean (korean-ksc5601한글) 안녕하세요 / 안녕하십니까 + + +unicode + Copyright (C) 2001-2018 Free Software Foundation, Inc. + This file is part of GNU Emacs. 
+ GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + You should have received a copy of the GNU General Public License -along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. +along with GNU Emacs. If not, see <<https://www.gnu.org/licenses/>. + ;;; Local Variables: ;;; tab-width: 32 ;;; bidi-display-reordering: t -;;; coding: iso-2022-7bit -;;; End: +;;; coding: utf-8 +;;; End: diff --git a/etc/NEWS b/etc/NEWS index c7ffb17ad3..ae8a366f4b 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -233,13 +233,20 @@ and its value has been changed to Duck Duck Go. 'shr-selected-link' face to give the user feedback that the command has been executed. - ** Htmlfontify *** The functions 'hfy-color', 'hfy-color-vals' and 'hfy-fallback-color-values' and the variables 'hfy-fallback-color-map' and 'hfy-rgb-txt-color-map' have been renamed from names that used 'colour' instead of 'color'. ++++ +** Enriched mode supports the 'charset' text property. +You can add or modify the 'charset' text properties of text using the +Edit->Text Properties->Special Properties menu, or by invoking the +'facemenu-set-charset' command. Documents in Enriched mode will be +saved with the charset properties, and those properties will be +restored when the file is visited. + ** Smtpmail Authentication mechanisms can be added via external packages, by defining new cl-defmethod of smtpmail-try-auth-method. diff --git a/etc/enriched.txt b/etc/enriched.txt index 773fa619f2..bf3e91a837 100644 --- a/etc/enriched.txt +++ b/etc/enriched.txt @@ -64,7 +64,11 @@ the right margin, fully justified, centered, or left alone). Excerpts: "For quoted material." 
-Read-only regions. +Read-only, Invisible, and Intangible regions. + +Charset properties. + +Display properties. @@ -158,6 +162,16 @@ parts of other people's email messages and the like. It is just a face, which is the same as the 'italic' face by default. +bluewhiteCHARSET + + +You can add character set information to stretches of text; this +is important for selecting the font that will display that text. +Users of various charsets, especially in East Asian cultures, +prefer the same characters to be rendered differently depending on +the language/charset context. + + bluewhiteTHE FILE FORMAT @@ -175,7 +189,7 @@ as possible. The text/enriched standard is defined in Internet RFC 1896 -(<). +(<). bluewhiteCUSTOMIZATION @@ -192,9 +206,9 @@ requires you to name your annotation starting "x-" (as in "x-read-only"). Please report any such additions that you think might be of general interest using M-x report-emacs-bug. - + -bluewhiteTODO LIST +bluewhiteTODO LIST [Feel free to work on these and send us the results!] @@ -235,7 +249,7 @@ it. bluewhiteOriginal Author: -whiteblueBoris Goldowskylight blue light blue<blue +whiteblueBoris Goldowskylight blue light blue< diff --git a/lisp/facemenu.el b/lisp/facemenu.el index be5a18c8cc..a4f675b8c1 100644 --- a/lisp/facemenu.el +++ b/lisp/facemenu.el @@ -188,6 +188,8 @@ it will remove any faces not explicitly in the list." (let ((map (make-sparse-keymap "Special"))) (define-key map [?s] (cons (purecopy "Remove Special") 'facemenu-remove-special)) + (define-key map [?c] (cons (purecopy "Charset") + 'facemenu-set-charset)) (define-key map [?t] (cons (purecopy "Intangible") 'facemenu-set-intangible)) (define-key map [?v] (cons (purecopy "Invisible") @@ -433,6 +435,28 @@ This sets the `read-only' text property; it can be undone with (interactive "r") (add-text-properties start end '(read-only t))) +(defun facemenu-set-charset (cset &optional start end) + "Apply CHARSET text property to the region or next character typed. 
+ +If the region is active (normally true except in Transient +Mark mode) and nonempty, and there is no prefix argument, +this command adds CHARSET property to the region. Otherwise, it +sets the CHARSET property of the character at point." + (interactive (list (progn + (barf-if-buffer-read-only) + (read-charset + (format "Use charset (default %s): " (charset-after)) + (charset-after))) + (if (and mark-active (not current-prefix-arg)) + (region-beginning)) + (if (and mark-active (not current-prefix-arg)) + (region-end)))) + (or start + (setq start (min (point) (1- (point-max))) + end (1+ start))) + (remove-text-properties start end '(charset nil)) + (put-text-property start end 'charset cset)) + (defun facemenu-remove-face-props (start end) "Remove `face' and `mouse-face' text properties." (interactive "*r") ; error if buffer is read-only despite the next line. @@ -452,7 +476,7 @@ These special properties include `invisible', `intangible' and `read-only'." (interactive "*r") ; error if buffer is read-only despite the next line. 
(let ((inhibit-read-only t)) (remove-text-properties - start end '(invisible nil intangible nil read-only nil)))) + start end '(invisible nil intangible nil read-only nil charset nil)))) (defalias 'facemenu-read-color 'read-color) diff --git a/lisp/textmodes/enriched.el b/lisp/textmodes/enriched.el index b9d247132d..6b4c44a39e 100644 --- a/lisp/textmodes/enriched.el +++ b/lisp/textmodes/enriched.el @@ -120,9 +120,11 @@ expression, which is evaluated to get the string to insert.") ;; The following are not part of the standard: (FUNCTION (enriched-decode-foreground "x-color") (enriched-decode-background "x-bg-color") - (enriched-decode-display-prop "x-display")) + (enriched-decode-display-prop "x-display") + (enriched-decode-charset "x-charset")) (read-only (t "x-read-only")) (display (nil enriched-handle-display-prop)) + (charset (nil enriched-handle-charset-prop)) (unknown (nil format-annotate-value)) ; (font-size (2 "bigger") ; unimplemented ; (-2 "smaller")) @@ -492,6 +494,21 @@ Return value is \(begin end name positive-p), or nil if none was found." (list from to 'face (list ':background color)) (message "Warning: no color specified for ") nil)) + +(defun enriched-decode-charset (from to &optional cset) + (let ((cs (when (stringp cset) + (condition-case () + (car (read-from-string cset)) + (error nil))))) + (unless cs + (message "Warning: invalid parameter %s" cset)) + (list from to 'charset cs))) + +(defun enriched-handle-charset-prop (old new) + "Return a list of annotations for a change in the `charset' property." + (cons (and old (list (list "x-charset" (symbol-name old)))) + (and new (list (list "x-charset" (symbol-name new)))))) + ;;; Handling the `display' property. commit 593c367b0727affc739832ab4f4bdb9d7dd1ddd7 Author: Paul Eggert Date: Fri May 18 15:53:21 2018 -0700 * src/alloc.c: Fix comment. 
diff --git a/src/alloc.c b/src/alloc.c index 231ade5cf8..d959c55350 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -508,8 +508,8 @@ pointer_align (void *ptr, int alignment) DEFINE_KEY_OPS_AS_MACROS, for performance in that case. The macro_* macros are private to this section of code. */ -/* Add a pointer an an integer without complaint about a pointer going - out of range of the underlying array. */ +/* Add a pointer P to an integer I without gcc -fsanitize complaining + about the result being out of range of the underlying array. */ #define macro_PNTR_ADD(p, i) ((p) + (i)) commit a1c925fd41818cb8ad209762739b220efb919d1e Author: Paul Eggert Date: Fri May 18 15:45:42 2018 -0700 Port to GCC 8 -fsanitize=undefined In GCC 8, gcc -fsanitize=undefined flags the undefined behavior that Emacs relies on in its XPNTR and XSYMBOL low-level functions. Disable undefined sanitization in these functions. Although this disabling doesn’t suffice if DEFINE_KEY_OPS_AS_MACROS is true, it works for -fsanitize=undefined -DINLINING=0, which is good enough. * src/alloc.c (macro_PNTR_ADD): New macro. (PNTR_ADD): New function and macro. The function disables -fsanitize=undefined. (macro_XPNTR): Use it. * src/conf_post.h (ATTRIBUTE_NO_SANITIZE_UNDEFINED): New macro. * src/lisp.h (XSYMBOL): Disable -fsanitize=undefined. diff --git a/src/alloc.c b/src/alloc.c index 8264e0623c..231ade5cf8 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -503,18 +503,34 @@ pointer_align (void *ptr, int alignment) return (void *) ROUNDUP ((uintptr_t) ptr, alignment); } -/* Extract the pointer hidden within O. Define this as a function, as - functions are cleaner and can be used in debuggers. Also, define - it as a macro if being compiled with GCC without optimization, for - performance in that case. macro_XPNTR is private to this section - of code. */ +/* Define PNTR_ADD and XPNTR as functions, which are cleaner and can + be used in debuggers. 
Also, define them as macros if + DEFINE_KEY_OPS_AS_MACROS, for performance in that case. + The macro_* macros are private to this section of code. */ + +/* Add a pointer an an integer without complaint about a pointer going + out of range of the underlying array. */ + +#define macro_PNTR_ADD(p, i) ((p) + (i)) + +static char * ATTRIBUTE_NO_SANITIZE_UNDEFINED ATTRIBUTE_UNUSED +PNTR_ADD (char *p, EMACS_UINT i) +{ + return macro_PNTR_ADD (p, i); +} + +#if DEFINE_KEY_OPS_AS_MACROS +# define PNTR_ADD(p, i) macro_PNTR_ADD (p, i) +#endif + +/* Extract the pointer hidden within O. */ #define macro_XPNTR(o) \ ((void *) \ (SYMBOLP (o) \ - ? ((char *) lispsym \ - - ((EMACS_UINT) Lisp_Symbol << (USE_LSB_TAG ? 0 : VALBITS)) \ - + XLI (o)) \ + ? PNTR_ADD ((char *) lispsym, \ + (XLI (o) \ + - ((EMACS_UINT) Lisp_Symbol << (USE_LSB_TAG ? 0 : VALBITS)))) \ : (char *) XLP (o) - (XLI (o) & ~VALMASK))) static ATTRIBUTE_UNUSED void * diff --git a/src/conf_post.h b/src/conf_post.h index 00e283d289..bf2cfc4f05 100644 --- a/src/conf_post.h +++ b/src/conf_post.h @@ -67,6 +67,7 @@ typedef bool bool_bf; # define __has_attribute_externally_visible GNUC_PREREQ (4, 1, 0) # define __has_attribute_no_address_safety_analysis false # define __has_attribute_no_sanitize_address GNUC_PREREQ (4, 8, 0) +# define __has_attribute_no_sanitize_undefined GNUC_PREREQ (4, 9, 0) #endif /* Simulate __has_builtin on compilers that lack it. It is used only @@ -338,6 +339,17 @@ extern int emacs_setenv_TZ (char const *); # define ATTRIBUTE_NO_SANITIZE_ADDRESS #endif +/* Attribute of functions whose undefined behavior should not be sanitized. 
*/ + +#if __has_attribute (no_sanitize_undefined) +# define ATTRIBUTE_NO_SANITIZE_UNDEFINED __attribute__ ((no_sanitize_undefined)) +#elif __has_attribute (no_sanitize) +# define ATTRIBUTE_NO_SANITIZE_UNDEFINED \ + __attribute__ ((no_sanitize ("undefined"))) +#else +# define ATTRIBUTE_NO_SANITIZE_UNDEFINED +#endif + /* gcc -fsanitize=address does not work with vfork in Fedora 25 x86-64. For now, assume that this problem occurs on all platforms. */ #if ADDRESS_SANITIZER && !defined vfork diff --git a/src/lisp.h b/src/lisp.h index a18b64a588..ee2e72d32b 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -900,7 +900,7 @@ INLINE bool return lisp_h_SYMBOLP (x); } -INLINE struct Lisp_Symbol * +INLINE struct Lisp_Symbol * ATTRIBUTE_NO_SANITIZE_UNDEFINED (XSYMBOL) (Lisp_Object a) { #if USE_LSB_TAG commit f4d9fd3dd45f767eca33fbf1beee40da790fa74e Author: Lars Ingebrigtsen Date: Fri May 18 16:05:02 2018 +0200 (gnus-blocked-images): Clarify privacy implications * lisp/gnus/gnus-art.el (gnus-blocked-images): Clarify the privacy implication of altering the value of this variable. diff --git a/lisp/gnus/gnus-art.el b/lisp/gnus/gnus-art.el index 869ff4e661..f6120dc5c7 100644 --- a/lisp/gnus/gnus-art.el +++ b/lisp/gnus/gnus-art.el @@ -1616,6 +1616,16 @@ It is a string, such as \"PGP\". If nil, ask user." (defcustom gnus-blocked-images 'gnus-block-private-groups "Images that have URLs matching this regexp will be blocked. +Note that the main reason external images are included in HTML +emails (these days) is to allow tracking whether you've read the +email message or not. If you allow loading images in HTML +emails, you give up privacy. + +The default value of this variable blocks loading external +resources when reading email groups (and therefore stops +tracking), but allows loading external resources when reading +from NNTP newsgroups and the like. + This can also be a function to be evaluated. If so, it will be called with the group name as the parameter, and should return a regexp." 
commit 3aab8626ba5080bb04d0fdae52d99c850a842a52 Author: Eli Zaretskii Date: Fri May 18 16:34:19 2018 +0300 Fix decoding of directories when "~" includes non-ASCII chars * src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence, and in some situations, e.g., when the home directory includes non-ASCII characters, can fail file APIs. (Bug#30755) * lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows. diff --git a/lisp/startup.el b/lisp/startup.el index 5b2d3e58cb..83fd190ea2 100644 --- a/lisp/startup.el +++ b/lisp/startup.el @@ -560,9 +560,17 @@ It is the default value of the variable `top-level'." (if default-directory (setq default-directory (if (eq system-type 'windows-nt) - ;; Convert backslashes to forward slashes. - (expand-file-name - (decode-coding-string default-directory coding t)) + ;; We pass the decoded default-directory as + ;; the 2nd arg to expand-file-name to make + ;; sure it sees a multibyte string as the + ;; default directory; this avoids the side + ;; effect of returning a unibyte string from + ;; expand-file-name because it still sees + ;; the undecoded value of default-directory. + (let ((defdir (decode-coding-string default-directory + coding t))) + ;; Convert backslashes to forward slashes. + (expand-file-name defdir defdir)) (decode-coding-string default-directory coding t)))))) ;; Decode all the important variables and directory lists, now diff --git a/src/fileio.c b/src/fileio.c index 2f8358f01b..e8d966e163 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -867,33 +867,78 @@ the root directory. 
*/) } } multibyte = STRING_MULTIBYTE (name); - if (multibyte != STRING_MULTIBYTE (default_directory)) + bool defdir_multibyte = STRING_MULTIBYTE (default_directory); + if (multibyte != defdir_multibyte) { + /* We want to make both NAME and DEFAULT_DIRECTORY have the same + multibyteness. Strategy: + . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they + can be converted to the multibyteness of the other one + while keeping the same byte sequence. + . If both are non-ASCII, the only safe conversion is to + convert the multibyte one to be unibyte, because the + reverse conversion potentially adds bytes while raw bytes + are converted to their multibyte forms, which we will be + unable to account for, since the information about the + original multibyteness is lost. If those additional bytes + later leak to system APIs because they are not encoded or + because they are converted to unibyte strings by keeping + the data, file APIs will fail. + + Note: One could argue that if we see a multibyte string, it + is evidence that file-name decoding was already set up, and + we could convert unibyte strings to multibyte using + DECODE_FILE. However, this is risky, because the likes of + string_to_multibyte are capable of creating multibyte strings + without any decoding. */ if (multibyte) { - unsigned char *p = SDATA (name); + bool name_ascii_p = SCHARS (name) == SBYTES (name); + unsigned char *p = SDATA (default_directory); - while (*p && ASCII_CHAR_P (*p)) - p++; - if (*p == '\0') + if (!name_ascii_p) + while (*p && ASCII_CHAR_P (*p)) + p++; + if (name_ascii_p || *p != '\0') { - /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is - unibyte. Do not convert DEFAULT_DIRECTORY to - multibyte; instead, convert NAME to a unibyte string, - so that the result of this function is also a unibyte - string. This is needed during bootstrapping and - dumping, when Emacs cannot decode file names, because - the locale environment is not set up. 
*/ + /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII. + Make a unibyte string out of NAME, and arrange for + the result of this function to be a unibyte string. + This is needed during bootstrapping and dumping, when + Emacs cannot decode file names, because the locale + environment is not set up. */ name = make_unibyte_string (SSDATA (name), SBYTES (name)); multibyte = 0; } else - default_directory = string_to_multibyte (default_directory); + { + /* NAME is non-ASCII and multibyte, and + DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a + multibyte string out of DEFAULT_DIRECTORY's data. */ + default_directory = + make_multibyte_string (SSDATA (default_directory), + SCHARS (default_directory), + SCHARS (default_directory)); + } } else { - name = string_to_multibyte (name); - multibyte = 1; + unsigned char *p = SDATA (name); + + while (*p && ASCII_CHAR_P (*p)) + p++; + if (*p == '\0') + { + /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte + and pure-ASCII. Make a multibyte string out of + NAME's data. */ + name = make_multibyte_string (SSDATA (name), + SCHARS (name), SCHARS (name)); + multibyte = 1; + } + else + default_directory = make_unibyte_string (SSDATA (default_directory), + SBYTES (default_directory)); } }