commit 7cfa87ed5bb35465755c5586c1a5b3d2318d0b19 (HEAD, refs/remotes/origin/master) Author: F. Jason Park Date: Mon Nov 21 20:53:25 2022 -0800 ; Fix ERC test failure following recent typo fix * test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el: Replace expected buffer content in test assertion. * test/lisp/erc/resources/erc-scenarios-common.el: Replace expected buffer content in test assertion. See also 40539c7587dc474b424cff732973fe8958eadf14 "; Fix typos". diff --git a/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el b/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el index 711dc7b97f..2ffa86aff6 100644 --- a/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el +++ b/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el @@ -106,7 +106,7 @@ erc-scenarios-common--base-compat-no-rename-bouncer (erc-d-t-search-for 1 "") (erc-d-t-absent-for 0.1 "") (should (eq erc-server-process erc-server-process-bar)) - (erc-d-t-search-for 10 "keeps you from dishonor") + (erc-d-t-search-for 10 "joe: It is a rupture") (erc-d-t-wait-for 5 (not (erc-server-process-alive))))) (when more (funcall more)))) diff --git a/test/lisp/erc/resources/erc-scenarios-common.el b/test/lisp/erc/resources/erc-scenarios-common.el index ef65125241..601c9e95c8 100644 --- a/test/lisp/erc/resources/erc-scenarios-common.el +++ b/test/lisp/erc/resources/erc-scenarios-common.el @@ -296,7 +296,7 @@ erc-scenarios-common--base-network-id-bouncer (erc-d-t-search-for 1 "") (erc-d-t-absent-for 0.1 "") (erc-d-t-wait-for 5 (eq erc-server-process erc-server-process-bar)) - (erc-d-t-search-for 15 "keeps you from dishonour") + (erc-d-t-search-for 15 "joe: It is a rupture") (erc-d-t-wait-for 5 (not (erc-server-process-alive))))) (when after (funcall after)))) commit ea73fd69f0c6251ad9b8fc8267ce0fa68495715e Author: Juanma Barranquero Date: Tue Nov 22 04:40:49 2022 +0100 ; Fix typos in tree-sitter files * admin/notes/tree-sitter/starter-guide (Font-lock) (Debugging queries, Indent, 
Navigation, Which-func) (More features?): * lisp/treesit.el (treesit--merge-ranges) (treesit-font-lock-feature-list, treesit-font-lock-rules) (treesit-font-lock-fontify-region, treesit--font-lock-notifier) (treesit-simple-indent-presets, treesit--font-lock-fast-mode) (treesit--indent-region-batch-size) (treesit--indent-rules-optimize, treesit-ready-p): Fix typos. diff --git a/admin/notes/tree-sitter/starter-guide b/admin/notes/tree-sitter/starter-guide index faf40bc64f..123dabd9f2 100644 --- a/admin/notes/tree-sitter/starter-guide +++ b/admin/notes/tree-sitter/starter-guide @@ -111,7 +111,7 @@ will be fontified in their capture name. The capture name could also be a function, in which case (NODE OVERRIDE START END) is passed to the function for fontification. START -and END is the start and end of the region to be fontified. The +and END are the start and end of the region to be fontified. The function should only fontify within that region. The function should also allow more optional arguments with (&rest _), for future extensibility. For OVERRIDE check out the docstring of @@ -169,7 +169,7 @@ Neovim also has a bunch of queries to reference: The manual explains how to read grammar files in the bottom of section "Tree-sitter Language Definitions". -** Debugging queires +** Debugging queries If your query has problems, use ‘treesit-query-validate’ to debug the query. It will pop a buffer containing the query (in text format) and @@ -261,8 +261,8 @@ Indent works like this: We have a bunch of rules that look like When the indentation process starts, point is at the BOL of a line, we want to know which column to indent this line to. Let NODE be the node at point, we pass this node to the MATCHER of each rule, one of them -will match the node (eg, "this node is a closing bracket!"). Then we pass -the node to the ANCHOR, which returns a point, eg, the BOL of the +will match the node (eg, "this node is a closing bracket!"). 
Then we +pass the node to the ANCHOR, which returns a point, eg, the BOL of the previous line. We find the column number of that point (eg, 4), add OFFSET to it (eg, 0), and that is the column we want to indent the current line to (4 + 0 = 4). @@ -297,7 +297,7 @@ There is also a manual section for indent: "Parser-based Indentation". When writing indent rules, you can use ‘treesit-check-indent’ to check if your indentation is correct. To debug what went wrong, set -‘treesit--indent-verboase’ to non-nil. Then when you indent, Emacs +‘treesit--indent-verbose’ to non-nil. Then when you indent, Emacs tells you which rule is applied in the echo area. #+begin_src elisp @@ -358,7 +358,7 @@ definition node, and ’end means we want to go to the end of that node. Tree-sitter has default implementations for ‘beginning-of-defun-function’ and ‘end-of-defun-function’. So for -ordinary languages, it is suffice to set ‘treesit-defun-type-regexp’ +ordinary languages, it is enough to set ‘treesit-defun-type-regexp’ to something that matches all the defun struct types in the language, and call ‘treesit-major-mode-setup’. For example, @@ -375,8 +375,8 @@ and call ‘treesit-major-mode-setup’. For example, If you have an imenu implementation, set ‘which-func-functions’ to nil, and which-func will automatically use imenu’s data. -If you want independent implementation for which-func, you can find -the current function by going up the tree and looking for the +If you want an independent implementation for which-func, you can +find the current function by going up the tree and looking for the function_definition node. See the function below for an example. Since Python allows nested function definitions, that function keeps going until it reaches the root node, and records all the function @@ -410,7 +410,7 @@ For INCLUDE-TYPE see `python-info-current-defun'." * More features? 
Obviously this list is just a starting point, if there are features in -the major mode that would benefit a parse tree, adding tree-sitter +the major mode that would benefit from a parse tree, adding tree-sitter support for that would be great. But in the minimal case, just adding font-lock is awesome. diff --git a/lisp/treesit.el b/lisp/treesit.el index 24f0e1472d..0dcd16d89a 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -414,8 +414,8 @@ treesit-range-rules (defun treesit--merge-ranges (old-ranges new-ranges start end) "Merge OLD-RANGES and NEW-RANGES, discarding ranges between START and END. -OLD-RANGES and NEW-RANGES are lists of cons of the form (BEG . END). When -merging the two ranges, if a range in OLD-RANGES intersects with +OLD-RANGES and NEW-RANGES are lists of cons of the form (BEG . END). +When merging the two ranges, if a range in OLD-RANGES intersects with another range in NEW-RANGES, discard the one in OLD-RANGES and keep the one in NEW-RANGES. Also discard any range in OLD-RANGES that intersects the region marked by START and END. @@ -521,7 +521,7 @@ treesit-font-lock-feature-list `font-lock-maximum-decoration' controls which levels are activated. -Inside each sublist are feature symbols, which corresponds to the +Inside each sublist are feature symbols, which correspond to the :feature value of a query defined in `treesit-font-lock-rules'. Removing a feature symbol from this list disables the corresponding query during font-lock. @@ -531,7 +531,7 @@ treesit-font-lock-feature-list string-interpolation, comment, doc, string, operator, property, preprocessor, escape-sequence, key (in key-value pairs). Major modes are free to subdivide or extend on these common features. -See the manual for more explanations on some of the feature. +See the manual for more explanations on some of the features. 
For changes to this variable to take effect, run `treesit-font-lock-recompute-features'.") @@ -585,7 +585,7 @@ treesit-font-lock-rules :feature \\='script \"(script_element) @font-lock-builtin-face\") -For each QUERY, a :language keyword and a :feature keyword is +For each QUERY, a :language keyword and a :feature keyword are required. Each query's :feature is a symbol summarizing what the query fontifies. It is used to allow users to enable/disable certain features. See `treesit-font-lock-kind-list' for more. @@ -798,7 +798,7 @@ treesit--node-length (- (treesit-node-end node) (treesit-node-start node))) (defvar-local treesit--font-lock-fast-mode nil - "If this variable is t, change the way we query so its faster. + "If this variable is t, change the way we query so it's faster. This is not a general optimization and should be RARELY needed! See comments in `treesit-font-lock-fontify-region' for more detail.") @@ -810,8 +810,7 @@ treesit--font-lock-fast-mode ;; applied by regexp-based font-lock. The clipped part will be ;; fontified fine when Emacs fontifies the region containing it. ;; -(defun treesit-font-lock-fontify-region - (start end &optional loudly) +(defun treesit-font-lock-fontify-region (start end &optional loudly) "Fontify the region between START and END. If LOUDLY is non-nil, display some debugging information." (when (or loudly treesit--font-lock-verbose) @@ -832,10 +831,10 @@ treesit-font-lock-fontify-region ;; is very fast anyway (even in large files of size ~10MB). ;; Plus, querying the result of `treesit-node-on' could still ;; miss patterns even if we use some heuristic to enlarge the - ;; node (how much to enlarge? to which extent?), its much safer + ;; node (how much to enlarge? to which extent?), it's much safer ;; to just use the root node. ;; - ;; Sometimes the source file has some errors that causes + ;; Sometimes the source file has some errors that cause ;; tree-sitter to parse it into a enormously tall tree (10k ;; levels tall). 
In that case querying the root node is very ;; slow. So we try to get top-level nodes and query them. This @@ -896,7 +895,7 @@ treesit-font-lock-fontify-region `(jit-lock-bounds ,start . ,end)) (defun treesit--font-lock-notifier (ranges parser) - "Ensures updated parts of the parse-tree is refontified. + "Ensures updated parts of the parse-tree are refontified. RANGES is a list of (BEG . END) ranges, PARSER is the tree-sitter parser notifying of the change." (with-current-buffer (treesit-parser-buffer parser) @@ -1083,15 +1082,15 @@ treesit-simple-indent-presets "A list of presets. These presets that can be used as MATHER and ANCHOR in `treesit-simple-indent-rules'. MACHTERs and ANCHORs are -functions that takes 3 arguments: NODE, PARENT and BOL. +functions that take 3 arguments: NODE, PARENT and BOL. MATCHER: \(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX) NODE-TYPE checks for NODE's type, PARENT-TYPE checks for - PARENT's type, NODE-FIELD checks for the filed name of NODE - in PARENT, NODE-INDEX-MIN and NODE-INDEX-MAX checks for + PARENT's type, NODE-FIELD checks for the field name of NODE + in PARENT, NODE-INDEX-MIN and NODE-INDEX-MAX check for NODE's index in PARENT. Therefore, to match the first child where PARENT is \"argument_list\", use @@ -1147,7 +1146,7 @@ treesit-simple-indent-presets prev-line - Returns the first non-whitespace character on the previous line. + Returns the first non-whitespace character on the previous line. point-min @@ -1278,7 +1277,7 @@ treesit--indent-region-batch-size "How many lines of indent value do we precompute. In `treesit-indent-region' we indent in batches: precompute indent for each line, apply them in one go, let parser reparse, -and do it again. This way the parser don't need to unnecessarily +and do it again. 
This way the parser doesn't need to unnecessarily reparse after indenting every single line.") (defun treesit-indent-region (beg end) @@ -1412,7 +1411,7 @@ treesit--indent-rules-optimize RULES should be a value suitable for `treesit-simple-indent-rules'. Return the optimized version of RULES." - ;; Right now this function just compiles queries. it doesn't + ;; Right now this function just compiles queries. It doesn't ;; byte-compile matchers and anchors because it doesn't make much ;; difference. (cl-loop for setting in rules @@ -1551,8 +1550,8 @@ treesit-ready-p If tree-sitter is not ready, emit a warning and return nil. If the user has chosen to activate tree-sitter for LANGUAGE and tree-sitter is ready, return non-nil. If QUIET is t, don't emit -warning in either case; if quiet is `message', display a message -instead of emitting warning." +a warning in either case; if quiet is `message', display a message +instead of emitting a warning." (let ((language-list (if (consp language) language (list language))) commit 19954da8dd927f9db4ca95d8a1320207e6c404cd Author: Stefan Kangas Date: Tue Nov 22 02:38:41 2022 +0100 Refresh menus in gnus.texi Fixes problem reported by Po Lu . * doc/misc/gnus.texi (Top, Starting Up, Article Treatment) (The Gnus Diary Library, Searching, nnmairix): Refresh menus. diff --git a/doc/misc/gnus.texi b/doc/misc/gnus.texi index 1d522bf9ca..0508b5cc05 100644 --- a/doc/misc/gnus.texi +++ b/doc/misc/gnus.texi @@ -402,21 +402,21 @@ Top @end iftex @menu -* Don't Panic:: Your first 20 minutes with Gnus. -* Starting Up:: Finding news can be a pain. -* Group Buffer:: Selecting, subscribing and killing groups. -* Summary Buffer:: Reading, saving and posting articles. -* Article Buffer:: Displaying and handling articles. -* Composing Messages:: Information on sending mail and news. -* Select Methods:: Gnus reads all messages from various select methods. -* Scoring:: Assigning values to articles. -* Searching:: Mail and News search engines. 
-* Various:: General purpose settings. -* The End:: Farewell and goodbye. -* Appendices:: Terminology, Emacs intro, @acronym{FAQ}, History, Internals. -* GNU Free Documentation License:: The license for this documentation. -* Index:: Variable, function and concept index. -* Key Index:: Key Index. +* Don't Panic:: Your first 20 minutes with Gnus. +* Starting Up:: Finding news can be a pain. +* Group Buffer:: Selecting, subscribing and killing groups. +* Summary Buffer:: Reading, saving and posting articles. +* Article Buffer:: Displaying and handling articles. +* Composing Messages:: Information on sending mail and news. +* Select Methods:: Gnus reads all messages from various select methods. +* Scoring:: Assigning values to articles. +* Searching:: Mail and News search engines. +* Various:: General purpose settings. +* The End:: Farewell and goodbye. +* Appendices:: Terminology, Emacs intro, @acronym{FAQ}, History, Internals. +* GNU Free Documentation License:: The license for this documentation. +* Index:: Variable, function and concept index. +* Key Index:: Key Index. @c Doesn't work right in html. @c FIXME Do this in a more standard way. @@ -596,7 +596,8 @@ Top * Article Buttons:: Click on URLs, Message-IDs, addresses and the like. * Article Button Levels:: Controlling appearance of buttons. * Article Date:: Grumble, UT! -* Article Display:: Display various stuff---X-Face, Picons, Smileys, Gravatars +* Article Display:: Display various stuff: + X-Face, Picons, Gravatars, Smileys. * Article Signature:: What is a signature? * Article Miscellanea:: Various other stuff. @@ -641,7 +642,7 @@ Top * Getting Mail:: Reading your personal mail with Gnus. * Browsing the Web:: Getting messages from a plethora of Web sources. * Other Sources:: Reading directories, files. -* Virtual Groups:: Combining articles from multiple sources. +* Virtual Groups:: Combining articles and groups together. * Email Based Diary:: Using mails to manage diary events in Gnus. 
* Gnus Unplugged:: Reading news and mail offline. @@ -666,6 +667,13 @@ Top * Indirect Functions:: Connecting indirectly to the server. * Common Variables:: Understood by several connection functions. +Using IMAP + +* Connecting to an IMAP Server:: Getting started with @acronym{IMAP}. +* Customizing the IMAP Connection:: Variables for @acronym{IMAP} connection. +* Client-Side IMAP Splitting:: Put mail in the correct mail box. +* Support for IMAP Extensions:: Getting extensions and labels from servers. + Getting Mail * Mail in a Newsreader:: Important introductory notes. @@ -685,6 +693,7 @@ Top Mail Sources * Mail Source Specifiers:: How to specify what a mail source is. +* Mail Source Functions:: * Mail Source Customization:: Some variables that influence things. * Fetching Mail:: Using the mail source specifiers. @@ -695,6 +704,10 @@ Top * Mail Spool:: Store your mail in a private spool? * MH Spool:: An mhspool-like back end. * Maildir:: Another one-file-per-message format. +* nnmaildir Group Parameters:: +* Article Identification:: +* NOV Data:: +* Article Marks:: * Mail Folders:: Having one file for each group. * Comparing Mail Back Ends:: An in-depth looks at pros and cons. @@ -734,10 +747,10 @@ Top The Gnus Diary Library -* Diary Summary Line Format:: A nicer summary buffer line format. -* Diary Articles Sorting:: A nicer way to sort messages. -* Diary Headers Generation:: Not doing it manually. -* Diary Group Parameters:: Not handling them manually. +* Diary Summary Line Format:: A nicer summary buffer line format. +* Diary Articles Sorting:: A nicer way to sort messages. +* Diary Headers Generation:: Not doing it manually. +* Diary Group Parameters:: Not handling them manually. Gnus Unplugged @@ -796,9 +809,22 @@ Top Searching * Search Engines:: Selecting and configuring search engines. -* Creating Search Groups:: Creating search groups. +* Creating Search Groups:: How and where. * Search Queries:: Gnus' built-in search syntax. 
* nnmairix:: Searching with Mairix. +* nnir:: + +nnmairix + +* About mairix:: About the mairix mail search engine +* nnmairix requirements:: What you will need for using nnmairix +* What nnmairix does:: What does nnmairix actually do? +* Setting up mairix:: Set up your mairix installation +* Configuring nnmairix:: Set up the nnmairix back end +* nnmairix keyboard shortcuts:: List of available keyboard shortcuts +* Propagating marks:: How to propagate marks from nnmairix groups +* nnmairix tips and tricks:: Some tips, tricks and examples +* nnmairix caveats:: Some more stuff you might want to know Various @@ -839,8 +865,7 @@ Top * X-Face:: Display a funky, teensy black-and-white image. * Face:: Display a funkier, teensier colored image. -* Smileys:: Show all those happy faces the way they were - meant to be shown. +* Smileys:: Show all those happy faces the way they were meant to be shown. * Picons:: How to display pictures of what you're reading. * Gravatars:: Display the avatar of people you read. @@ -862,12 +887,39 @@ Top * Extending the Spam package:: * Spam Statistics Package:: +Spam Back Ends + +* Blacklists and Whitelists:: +* BBDB Whitelists:: +* Gmane Spam Reporting:: +* Anti-spam Hashcash Payments:: +* Blackholes:: +* Regular Expressions Header Matching:: +* Bogofilter:: +* SpamAssassin back end:: +* ifile spam filtering:: +* Spam Statistics Filtering:: +* SpamOracle:: + Spam Statistics Package * Creating a spam-stat dictionary:: * Splitting mail using spam-stat:: * Low-level interface to the spam-stat dictionary:: +The Gnus Registry + +* Gnus Registry Setup:: +* Registry Article Refer Method:: +* Fancy splitting to parent:: +* Store custom flags and keywords:: +* Store arbitrary data:: + +The Gnus Cloud + +* Gnus Cloud Setup:: +* Gnus Cloud Usage:: + Appendices * History:: How Gnus got where it is today. @@ -895,7 +947,7 @@ Top * Quassia Gnus:: Two times two is four, or Gnus 5.6/5.7. * Pterodactyl Gnus:: Pentad also starts with P, AKA Gnus 5.8/5.9. 
* Oort Gnus:: It's big. It's far out. Gnus 5.10/5.11. -* No Gnus:: Very punny. Gnus 5.12/5.13 +* No Gnus:: Very punny. Gnus 5.12/5.13. * Ma Gnus:: Celebrating 25 years of Gnus. Customization @@ -1099,15 +1151,15 @@ Starting Up terminology section (@pxref{Terminology}). @menu -* Finding the News:: Choosing a method for getting news. -* The Server is Down:: How can I read my mail then? -* Child Gnusae:: You can have more than one Gnus active at a time. -* New Groups:: What is Gnus supposed to do with new groups? -* Changing Servers:: You may want to move from one server to another. -* Startup Files:: Those pesky startup files---@file{.newsrc}. -* Auto Save:: Recovering from a crash. -* The Active File:: Reading the active file over a slow line Takes Time. -* Startup Variables:: Other variables you might change. +* Finding the News:: Choosing a method for getting news. +* The Server is Down:: How can I read my mail then? +* Child Gnusae:: You can have more than one Gnus active at a time. +* New Groups:: What is Gnus supposed to do with new groups? +* Changing Servers:: You may want to move from one server to another. +* Startup Files:: Those pesky startup files---@file{.newsrc}. +* Auto Save:: Recovering from a crash. +* The Active File:: Reading the active file over a slow line Takes Time. +* Startup Variables:: Other variables you might change. @end menu @@ -8718,7 +8770,7 @@ Article Treatment @menu * Article Highlighting:: You want to make the article look like fruit salad. -* Article Fontifying:: Making emphasized text look nice. +* Article Fontifying:: Making emphasized text look nice. * Article Hiding:: You also want to make certain info go away. * Article Washing:: Lots of way-neat functions to make life better. * Article Header:: Doing various header transformations. @@ -18399,10 +18451,10 @@ The Gnus Diary Library @menu -* Diary Summary Line Format:: A nicer summary buffer line format. -* Diary Articles Sorting:: A nicer way to sort messages. 
-* Diary Headers Generation:: Not doing it manually. -* Diary Group Parameters:: Not handling them manually. +* Diary Summary Line Format:: A nicer summary buffer line format. +* Diary Articles Sorting:: A nicer way to sort messages. +* Diary Headers Generation:: Not doing it manually. +* Diary Group Parameters:: Not handling them manually. @end menu @node Diary Summary Line Format @@ -21579,10 +21631,11 @@ Searching details on Gnus' query language, see @ref{Search Queries}. @menu -* Search Engines:: Selecting and configuring search engines. -* Creating Search Groups:: How and where. -* Search Queries:: Gnus' built-in search syntax. -* nnmairix:: Searching with Mairix. +* Search Engines:: Selecting and configuring search engines. +* Creating Search Groups:: How and where. +* Search Queries:: Gnus' built-in search syntax. +* nnmairix:: Searching with Mairix. +* nnir:: @end menu @node Search Engines @@ -21878,7 +21931,7 @@ nnmairix * What nnmairix does:: What does nnmairix actually do? * Setting up mairix:: Set up your mairix installation * Configuring nnmairix:: Set up the nnmairix back end -* nnmairix keyboard shortcuts:: List of available keyboard shortcuts +* nnmairix keyboard shortcuts:: List of available keyboard shortcuts * Propagating marks:: How to propagate marks from nnmairix groups * nnmairix tips and tricks:: Some tips, tricks and examples * nnmairix caveats:: Some more stuff you might want to know commit d901059281ffca45cbb49fed4feb59ab3609354a Author: Stefan Kangas Date: Tue Nov 22 02:10:59 2022 +0100 Improve wording and markup in gnus-faq.texi * doc/misc/gnus-faq.texi (FAQ 2-4): Improve wording and markup. 
(FAQ 2-1, FAQ 2-4, FAQ 2-5, FAQ 3 - Getting Messages, FAQ 3-1) (FAQ 3-2, FAQ 3-2, FAQ 3-4, FAQ 4-1, FAQ 4-2, FAQ 4-3, FAQ 4-4) (FAQ 4-8, FAQ 4-9, FAQ 4-10, FAQ 4-12, FAQ 5-1, FAQ 5-2) (FAQ 5-7, FAQ 6-1, FAQ 6-2, FAQ 6-2, FAQ 6-3, FAQ 6-4, FAQ 6-5) (FAQ 7-2, FAQ 7-3, FAQ 7-3, FAQ 7-4, FAQ 8-1, FAQ 8-5, FAQ 9-1): Improve markup and reflow some paragraphs. diff --git a/doc/misc/gnus-faq.texi b/doc/misc/gnus-faq.texi index 167a525ce8..49022ac341 100644 --- a/doc/misc/gnus-faq.texi +++ b/doc/misc/gnus-faq.texi @@ -144,9 +144,8 @@ FAQ 2-1 are now asked if you want to restore that information from the auto-save file. -To prevent this message make sure you exit Gnus -via @samp{q} in group buffer instead of -just killing Emacs. +To prevent this message make sure you exit Gnus via @kbd{q} in group +buffer instead of just killing Emacs. @node FAQ 2-2 @subsubheading Question 2.2 @@ -180,25 +179,23 @@ FAQ 2-3 @node FAQ 2-4 @subsubheading Question 2.4 -My group buffer becomes a bit crowded, is there a way to -sort my groups into categories so I can easier browse -through them? +My group buffer is a bit crowded. Is there a way to sort groups into +categories so I can browse them more easily? @subsubheading Answer -Gnus offers the topic mode, it allows you to sort your -groups in, well, topics, e.g., all groups dealing with -Linux under the topic linux, all dealing with music under -the topic music and all dealing with Scottish music under -the topic scottish which is a subtopic of music. +Gnus offers the topic mode, it allows you to sort your groups in, +well, topics. For example, all groups dealing with Linux under the +topic @samp{linux}, all dealing with music under the topic +@samp{music} and all dealing with Scottish music under the topic +@samp{scottish} which is a subtopic of @samp{music}. -To enter topic mode, just hit t while in Group buffer. Now -you can use @samp{T n} to create a topic -at point and @samp{T m} to move a group to -a specific topic. 
For more commands see the manual or the -menu. You might want to include the %P specifier at the -beginning of your gnus-group-line-format variable to have -the groups nicely indented. +To enter topic mode, just hit @kbd{t} while in Group buffer. Now you +can use @kbd{T n} to create a topic at point and @kbd{T m} to move a +group to a specific topic. For more commands see the manual or the +menu. You might want to include the @samp{%P} specifier at the +beginning of your @var{gnus-group-line-format} variable to have the +groups nicely indented. @node FAQ 2-5 @subsubheading Question 2.5 @@ -208,16 +205,14 @@ FAQ 2-5 @subsubheading Answer -Move point over the group you want to move and -hit @samp{C-k}, now move point to the -place where you want the group to be and -hit @samp{C-y}. +Move point over the group you want to move and hit @kbd{C-k}, now move +point to the place where you want the group to be and hit @kbd{C-y}. @node FAQ 3 - Getting Messages @subsection Getting Messages @menu -* FAQ 3-1:: I just installed Gnus, started it via @samp{M-x gnus} +* FAQ 3-1:: I just installed Gnus, started it via @kbd{M-x gnus} but it only says "nntp (news) open error", what to do? * FAQ 3-2:: I'm working under Windows and have no idea what ~/.gnus.el means. @@ -242,9 +237,8 @@ FAQ 3 - Getting Messages @node FAQ 3-1 @subsubheading Question 3.1 -I just installed Gnus, started it via -@samp{M-x gnus} -but it only says "nntp (news) open error", what to do? +I just installed Gnus, started it via @kbd{M-x gnus} but it only says +"nntp (news) open error", what to do? @subsubheading Answer @@ -270,7 +264,7 @@ FAQ 3-2 for the configuration files. However, you don't really need to know what this means, it suffices that Emacs knows what it means :-) You can type -@samp{C-x C-f ~/.gnus.el @key{RET}} +@kbd{C-x C-f ~/.gnus.el @key{RET}} (yes, with the forward slash, even on Windows), and Emacs will open the right file for you. (It will most likely be new, and thus empty.) 
@@ -295,7 +289,7 @@ FAQ 3-2 name HOME and value C:\myhome. Rebooting is not necessary. Now to create @file{~/.gnus.el}, say -@samp{C-x C-f ~/.gnus.el @key{RET} C-x C-s}. +@kbd{C-x C-f ~/.gnus.el @key{RET} C-x C-s}. in Emacs. @node FAQ 3-3 @@ -331,14 +325,12 @@ FAQ 3-4 @subsubheading Answer -If you know the name of the group say @samp{U -name.of.group @key{RET}} in group buffer (use the -tab-completion Luke). Otherwise hit ^ in group buffer, -this brings you to the server buffer. Now place point (the -cursor) over the server which carries the group you want, -hit @samp{@key{RET}}, move point to the group -you want to subscribe to and say @samp{u} -to subscribe to it. +If you know the name of the group say @kbd{U name.of.group @key{RET}} +in group buffer (use the tab-completion Luke). Otherwise hit @kbd{^} +in group buffer, this brings you to the server buffer. Now place +point (the cursor) over the server which carries the group you want, +hit @kbd{RET}, move point to the group you want to subscribe to and +say @kbd{u} to subscribe to it. @node FAQ 3-5 @subsubheading Question 3.5 @@ -625,12 +617,10 @@ FAQ 4-1 @subsubheading Answer -If you enter the group by saying -@samp{@key{RET}} -in group buffer with point over the group, only unread and ticked messages are loaded. Say -@samp{C-u @key{RET}} -instead to load all available messages. If you want only the 300 newest say -@samp{C-u 300 @key{RET}} +If you enter the group by saying @kbd{@key{RET}} in group buffer with +point over the group, only unread and ticked messages are loaded. Say +@kbd{C-u @key{RET}} instead to load all available messages. If you +want only the 300 newest say @kbd{C-u 300 @key{RET}} Loading only unread messages can be annoying if you have threaded view enabled, say @@ -643,12 +633,12 @@ FAQ 4-1 all articles (Warning: Both settings enlarge the amount of data which is fetched when you enter a group and slow down the process of entering a group). 
-You can say @samp{/o N} in the summary buffer to load the last N +You can say @kbd{/o N} in the summary buffer to load the last N messages. If you don't want all old messages, but the parent of the message you're just reading, -you can say @samp{^}, if you want to retrieve the whole thread -the message you're just reading belongs to, @samp{A T} is your friend. +you can say @kbd{^}, if you want to retrieve the whole thread +the message you're just reading belongs to, @kbd{A T} is your friend. @node FAQ 4-2 @subsubheading Question 4.2 @@ -659,10 +649,10 @@ FAQ 4-2 @subsubheading Answer You can tick important messages. To do this hit -@samp{u} while point is in summary buffer +@kbd{u} while point is in summary buffer over the message. When you want to remove the mark, hit -either @samp{d} (this deletes the tick -mark and set's unread mark) or @samp{M c} +either @kbd{d} (this deletes the tick +mark and set's unread mark) or @kbd{M c} (which deletes all marks for the message). @node FAQ 4-3 @@ -672,10 +662,7 @@ FAQ 4-3 @subsubheading Answer -Say @samp{t} -to show all headers, one more -@samp{t} -hides them again. +Say @kbd{t} to show all headers, one more @kbd{t} hides them again. @node FAQ 4-4 @subsubheading Question 4.4 @@ -684,11 +671,8 @@ FAQ 4-4 @subsubheading Answer -Say -@samp{C-u g} -to show the raw message -@samp{g} -returns to normal view. +Type @kbd{C-u g} to show the raw message @kbd{g} returns to normal +view. @node FAQ 4-5 @subsubheading Question 4.5 @@ -765,11 +749,11 @@ FAQ 4-8 Gnus offers you several functions to ``wash'' incoming mail, you can find them if you browse through the menu, item Article->Washing. The most interesting ones are probably ``Wrap -long lines'' (@samp{W w}), ``Decode ROT13'' -(@samp{W r}) and ``Outlook Deuglify'' which repairs +long lines'' (@kbd{W w}), ``Decode ROT13'' +(@kbd{W r}) and ``Outlook Deuglify'' which repairs the dumb quoting used by many users of Microsoft products -(@samp{W Y f} gives you full deuglify. 
-See @samp{W Y C-h} or have a look at the menus for +(@kbd{W Y f} gives you full deuglify. +See @kbd{W Y C-h} or have a look at the menus for other deuglifications). @node FAQ 4-9 @@ -792,21 +776,21 @@ FAQ 4-9 up rules based on the article you are just reading. Say you're reading a message by a guy who always writes nonsense and you want to ignore his messages in the future. Hit -@samp{L}, to set up a rule which lowers the score. +@kbd{L}, to set up a rule which lowers the score. Now Gnus asks you which the criteria for lowering the Score shall -be. Hit @samp{?} twice to see all possibilities, -we want @samp{a} which means the author (the from +be. Hit @kbd{?} twice to see all possibilities, +we want @kbd{a} which means the author (the from header). Now Gnus wants to know which kind of matching we want. -Hit either @samp{e} for an exact match or -@samp{s} for substring-match and delete afterwards +Hit either @kbd{e} for an exact match or +@kbd{s} for substring-match and delete afterwards everything but the name to score down all authors with the given name no matter which email address is used. Now you need to tell Gnus when to apply the rule and how long it should last, hit -@samp{p} to apply the rule now and let it last +@kbd{p} to apply the rule now and let it last forever. If you want to raise the score instead of lowering it say -@samp{I} instead of @samp{L}. +@kbd{I} instead of @kbd{L}. -You can also set up rules by hand. To do this say @samp{V +You can also set up rules by hand. To do this say @kbd{V f} in summary buffer. Then you are asked for the name of the score file, it's name.of.group.SCORE for rules valid in only one group or all.Score for rules valid in all groups. See the @@ -851,7 +835,7 @@ FAQ 4-10 @subsubheading Answer While in group buffer move point over the group and hit -@samp{G c}, this opens a buffer where you +@kbd{G c}, this opens a buffer where you can set options for the group. 
At the bottom of the buffer you'll find an item that allows you to set variables locally for the group. To disable threading enter @@ -889,10 +873,10 @@ FAQ 4-12 lowest-article-number = total-number-of-articles''. This works OK for Usenet groups, but if you delete and move many messages in mail groups, this fails. To cure the -symptom, enter the group via @samp{C-u @key{RET}} +symptom, enter the group via @kbd{C-u @key{RET}} (this makes Gnus get all messages), then -hit @samp{M P b} to mark all messages and -then say @samp{B m name.of.group} to move +hit @kbd{M P b} to mark all messages and +then say @kbd{B m name.of.group} to move all messages to the group they have been in before, they get new message numbers in this process and the count is right again (until you delete and move your mail to other @@ -1110,28 +1094,20 @@ FAQ 5-1 @subsubheading Answer -To start composing a new mail hit @samp{m} -either in Group or Summary buffer, for a posting, it's -either @samp{a} in Group buffer and -filling the Newsgroups header manually -or @samp{a} in the Summary buffer of the -group where the posting shall be send to. Replying by mail -is -@samp{r} if you don't want to cite the -author, or import the cited text manually and -@samp{R} to cite the text of the original -message. For a follow up to a newsgroup, it's -@samp{f} and @samp{F} -(analogously to @samp{r} and -@samp{R}). - -Enter new headers above the line saying "--text follows -this line--", enter the text below the line. When ready -hit @samp{C-c C-c}, to send the message, -if you want to finish it later hit @samp{C-c -C-d} to save it in the drafts group, where you -can start editing it again by saying @samp{D -e}. +To start composing a new mail hit @kbd{m} either in Group or Summary +buffer, for a posting, it's either @kbd{a} in Group buffer and filling +the Newsgroups header manually or @kbd{a} in the Summary buffer of the +group where the posting shall be send to. 
Replying by mail is @kbd{r} +if you don't want to cite the author, or import the cited text +manually and @kbd{R} to cite the text of the original message. For a +follow up to a newsgroup, it's @kbd{f} and @kbd{F} (analogously to +@kbd{r} and @kbd{R}). + +Enter new headers above the line saying "--text follows this line--", +enter the text below the line. When ready hit @kbd{C-c C-c}, to send +the message, if you want to finish it later hit @kbd{C-c C-d} to save +it in the drafts group, where you can start editing it again by saying +@kbd{D e}. @node FAQ 5-2 @subsubheading Question 5.2 @@ -1156,8 +1132,7 @@ FAQ 5-2 in @file{~/.gnus.el}. -You can reformat a paragraph by hitting @samp{M-q} -(as usual). +You can reformat a paragraph by hitting @kbd{M-q} (as usual). @node FAQ 5-3 @subsubheading Question 5.3 @@ -1358,16 +1333,13 @@ FAQ 5-7 @end example @noindent -Now you should be ready to go. Say @samp{M-x bbdb @key{RET} -@key{RET}} to open a bbdb buffer showing all -entries. Say @samp{c} to create a new -entry, @samp{b} to search your BBDB and -@samp{C-o} to add a new field to an -entry. If you want to add a sender to the BBDB you can -also just hit @kbd{:} on the posting in the summary buffer and -you are done. When you now compose a new mail, -hit @samp{TAB} to cycle through know -recipients. +Now you should be ready to go. Say @kbd{M-x bbdb @key{RET} @key{RET}} +to open a bbdb buffer showing all entries. Say @kbd{c} to create a +new entry, @kbd{b} to search your BBDB and @kbd{C-o} to add a new +field to an entry. If you want to add a sender to the BBDB you can +also just hit @kbd{:} on the posting in the summary buffer and you are +done. When you now compose a new mail, hit @kbd{TAB} to cycle through +known recipients. @node FAQ 5-8 @subsubheading Question 5.8 @@ -1576,17 +1548,17 @@ FAQ 6-1 Now you've got to import this mbox file into Gnus.
To do this, create a nndoc group based on the mbox file by -saying @samp{G f /path/file.mbox @key{RET}} in +saying @kbd{G f /path/file.mbox @key{RET}} in Group buffer. You now have read-only access to your mail. If you want to import the messages to your normal Gnus mail groups hierarchy, enter the nndoc group you've -just created by saying @samp{C-u @key{RET}} +just created by saying @kbd{C-u @key{RET}} (thus making sure all messages are retrieved), mark all -messages by saying @samp{M P b} and +messages by saying @kbd{M P b} and either copy them to the desired group by saying -@samp{B c name.of.group @key{RET}} or send them +@kbd{B c name.of.group @key{RET}} or send them through nnmail-split-methods (respool them) by saying -@samp{B r}. +@kbd{B r}. @node FAQ 6-2 @subsubheading Question 6.2 @@ -1598,7 +1570,7 @@ FAQ 6-2 If you stumble across an interesting message, say in gnu.emacs.gnus and want to archive it there are several solutions. The first and easiest is to save it to a file -by saying @samp{O f}. However, wouldn't +by saying @kbd{O f}. However, wouldn't it be much more convenient to have more direct access to the archived message from Gnus? If you say yes, put this snippet by Frank Haun in @@ -1621,10 +1593,9 @@ FAQ 6-2 @end example @noindent -You can now say @samp{M-x -my-archive-article} in summary buffer to -archive the article under the cursor in a nnml -group. (Change nnml to your preferred back end.) +You can now say @kbd{M-x my-archive-article} in summary buffer to +archive the article under the cursor in a nnml group. (Change nnml to +your preferred back end.) Of course you can also make sure the cache is enabled by saying @@ -1644,26 +1615,20 @@ FAQ 6-3 @subsubheading Answer -There are several ways for this, too. 
For a posting from -a Usenet group the easiest solution is probably to ask -@uref{https://groups.google.com, groups.google.com}, -if you found the posting there, tell Google to display -the raw message, look for the message-id, and say -@samp{M-^ the@@message.id @key{RET}} in a -summary buffer. -There's a Gnus interface for -groups.google.com which you can call with -@samp{G W}) in group buffer. - -Another idea which works for both mail and news groups -is to enter the group where the message you are -searching is and use the standard Emacs search -@samp{C-s}, it's smart enough to look at -articles in collapsed threads, too. If you want to -search bodies, too try @samp{M-s} -instead. Further on there are the -gnus-summary-limit-to-foo functions, which can help you, -too. +There are several ways for this, too. For a posting from a Usenet +group the easiest solution is probably to ask +@uref{https://groups.google.com, groups.google.com}, if you found the +posting there, tell Google to display the raw message, look for the +message-id, and say @kbd{M-^ the@@message.id @key{RET}} in a summary +buffer. There's a Gnus interface for @samp{groups.google.com} which +you can call with @kbd{G W}) in group buffer. + +Another idea which works for both mail and news groups is to enter the +group where the message you are searching is and use the standard +Emacs search @kbd{C-s}, it's smart enough to look at articles in +collapsed threads, too. If you want to search bodies, too try +@kbd{M-s} instead. Further on there are the gnus-summary-limit-to-foo +functions, which can help you, too. @node FAQ 6-4 @subsubheading Question 6.4 @@ -1673,18 +1638,18 @@ FAQ 6-4 @subsubheading Answer You can of course just mark the mail you don't need -anymore by saying @samp{#} with point -over the mail and then say @samp{B @key{DEL}} +anymore by saying @kbd{#} with point +over the mail and then say @kbd{B @key{DEL}} to get rid of them forever. 
You could also instead of actually deleting them, send them to a junk-group by -saying @samp{B m nnml:trash-bin} which +saying @kbd{B m nnml:trash-bin} which you clear from time to time, but both are not the intended way in Gnus. In Gnus, we let mail expire like news expires on a news server. That means you tell Gnus the message is expirable (you tell Gnus "I don't need this mail -anymore") by saying @samp{E} with point +anymore") by saying @kbd{E} with point over the mail in summary buffer. Now when you leave the group, Gnus looks at all messages which you marked as expirable before and if they are old enough (default is @@ -1703,13 +1668,13 @@ FAQ 6-5 got two choices: auto-expire and total-expire. Auto-expire means, that every article which has no marks set and is selected for reading is -marked as expirable, Gnus hits @samp{E} +marked as expirable, Gnus hits @kbd{E} for you every time you read a message. Total-expire follows a slightly different approach, here all article where the read mark is set are expirable. To activate auto-expire, include auto-expire in the -Group parameters for the group. (Hit @samp{G +Group parameters for the group. (Hit @kbd{G c} in summary buffer with point over the group to change group parameters). For total-expire add total-expire to the group-parameters. @@ -1721,10 +1686,10 @@ FAQ 6-5 If you want a message to be excluded from expiration in a group where total or auto expire is active, set either -tick (hit @samp{u}) or dormant mark (hit -@samp{u}), when you use auto-expire, you +tick (hit @kbd{u}) or dormant mark (hit +@kbd{u}), when you use auto-expire, you can also set the read mark (hit -@samp{d}). +@kbd{d}). @node FAQ 6-6 @subsubheading Question 6.6 @@ -1817,12 +1782,12 @@ FAQ 7-2 You've got to select the servers whose groups can be stored locally. To do this, open the server buffer -(that is press @samp{^} while in the +(that is press @kbd{^} while in the group buffer). 
Now select a server by moving point to the line naming that server. Finally, agentize the -server by typing @samp{J a}. If you +server by typing @kbd{J a}. If you make a mistake, or change your mind, you can undo this -action by typing @samp{J r}. When +action by typing @kbd{J r}. When you're done, type 'q' to return to the group buffer. Now the next time you enter a group on an agentized server, the headers will be stored on disk and read from @@ -1838,7 +1803,7 @@ FAQ 7-3 You can tell the agent to automatically fetch the bodies of articles which fulfill certain predicates, this is done in a special buffer which can be reached by -saying @samp{J c} in group +saying @kbd{J c} in group buffer. Please refer to the documentation for information which predicates are possible and how exactly to do it. @@ -1847,12 +1812,12 @@ FAQ 7-3 articles to store on disk. There are two ways to do this: Number one: In the summary buffer, process mark a set of articles that shall be stored in the agent by -saying @samp{#} with point over the -article and then type @samp{J s}. The +saying @kbd{#} with point over the +article and then type @kbd{J s}. The other possibility is to set, again in the summary buffer, downloadable (%) marks for the articles you -want by typing @samp{@@} with point over -the article and then typing @samp{J u}. +want by typing @kbd{@@} with point over +the article and then typing @kbd{J u}. What's the difference? Well, process marks are erased as soon as you exit the summary buffer while downloadable marks are permanent. You can actually set downloadable @@ -1874,10 +1839,10 @@ FAQ 7-4 All you've got to do is to tell Gnus when you are online (plugged) and when you are offline (unplugged), the rest works automatically. You can toggle plugged/unplugged -state by saying @samp{J j} in group -buffer. To start Gnus unplugged say @samp{M-x +state by saying @kbd{J j} in group +buffer. To start Gnus unplugged say @kbd{M-x gnus-unplugged} instead of -@samp{M-x gnus}. 
Note that for this to +@kbd{M-x gnus}. Note that for this to work, the agent must be active. @node FAQ 8 - Getting help @@ -1901,14 +1866,14 @@ FAQ 8-1 @subsubheading Answer The first stop should be the Gnus manual (Say -@samp{C-h i d m Gnus @key{RET}} to start the +@kbd{C-h i d m Gnus @key{RET}} to start the Gnus manual, then walk through the menus or do a -full-text search with @samp{s}). Then +full-text search with @kbd{s}). Then there are the general Emacs help commands starting with -C-h, type @samp{C-h ? ?} to get a list +@kbd{C-h}, type @kbd{C-h ? ?} to get a list of all available help commands and their meaning. Finally -@samp{M-x apropos-command} lets you -search through all available functions and @samp{M-x +@kbd{M-x apropos-command} lets you +search through all available functions and @kbd{M-x apropos} searches the bound variables. @node FAQ 8-2 @@ -1963,7 +1928,7 @@ FAQ 8-5 @subsubheading Answer -Say @samp{M-x gnus-bug}, this will start +Say @kbd{M-x gnus-bug}, this will start a message to the @email{bugs@@gnus.org, gnus bug mailing list} including information about your environment which make @@ -1998,7 +1963,7 @@ FAQ 9-1 active file, see the node "The Active File" in the Gnus manual for things you might try to speed the process up. An other idea would be to byte compile your @file{~/.gnus.el} (say -@samp{M-x byte-compile-file @key{RET} ~/.gnus.el +@kbd{M-x byte-compile-file @key{RET} ~/.gnus.el @key{RET}} to do it). 
Finally, if you have require statements in your .gnus, you could replace them with @code{with-eval-after-load}, which loads the stuff not at startup commit 40539c7587dc474b424cff732973fe8958eadf14 Author: Stefan Kangas Date: Mon Nov 21 15:39:43 2022 +0100 ; Fix typos diff --git a/admin/notes/tree-sitter/build-module/README b/admin/notes/tree-sitter/build-module/README index d205661e6c..2fcb9778da 100644 --- a/admin/notes/tree-sitter/build-module/README +++ b/admin/notes/tree-sitter/build-module/README @@ -14,4 +14,4 @@ To build all modules at once, run This gives you C, JSON, Go, HTML, Javascript, CSS, Python, Typescript (tsx), C# (csharp), C++ (cpp), Rust. More can be added to batch.sh -unless it's directory strucure is not standard. \ No newline at end of file +unless it's directory structure is not standard. \ No newline at end of file diff --git a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html index 0ae0b1897e..46985649a8 100644 --- a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html +++ b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html @@ -277,7 +277,7 @@ pairs. Each query is a tree-sitter query in either the string, s-expression or compiled form, or a function.

-

If query is a tree-sitter query, it should be preceeded by two +

If query is a tree-sitter query, it should be preceded by two :keyword/value pairs, where the :embed keyword specifies the embedded language, and the :host keyword specifies the host language. diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html index 3027bbaae9..95005de6d1 100644 --- a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html @@ -223,7 +223,7 @@

prev-line

This anchor is a function that is called with 3 arguments: node, parent, and bol, and returns the first non-whitespace -charater on the previous line. +character on the previous line.

point-min
diff --git a/admin/notes/tree-sitter/starter-guide b/admin/notes/tree-sitter/starter-guide index 700b020850..faf40bc64f 100644 --- a/admin/notes/tree-sitter/starter-guide +++ b/admin/notes/tree-sitter/starter-guide @@ -1,4 +1,4 @@ -STARTER GUIDE ON WRITTING MAJOR MODE WITH TREE-SITTER -*- org -*- +STARTER GUIDE ON WRITING MAJOR MODE WITH TREE-SITTER -*- org -*- This document guides you on adding tree-sitter support to a major mode. @@ -274,7 +274,7 @@ the anchor point. Below are some convenient builtin matchers and anchors. For MATHCER we have (parent-is TYPE) => matches if PARENT’s type matches TYPE as regexp - (node-is TYPE) => mathces NODE’s type + (node-is TYPE) => matches NODE’s type (query QUERY) => matches if querying PARENT with QUERY captures NODE. diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index b334105f1e..c472f9b441 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -5000,7 +5000,7 @@ Parser-based Indentation @item prev-line This anchor is a function that is called with 3 arguments: @var{node}, @var{parent}, and @var{bol}, and returns the first non-whitespace -charater on the previous line. +character on the previous line. @item point-min This anchor is a function that is called with 3 arguments: @var{node}, diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index f21d94ec8b..0f6a99b299 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1611,7 +1611,7 @@ Multiple Languages pairs. Each @var{query} is a tree-sitter query in either the string, s-expression or compiled form, or a function. -If @var{query} is a tree-sitter query, it should be preceeded by two +If @var{query} is a tree-sitter query, it should be preceded by two @var{:keyword}/@var{value} pairs, where the @code{:embed} keyword specifies the embedded language, and the @code{:host} keyword specified the host language. 
diff --git a/doc/misc/cc-mode.texi b/doc/misc/cc-mode.texi index bade04fb95..a8f5248c4c 100644 --- a/doc/misc/cc-mode.texi +++ b/doc/misc/cc-mode.texi @@ -898,7 +898,7 @@ Movement Commands @vindex defun-tactic @r{(c-)} Move to the beginning or end of the current or next function. Other -constructs (such as a structs or classes) which have a brace block +constructs (such as structs or classes) which have a brace block also count as ``functions'' here. To move over several functions, you can give these commands a repeat count. diff --git a/doc/misc/edt.texi b/doc/misc/edt.texi index 8b4ac0da5d..d6f9c9faf9 100644 --- a/doc/misc/edt.texi +++ b/doc/misc/edt.texi @@ -317,7 +317,7 @@ Starting emulation Emacs binds keys to @acronym{ASCII} control characters and so does the real EDT@. Where EDT key bindings and Emacs key bindings conflict, the default Emacs key bindings are retained by the EDT emulation by -default. If you are a diehard EDT user you may not like this. The +default. If you are a die-hard EDT user you may not like this. The @ref{Control keys} section explains how to change this so that the EDT bindings to @acronym{ASCII} control characters override the default Emacs bindings. @@ -443,7 +443,7 @@ GNU/Linux of the @key{PF1} key. The PC keypad can now emulate an LK-201 keypad (less the comma key), the standard keyboard supplied with DEC terminals VT-200 and above. This @file{.xmodmaprc} file switches the role of the -@key{F12} and @key{NumLock} keys. It has been tested on RedHat +@key{F12} and @key{NumLock} keys. It has been tested on Red Hat GNU/Linux 5.2. Other versions of GNU/Linux may require different keycodes. (@ref{Unix} for further help on how to do this.) @@ -460,7 +460,7 @@ Unix needed to see how to do this on a particular system. You will need to look at the output generated by @code{xmodmap} invoked -with the "-pm" switch. For example, on RedHat GNU/Linux 5.2 on a PC, we +with the "-pm" switch. 
For example, on Red Hat GNU/Linux 5.2 on a PC, we get the following output when running @samp{xmodmap -pm}: @example @@ -495,7 +495,7 @@ Unix . @end example -@noindent So, in RedHat GNU/Linux 5.2 on a PC, Num_Lock generates keycode 77. +@noindent So, in Red Hat GNU/Linux 5.2 on a PC, Num_Lock generates keycode 77. The following steps are taken: @enumerate @@ -883,7 +883,7 @@ Words (setq edt-word-entities '(?\t) ; specifies TAB, the default @end example -@noindent You can also specify characters by their decimal ascii values: +@noindent You can also specify characters by their decimal ASCII values: @example (setq edt-word-entities '(9 45 47)) ; specifies TAB, - , and / @@ -893,7 +893,7 @@ Control keys @section Enabling EDT Control Key Sequence Bindings Where EDT key bindings and Emacs key bindings conflict, the default -Emacs key bindings are retained by default. Some diehard EDT users +Emacs key bindings are retained by default. Some die-hard EDT users may not like this. So, if the variable @code{edt-use-EDT-control-key-bindings} is set to true in a user's @file{.emacs} file, then the default EDT Emulation mode will enable most diff --git a/doc/misc/efaq-w32.texi b/doc/misc/efaq-w32.texi index b58f6be758..bc3f545b2b 100644 --- a/doc/misc/efaq-w32.texi +++ b/doc/misc/efaq-w32.texi @@ -457,12 +457,12 @@ Associate files with Emacs @node Using with Explorer @subsection For use with Internet Explorer @cindex Internet Explorer, view source in Emacs -@cindex mailto urls, associating with Emacs -@cindex news urls, associating with Emacs +@cindex mailto URLs, associating with Emacs +@cindex news URLs, associating with Emacs @cindex URLs, associating mail and news URLs with Emacs You can use Emacs as the editor for composing mail for -@indicateurl{mailto:} links, reading usenet for @indicateurl{news:} +@indicateurl{mailto:} links, reading Usenet for @indicateurl{news:} links, and viewing source. 
The following registry entries control this: diff --git a/doc/misc/eglot.texi b/doc/misc/eglot.texi index 04bdcc6161..a815aebf59 100644 --- a/doc/misc/eglot.texi +++ b/doc/misc/eglot.texi @@ -280,7 +280,7 @@ Setting Up LSP Servers Sometimes, multiple servers are acceptable alternatives for handling a given major-mode. In those cases, you may combine the helper function -@code{eglot-alternatives} with the funcional form of +@code{eglot-alternatives} with the functional form of @code{eglot-server-programs}. @lisp @@ -994,8 +994,8 @@ Customizing Eglot Here @code{:@var{server}} identifies a particular language server and @var{plist} is the corresponding keyword-value property list of one or more parameter settings for that server, serialized by Eglot as a JSON -object. @var{plist} may be arbitrarity complex, generally containing -other keywork-value property sublists corresponding to JSON subobjects. +object. @var{plist} may be arbitrarily complex, generally containing +other keyword-value property sublists corresponding to JSON subobjects. The JSON values @code{true}, @code{false}, @code{null} and @code{@{@}} are represented by the Lisp values @code{t}, @code{:json-false}, @code{nil}, and @code{eglot-@{@}}, respectively. diff --git a/doc/misc/emacs-gnutls.texi b/doc/misc/emacs-gnutls.texi index 1b9f5e1040..8c8a6f3154 100644 --- a/doc/misc/emacs-gnutls.texi +++ b/doc/misc/emacs-gnutls.texi @@ -132,7 +132,7 @@ Help For Users distributions. By default the following locations are tried in this order: @file{/etc/ssl/certs/ca-certificates.crt} for Debian, Ubuntu, Gentoo and Arch Linux; @file{/etc/pki/tls/certs/ca-bundle.crt} for -Fedora and RHEL; @file{/etc/ssl/ca-bundle.pem} for Suse; +Fedora and RHEL; @file{/etc/ssl/ca-bundle.pem} for SUSE; @file{/usr/ssl/certs/ca-bundle.crt} for Cygwin; @file{/usr/local/share/certs/ca-root-nss.crt} for FreeBSD@. 
You can easily customize @code{gnutls-trustfiles} to be something else, but diff --git a/doc/misc/gnus-faq.texi b/doc/misc/gnus-faq.texi index 7cb5621b69..167a525ce8 100644 --- a/doc/misc/gnus-faq.texi +++ b/doc/misc/gnus-faq.texi @@ -189,7 +189,7 @@ FAQ 2-4 Gnus offers the topic mode, it allows you to sort your groups in, well, topics, e.g., all groups dealing with Linux under the topic linux, all dealing with music under -the topic music and all dealing with scottish music under +the topic music and all dealing with Scottish music under the topic scottish which is a subtopic of music. To enter topic mode, just hit t while in Group buffer. Now diff --git a/doc/misc/gnus.texi b/doc/misc/gnus.texi index c4705928d3..1d522bf9ca 100644 --- a/doc/misc/gnus.texi +++ b/doc/misc/gnus.texi @@ -589,7 +589,7 @@ Top Article Treatment * Article Highlighting:: You want to make the article look like fruit salad. -* Article Fontisizing:: Making emphasized text look nice. +* Article Fontifying:: Making emphasized text look nice. * Article Hiding:: You also want to make certain info go away. * Article Washing:: Lots of way-neat functions to make life better. * Article Header:: Doing various header transformations. @@ -1002,7 +1002,7 @@ Don't Panic Each server maintains a list of groups, and those groups contain articles. Because Gnus presents a unified interface to a wide variety of servers, the vocabulary doesn't always quite line up (@pxref{FAQ -- Glossary}, for a more complete glossary). Thus a local maildir is +- Glossary}, for a more complete glossary). Thus a local Maildir is referred to as a ``server'' (@pxref{Finding the News}) the same as a Usenet or IMAP server is; ``groups'' (@pxref{Group Buffer}) might mean an NNTP group, IMAP folder, or local mail directory; and an @@ -1039,7 +1039,7 @@ Don't Panic New mail has to come from somewhere. Some servers, such as NNTP or IMAP, are themselves responsible for fetching newly-arrived articles. 
-Others, such as maildir or mbox servers, only store articles and don't +Others, such as Maildir or mbox servers, only store articles and don't fetch them from anywhere. In the latter case, Gnus provides for @code{mail sources}: places @@ -8718,7 +8718,7 @@ Article Treatment @menu * Article Highlighting:: You want to make the article look like fruit salad. -* Article Fontisizing:: Making emphasized text look nice. +* Article Fontifying:: Making emphasized text look nice. * Article Hiding:: You also want to make certain info go away. * Article Washing:: Lots of way-neat functions to make life better. * Article Header:: Doing various header transformations. @@ -8840,8 +8840,8 @@ Article Highlighting @xref{Customizing Articles}, for how to highlight articles automatically. -@node Article Fontisizing -@subsection Article Fontisizing +@node Article Fontifying +@subsection Article Fontifying @cindex emphasis @cindex article emphasis diff --git a/doc/misc/idlwave.texi b/doc/misc/idlwave.texi index 4bdbd5c219..0c59fdf738 100644 --- a/doc/misc/idlwave.texi +++ b/doc/misc/idlwave.texi @@ -3799,7 +3799,7 @@ HTML Help Browser Tips @code{idlwave-help-browser-function} inherits the browser configured in @code{browse-url-browser-function}. -Note that the HTML files decompiled from the help sources contain +Note that the HTML files recompiled from the help sources contain specific references to the @samp{Symbol} font, which by default is not permitted in normal encodings (it's invalid, technically). Though it only impacts a few symbols, you can trick Mozilla-based browsers into diff --git a/doc/misc/mairix-el.texi b/doc/misc/mairix-el.texi index 3632c64bd4..28b86c43ac 100644 --- a/doc/misc/mairix-el.texi +++ b/doc/misc/mairix-el.texi @@ -70,7 +70,7 @@ About Mairix is a tool for indexing and searching words in locally stored mail. It was written by Richard Curnow and is licensed under the GPL@. 
Mairix comes with most popular GNU/Linux distributions, but it also -runs under Windows (with cygwin), macOS and Solaris. The website can +runs under Windows (with Cygwin), macOS and Solaris. The website can be found at @uref{http://www.rpcurnow.force9.co.uk/mairix/index.html} diff --git a/doc/misc/message.texi b/doc/misc/message.texi index cfad4f4e07..fb6e97f0d3 100644 --- a/doc/misc/message.texi +++ b/doc/misc/message.texi @@ -935,7 +935,7 @@ IDNA @section IDNA @cindex IDNA @cindex internationalized domain names -@cindex non-ascii domain names +@cindex non-ASCII domain names @acronym{IDNA} is a standard way to encode non-@acronym{ASCII} domain names into a readable @acronym{ASCII} string. The details can be diff --git a/etc/NEWS.24 b/etc/NEWS.24 index 8ef479ac0a..fab8a39b0e 100644 --- a/etc/NEWS.24 +++ b/etc/NEWS.24 @@ -951,7 +951,7 @@ Also the following files used by the now obsolete otodo-mode.el: *** the old version of todo-mode.el (renamed to otodo-mode.el). -*** xesam.el (owing to the cancellation of the XESAM project). +*** xesam.el (owing to the cancelation of the XESAM project). *** yow.el; use fortune.el or cookie1.el instead. diff --git a/lib-src/ChangeLog.1 b/lib-src/ChangeLog.1 index 0829f50a56..1a9767661a 100644 --- a/lib-src/ChangeLog.1 +++ b/lib-src/ChangeLog.1 @@ -5643,7 +5643,7 @@ 1998-04-06 Andreas Schwab Silence -Wimplicit: - * movemail.c: Move cancellations up. Include if + * movemail.c: Move cancelations up. Include if available. * fakemail.c (_XOPEN_SOURCE): Define for declaration of cuserid. (parse_header): Explicitly declare return type. diff --git a/lisp/ChangeLog.9 b/lisp/ChangeLog.9 index 4cb10d2d55..469d0970f8 100644 --- a/lisp/ChangeLog.9 +++ b/lisp/ChangeLog.9 @@ -1888,7 +1888,7 @@ (uniquify-item-greaterp): Substitutes uniquify-item-lessp. This is equivalent to what the old code did. (uniquify-rationalize-a-list): Never recompute the proposed - name. Sort the conflicting sublist before rationalising it: this + name. 
Sort the conflicting sublist before rationalizing it: this is equivalent to what the old code did, but one directory element at a time, and only when necessary. (uniquify-rationalize-conflicting-sublist): Recompute here the diff --git a/lisp/gnus/ChangeLog.3 b/lisp/gnus/ChangeLog.3 index c55d6225e3..a1ad22fd62 100644 --- a/lisp/gnus/ChangeLog.3 +++ b/lisp/gnus/ChangeLog.3 @@ -1109,7 +1109,7 @@ * gnus-icalendar.el (gnus-icalendar-event:sync-to-org) (gnus-icalendar-event:inline-org-buttons): Allow for appointment - cancellations to be synced to org if the original appt has an org + cancelations to be synced to org if the original appt has an org outline. 2013-11-13 Jan Tatarik diff --git a/lisp/gnus/message.el b/lisp/gnus/message.el index 3bbd68bdcd..dca5b90089 100644 --- a/lisp/gnus/message.el +++ b/lisp/gnus/message.el @@ -7520,7 +7520,7 @@ message-is-yours-p ;;;###autoload (defun message-cancel-news (&optional arg) "Cancel an article you posted. -If ARG, allow editing of the cancellation message." +If ARG, allow editing of the cancelation message." (interactive "P") (unless (message-news-p) (error "This is not a news article; canceling is impossible")) diff --git a/lisp/jsonrpc.el b/lisp/jsonrpc.el index 90833e1c1d..1387fa3692 100644 --- a/lisp/jsonrpc.el +++ b/lisp/jsonrpc.el @@ -308,7 +308,7 @@ jsonrpc-request (setq canceled t)) `(canceled ,cancel-on-input-retval)) (t (while t (accept-process-output nil 30))))) - ;; In normal operation, cancellation is handled by the + ;; In normal operation, cancelation is handled by the ;; timeout function and response filter, but we still have ;; to protect against user-quit (C-g) or the ;; `cancel-on-input' case. diff --git a/lisp/treesit.el b/lisp/treesit.el index b81396fc22..24f0e1472d 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -361,7 +361,7 @@ treesit-range-rules "Produce settings for `treesit-range-settings'. 
QUERY-SPECS are a series of QUERY-SPECs, where each QUERY-SPEC is -a QUERY preceeded by zero or more pairs of :KEYWORD and VALUE, +a QUERY preceded by zero or more pairs of :KEYWORD and VALUE, like this: :KEYWORD VALUE... QUERY @@ -572,7 +572,7 @@ treesit-font-lock-rules or compiled form. For each query, captured nodes are highlighted with the capture name as its face. -:KEYWORD and VALUE pairs preceeding a QUERY add meta information +:KEYWORD and VALUE pairs preceding a QUERY add meta information to QUERY. For example, (treesit-font-lock-rules @@ -742,7 +742,7 @@ treesit-fontify-with-override (defun treesit--set-nonsticky (start end sym &optional remove) "Set `rear-nonsticky' property between START and END. -Set the proeprty to a list containing SYM. If there is already a +Set the property to a list containing SYM. If there is already a list, add SYM to that list. If REMOVE is non-nil, remove SYM instead." (let* ((prop (get-text-property start 'rear-nonsticky)) @@ -902,7 +902,7 @@ treesit--font-lock-notifier (with-current-buffer (treesit-parser-buffer parser) (dolist (range ranges) (when treesit--font-lock-verbose - (message "Notifier recieved range: %s-%s" + (message "Notifier received range: %s-%s" (car range) (cdr range))) (put-text-property (car range) (cdr range) 'fontified nil)))) diff --git a/nt/icons/README b/nt/icons/README index 4d9fb15e52..f84d4635b3 100644 --- a/nt/icons/README +++ b/nt/icons/README @@ -23,7 +23,7 @@ License: GNU General Public License version 3 or later (see COPYING) "These are some images of a 3D stylized gnu head that I created back in 1998. I started studying pictures of gnus and wildebeests and - worked with a 3D modeller, sPatch, until I came up with these. Then + worked with a 3D modeler, sPatch, until I came up with these. Then I worked to make them into icons - cropping the horns off the sides so the images were big enough to be recognizable (to me anyway)." 
diff --git a/src/treesit.c b/src/treesit.c index 91c26374b3..463e2458a6 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -295,7 +295,7 @@ #define ts_tree_root_node fn_ts_tree_root_node a node. But since we can just retrieve a new node, it shouldn't be a limitation. - - I didn't expose setting timeout and cancellation flag for a + - I didn't expose setting timeout and cancelation flag for a parser, mainly because I don't think they are really necessary in Emacs's use cases. @@ -891,7 +891,7 @@ treesit_ensure_parsed (Lisp_Object parser) when 1) language is not set (impossible in Emacs because the user has to supply a language to create a parser), 2) parse canceled due to timeout (impossible because we don't set a timeout), 3) - parse canceled due to cancellation flag (impossible because we + parse canceled due to cancelation flag (impossible because we don't set the flag). (See comments for ts_parser_parse in tree_sitter/api.h.) */ if (new_tree == NULL) @@ -1079,7 +1079,7 @@ treesit_compose_query_signal_data (uint32_t error_offset, /* Ensure the QUERY is compiled. Return the TSQuery. It could be NULL if error occurs, in which case ERROR_OFFSET and ERROR_TYPE are - bound. If error occures, return NULL, and assign SIGNAL_SYMBOL and + bound. If error occurs, return NULL, and assign SIGNAL_SYMBOL and SIGNAL_DATA accordingly. */ static TSQuery * treesit_ensure_query_compiled (Lisp_Object query, Lisp_Object *signal_symbol, @@ -1366,7 +1366,7 @@ treesit_check_range_argument (Lisp_Object ranges) } /* Generate a list of ranges in Lisp from RANGES. This function - doens't take ownership of RANGES. BUFFER is used to convert + doesn't take ownership of RANGES. BUFFER is used to convert between tree-sitter buffer offset and buffer position. */ static Lisp_Object treesit_make_ranges (const TSRange *ranges, uint32_t len, @@ -2538,7 +2538,7 @@ treesit_traverse_sibling_helper (TSNode node, bool forward, bool named) } } -/* Return the first/last named/unamed child of NODE. 
FORWARD controls +/* Return the first/last named/unnamed child of NODE. FORWARD controls the direction and NAMED controls the nameness. */ static TSNode treesit_traverse_child_helper (TSNode node, bool forward, bool named) diff --git a/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el b/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el index 474739d01b..711dc7b97f 100644 --- a/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el +++ b/test/lisp/erc/erc-scenarios-base-compat-rename-bouncer.el @@ -106,7 +106,7 @@ erc-scenarios-common--base-compat-no-rename-bouncer (erc-d-t-search-for 1 "") (erc-d-t-absent-for 0.1 "") (should (eq erc-server-process erc-server-process-bar)) - (erc-d-t-search-for 10 "keeps you from dishonour") + (erc-d-t-search-for 10 "keeps you from dishonor") (erc-d-t-wait-for 5 (not (erc-server-process-alive))))) (when more (funcall more)))) commit aeadba1418d8fc18f17b4ae415cde35e9e272e7a (refs/remotes/origin/feature/tree-sitter) Author: Yuan Fu Date: Mon Nov 21 13:46:52 2022 -0800 ; * lisp/progmodes/js.el (js-ts-mode): Add autoload cookie. diff --git a/lisp/progmodes/js.el b/lisp/progmodes/js.el index 77106ec583..51d105b9d7 100644 --- a/lisp/progmodes/js.el +++ b/lisp/progmodes/js.el @@ -3808,6 +3808,7 @@ js-mode ;;(syntax-propertize (point-max)) ) +;;;###autoload (define-derived-mode js-ts-mode js-base-mode "JavaScript" "Major mode for editing JavaScript. commit 3f37f6b43565242db4409022dd9bc980cb86c3f6 Author: Yuan Fu Date: Mon Nov 21 13:34:38 2022 -0800 ; * test/src/treesit-tests.el (treesit-misc): Remove test. This test is for treesit--setting-for-mode, which is removed when we switched from using treesit-settings to using separate major modes. 
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index c736f97e31..59264722ba 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -522,27 +522,6 @@ treesit-node-check (insert "]") (should (treesit-node-check array-node 'outdated)))) -(ert-deftest treesit-misc () - "Misc helper functions." - (skip-unless (treesit-available-p)) - (let ((settings '((t 0 t) - (c-mode 1 t) - (text-mode 2 nil) - (prog-mode 3 t) - (fundamental-mode 4 t)))) - ;; `treesit--setting-for-mode'. - ;; Exact match. - (should (eq 1 (treesit--setting-for-mode 'c-mode settings))) - ;; Inherit from t. - (should (eq 0 (treesit--setting-for-mode 'non-exist settings))) - ;; Inherit from prog-mode rather than fundamental-mode. - (require 'elisp-mode) - (should (eq 3 (treesit--setting-for-mode 'emacs-lisp-mode settings))) - ;; Not inherit from text-mode. - (require 'outline) - (should (not (eq 2 (treesit--setting-for-mode 'outline-mode settings)))) - )) - ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list commit eb1a35adc1c5a1a9d14ec8594580c5eb0e3d28fe Author: Yuan Fu Date: Mon Nov 21 13:33:03 2022 -0800 ; Update tree-sitter starter guide * admin/notes/tree-sitter/starter-guide: Reflect recent changes. * admin/notes/tree-sitter/html-manual/Using-Parser.html: * admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html: * admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html: * admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html: * admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html: * admin/notes/tree-sitter/html-manual/Multiple-Languages.html: * admin/notes/tree-sitter/html-manual/Language-Definitions.html: Update. 
diff --git a/admin/notes/tree-sitter/html-manual/Language-Definitions.html b/admin/notes/tree-sitter/html-manual/Language-Definitions.html index 4fd7eb5687..6dd589f825 100644 --- a/admin/notes/tree-sitter/html-manual/Language-Definitions.html +++ b/admin/notes/tree-sitter/html-manual/Language-Definitions.html @@ -230,19 +230,38 @@ body: (compound_statement)) +

Exploring the syntax tree

+ + + +

To aid in understanding the syntax of a language and in debugging of +Lisp programs that use the syntax tree, Emacs provides an “explore” +mode, which displays the syntax tree of the source in the current +buffer in real time. Emacs also comes with an “inspect mode”, which +displays information of the nodes at point in the mode-line. +

+
+
Command: treesit-explore-mode
+

This mode pops up a window displaying the syntax tree of the source in +the current buffer. Selecting text in the source buffer highlights +the corresponding nodes in the syntax tree display. Clicking +on nodes in the syntax tree highlights the corresponding text in the +source buffer. +

+
Command: treesit-inspect-mode

This minor mode displays on the mode-line the node that starts -at point. The mode-line will display +at point. For example, the mode-line can display

parent field: (node (child (…)))
 
-

where node, child, etc, are nodes which begin at point. +

where node, child, etc., are nodes which begin at point. parent is the parent of node. node is displayed in -bold typeface. field-names are field names of node and -child, etc. +a bold typeface. field-names are field names of node and +of child, etc.

If no node starts at point, i.e., point is in the middle of a node, then the mode line displays the earliest node that spans point, and @@ -343,7 +362,7 @@

token(rule)

marks rule to produce a single leaf node. That is, instead of generating a parent node with individual child nodes under it, -everything is combined into a single leaf node. +everything is combined into a single leaf node. See Retrieving Nodes.

token.immediate(rule)

Normally, grammar rules ignore preceding whitespace; this diff --git a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html index 6d1800fad7..0ae0b1897e 100644 --- a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html +++ b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html @@ -273,12 +273,12 @@ a value that treesit-range-settings can have.

It takes a series of query-specs, where each query-spec is -a query preceded by zero or more pairs of keyword and -value. Each query is a tree-sitter query in either the +a query preceded by zero or more keyword/value +pairs. Each query is a tree-sitter query in either the string, s-expression or compiled form, or a function.

If query is a tree-sitter query, it should be preceded by two -:keyword value pairs, where the :embed keyword +:keyword/value pairs, where the :embed keyword specifies the embedded language, and the :host keyword specifies the host language.

diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html index 72d82e6ee6..e04a730b05 100644 --- a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html @@ -130,17 +130,17 @@

This function takes a series of query-specs, where each -query-spec is a query preceded by multiple pairs of -:keyword and value. Each query is a tree-sitter -query in either the string, s-expression or compiled form. -

-

For each query, the :keyword and value pairs add -meta information to it. The :lang keyword declares -query’s language. The :feature keyword sets the feature -name of query. Users can control which features are enabled -with font-lock-maximum-decoration and +query-spec is a query preceded by one or more +:keyword/value pairs. Each query is a +tree-sitter query in either the string, s-expression or compiled form. +

+

For each query, the :keyword/value pairs that +precede it add meta information to it. The :lang keyword +declares query’s language. The :feature keyword sets the +feature name of query. Users can control which features are +enabled with font-lock-maximum-decoration and treesit-font-lock-feature-list (described below). These two -keywords are mandated. +keywords are mandatory.

Other keywords are optional:

@@ -177,24 +177,6 @@ ignored.

-

Contextual entities, like multi-line strings, or /* */ style -comments, need special care, because change in these entities might -cause change in a large portion of the buffer. For example, inserting -the closing comment delimiter */ will change all the text -between it and the opening delimiter to comment face. Such entities -should be captured in a special name contextual, so Emacs can -correctly update their fontification. Here is an example for -comments: -

-
-
(treesit-font-lock-rules
- :language 'javascript
- :feature 'comment
- :override t
- '((comment) @font-lock-comment-face)
-   (comment) @contextual))
-
-
Variable: treesit-font-lock-feature-list

This is a list of lists of feature symbols. Each element of the list @@ -208,11 +190,20 @@ list disables the corresponding query during font-lock.

Common feature names, for many programming languages, include -function-name, type, variable-name (left-hand-side or LHS of -assignments), builtin, constant, keyword, string-interpolation, -comment, doc, string, operator, preprocessor, escape-sequence, and key -(in key-value pairs). Major modes are free to subdivide or extend -these common features. +definition, type, assignment, builtin, +constant, keyword, string-interpolation, +comment, doc, string, operator, +preprocessor, escape-sequence, and key. Major +modes are free to subdivide or extend these common features. +

+

Some of these features warrant some explanation: definition +highlights whatever is being defined, e.g., the function name in a +function definition, the struct name in a struct definition, the +variable name in a variable definition; assignment highlights +whatever is being assigned to, e.g., the variable or field in an +assignment statement; key highlights keys in key-value pairs, +e.g., keys in a JSON object, or a Python dictionary; doc +highlights docstrings or doc-comments.

For example, the value of this variable could be:

diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html index 5ea1f9bc33..3027bbaae9 100644 --- a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html @@ -183,6 +183,14 @@
(match nil "argument_list" nil nil 0 0)
 
+
+
comment-end
+

This matcher is a function that is called with 3 arguments: +node, parent, and bol, and returns non-nil if +point is before a comment ending token. Comment ending tokens are +defined by regular expression treesit-comment-end +(see treesit-comment-end). +

first-sibling

This anchor is a function that is called with 3 arguments: node, @@ -219,12 +227,28 @@

point-min
-

This anchor is a function is called with 3 arguments: node, +

This anchor is a function that is called with 3 arguments: node, parent, and bol, and returns the beginning of the buffer. This is useful as the beginning of the buffer is always at column 0. +

+
+
comment-start
+

This anchor is a function that is called with 3 arguments: node, +parent, and bol, and returns the position right after the +comment-start token. Comment-start tokens are defined by regular +expression treesit-comment-start (see treesit-comment-start). This function assumes parent is +the comment node. +

+
+
comment-start-skip
+

This anchor is a function that is called with 3 arguments: node, +parent, and bol, and returns the position after the +comment-start token and any whitespace characters following that +token. Comment-start tokens are defined by regular expression +treesit-comment-start. This function assumes parent is +the comment node.

-

Indentation utilities

diff --git a/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html index ea22421ac4..a0b5775f11 100644 --- a/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html +++ b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html @@ -106,7 +106,7 @@
  • Tree-sitter Language Definitions
  • Using Tree-sitter Parser
  • -
  • Retrieving Node
  • +
  • Retrieving Nodes
  • Accessing Node Information
  • Pattern Matching Tree-sitter Nodes
  • Parsing Text in Multiple Languages
  • diff --git a/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html index a80c232616..29d51eecf7 100644 --- a/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html +++ b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html @@ -133,7 +133,7 @@ ts_node_is_named treesit-node-check ts_node_is_missing treesit-node-check ts_node_is_extra treesit-node-check -ts_node_has_changes treesit-node-check +ts_node_has_changes ts_node_has_error treesit-node-check ts_node_parent treesit-node-parent ts_node_child treesit-node-child diff --git a/admin/notes/tree-sitter/html-manual/Using-Parser.html b/admin/notes/tree-sitter/html-manual/Using-Parser.html index c478a39e55..a4f31f9089 100644 --- a/admin/notes/tree-sitter/html-manual/Using-Parser.html +++ b/admin/notes/tree-sitter/html-manual/Using-Parser.html @@ -33,7 +33,7 @@ - + + + + + + + +
    + +
    +

    37.4 Accessing Node Information

    + +

    Before going further, make sure you have read the basic conventions +about tree-sitter nodes in the previous node. +

    +

    Basic information

    + +

    Every node is associated with a parser, and that parser is associated +with a buffer. The following functions let you retrieve them. +

    +
    +
    Function: treesit-node-parser node
    +

    This function returns node’s associated parser. +

    + +
    +
    Function: treesit-node-buffer node
    +

    This function returns node’s parser’s associated buffer. +

    + +
    +
    Function: treesit-node-language node
    +

    This function returns node’s parser’s associated language. +

    + +

    Each node represents a piece of text in the buffer. The functions below +find relevant information about that text. +

    +
    +
    Function: treesit-node-start node
    +

    Return the start position of node. +

    + +
    +
    Function: treesit-node-end node
    +

    Return the end position of node. +

    + +
    +
    Function: treesit-node-text node &optional object
    +

    Returns the buffer text that node represents. (If node is +retrieved from parsing a string, it will be text from that string.) +

    + +

    Here are some basic checks on tree-sitter nodes. +

    +
    +
    Function: treesit-node-p object
    +

    Checks if object is a tree-sitter syntax node. +

    + +
    +
    Function: treesit-node-eq node1 node2
    +

    Checks if node1 and node2 are the same node in a syntax +tree. +

    + +

    Property information

    + +

    In general, nodes in a concrete syntax tree fall into two categories: +named nodes and anonymous nodes. Whether a node is named +or anonymous is determined by the language definition +(see named node). +

    + +

    Apart from being named/anonymous, a node can have other properties. A +node can be “missing”: missing nodes are inserted by the parser in +order to recover from certain kinds of syntax errors, i.e., something +should probably be there according to the grammar, but is not there. +

    + +

    A node can be “extra”: extra nodes represent things like comments, +which can appear anywhere in the text. +

    + +

    A node “has changes” if the buffer has changed since the node was +retrieved, i.e., the node is outdated. +

    + +

    A node “has error” if the text it spans contains a syntax error. It +can be that the node itself has an error, or one of its +children/grandchildren... has an error. +

    +
    +
    Function: treesit-node-check node property
    +

    This function checks if node has property. property +can be 'named, 'missing, 'extra, +'has-changes, or 'has-error. +

    + + +
    +
    Function: treesit-node-type node
    +

    Named nodes have “types” (see node type). +For example, a named node can be a string_literal node, where +string_literal is its type. +

    +

    This function returns node’s type as a string. +

    + +

    Information as a child or parent

    + +
    +
    Function: treesit-node-index node &optional named
    +

    This function returns the index of node as a child node of its +parent. If named is non-nil, it only counts named nodes +(see named node). +

    + +
    +
    Function: treesit-node-field-name node
    +

    A child of a parent node could have a field name (see field name). This function returns the field name +of node as a child of its parent. +

    + +
    +
    Function: treesit-node-field-name-for-child node n
    +

    This function returns the field name of the n’th child of +node. +

    + +
    +
    Function: treesit-child-count node &optional named
    +

    This function finds the number of children of node. If +named is non-nil, it only counts named children (see named node). +

    + +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Language-Definitions.html b/admin/notes/tree-sitter/html-manual/Language-Definitions.html new file mode 100644 index 0000000000..ba3eeb9eeb --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Language-Definitions.html @@ -0,0 +1,326 @@ + + + + + + +Language Definitions (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.1 Tree-sitter Language Definitions

    + +

    Loading a language definition

    + +

    Tree-sitter relies on language definitions to parse text in that +language. In Emacs, a language definition is represented by a symbol. +For example, the C language definition is represented as c, and +c can be passed to tree-sitter functions as the language +argument. +

    + + + +

    Tree-sitter language definitions are distributed as dynamic libraries. +In order to use a language definition in Emacs, you need to make sure +that the dynamic library is installed on the system. Emacs looks for +language definitions under load paths in +treesit-extra-load-path, user-emacs-directory/tree-sitter, +and system default locations for dynamic libraries, in that order. +Emacs tries each extension in treesit-load-suffixes. If Emacs +cannot find the library or has a problem loading it, Emacs signals +treesit-load-language-error. The signal data is a list of +specific error messages. +

    +
    +
    Function: treesit-language-available-p language
    +

    This function checks whether the dynamic library for language is +present on the system, and returns non-nil if it is. +

    + + +

    By convention, the dynamic library for language is +libtree-sitter-language.ext, where ext is the +system-specific extension for dynamic libraries. Also by convention, +the function provided by that library is named +tree_sitter_language. If a language definition doesn’t +follow this convention, you should add an entry +

    +
    +
    (language library-base-name function-name)
    +
    + +

    to treesit-load-name-override-list, where +library-base-name is the base filename for the dynamic library +(conventionally libtree-sitter-language), and +function-name is the function provided by the library +(conventionally tree_sitter_language). For example, +

    +
    +
    (cool-lang "libtree-sitter-coool" "tree_sitter_cooool")
    +
    + +

    for a language too cool to abide by conventions. +

    +
    +
    Function: treesit-language-version &optional min-compatible
    +

    Tree-sitter library has a language version, a language +definition’s version needs to match this version to be compatible. +

    +

    This function returns tree-sitter library’s language version. If +min-compatible is non-nil, it returns the minimal compatible +version. +

    + +

    Concrete syntax tree

    + +

    A syntax tree is what a parser generates. In a syntax tree, each node +represents a piece of text, and is connected to each other by a +parent-child relationship. For example, if the source text is +

    +
    +
    1 + 2
    +
    + +

    its syntax tree could be +

    +
    +
                      +--------------+
    +                  | root "1 + 2" |
    +                  +--------------+
    +                         |
    +        +--------------------------------+
    +        |       expression "1 + 2"       |
    +        +--------------------------------+
    +           |             |            |
    ++------------+   +--------------+   +------------+
    +| number "1" |   | operator "+" |   | number "2" |
    ++------------+   +--------------+   +------------+
    +
    + +

    We can also represent it in s-expression: +

    +
    +
    (root (expression (number) (operator) (number)))
    +
    + +

    Node types

    + + + + +

    Names like root, expression, number, +operator are nodes’ type. However, not all nodes in a +syntax tree have a type. Nodes that don’t are anonymous nodes, +and nodes with a type are named nodes. Anonymous nodes are +tokens with fixed spellings, including punctuation characters like +bracket ‘]’, and keywords like return. +

    +

    Field names

    + + +

    To make the syntax tree easier to +analyze, many language definitions assign field names to child +nodes. For example, a function_definition node could have a +declarator and a body: +

    +
    +
    (function_definition
    + declarator: (declaration)
    + body: (compound_statement))
    +
    + +
    +
    Command: treesit-inspect-mode
    +

    This minor mode displays the node that starts at point in +mode-line. The mode-line will display +

    +
    +
    parent field-name: (child (grand-child (...)))
    +
    + +

    child, grand-child, and grand-grand-child, etc, are +nodes that have their beginning at point. And parent is the +parent of child. +

    +

    If there is no node that starts at point, i.e., point is in the middle +of a node, then the mode-line only displays the smallest node that +spans point, and its immediate parent. +

    +

    This minor mode doesn’t create parsers on its own. It simply uses the +first parser in (treesit-parser-list) (see Using Tree-sitter Parser). +

    + +

    Reading the grammar definition

    + +

    Authors of language definitions define the grammar of a +language, and this grammar determines how a parser constructs a +concrete syntax tree out of the text. In order to use the syntax +tree effectively, we need to read the grammar file. +

    +

    The grammar file is usually grammar.js in a language +definition’s project repository. The link to a language definition’s +home page can be found in tree-sitter’s homepage +(https://tree-sitter.github.io/tree-sitter). +

    +

    The grammar is written in JavaScript syntax. For example, the rule +matching a function_definition node looks like +

    +
    +
    function_definition: $ => seq(
    +  $.declaration_specifiers,
    +  field('declarator', $.declaration),
    +  field('body', $.compound_statement)
    +)
    +
    + +

    The rule is represented by a function that takes a single argument +$, representing the whole grammar. The function itself is +constructed by other functions: the seq function puts together a +sequence of children; the field function annotates a child with +a field name. If we write the above definition in BNF syntax, it +would look like +

    +
    +
    function_definition :=
    +  <declaration_specifiers> <declaration> <compound_statement>
    +
    + +

    and the node returned by the parser would look like +

    +
    +
    (function_definition
    +  (declaration_specifier)
    +  declarator: (declaration)
    +  body: (compound_statement))
    +
    + +

    Below is a list of functions that one will see in a grammar +definition. Each function takes other rules as arguments and returns +a new rule. +

    +
      +
    • seq(rule1, rule2, ...) matches each rule one after another. + +
    • choice(rule1, rule2, ...) matches one of the rules in its +arguments. + +
    • repeat(rule) matches rule for zero or more times. +This is like the ‘*’ operator in regular expressions. + +
    • repeat1(rule) matches rule for one or more times. +This is like the ‘+’ operator in regular expressions. + +
    • optional(rule) matches rule for zero or one time. +This is like the ‘?’ operator in regular expressions. + +
    • field(name, rule) assigns field name name to the child +node matched by rule. + +
    • alias(rule, alias) makes nodes matched by rule appear as +alias in the syntax tree generated by the parser. For example, + +
      +
      alias(preprocessor_call_exp, call_expression)
      +
      + +

      makes any node matched by preprocessor_call_exp appear as +call_expression. +

    + +

    Below are grammar functions less interesting for a reader of a +language definition. +

    +
      +
    • token(rule) marks rule to produce a single leaf node. +That is, instead of generating a parent node with individual child +nodes under it, everything is combined into a single leaf node. + +
    • Normally, grammar rules ignore preceding whitespace; +token.immediate(rule) changes rule to match only when +there is no preceding whitespace. + +
    • prec(n, rule) gives rule a level n precedence. + +
    • prec.left([n,] rule) marks rule as left-associative, +optionally with level n. + +
    • prec.right([n,] rule) marks rule as right-associative, +optionally with level n. + +
    • prec.dynamic(n, rule) is like prec, but the precedence +is applied at runtime instead. +
    + +

    The tree-sitter project talks about writing a grammar in more detail: +https://tree-sitter.github.io/tree-sitter/creating-parsers. +Read especially “The Grammar DSL” section. +

    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html new file mode 100644 index 0000000000..1ee2df7f44 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html @@ -0,0 +1,255 @@ + + + + + + +Multiple Languages (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.6 Parsing Text in Multiple Languages

    + +

    Sometimes, the source of a programming language could contain sources +of other languages, HTML + CSS + JavaScript is one example. In that +case, we need to assign individual parsers to text segments written in +different languages. Traditionally this is achieved by using +narrowing. While tree-sitter works with narrowing (see narrowing), the recommended way is to set ranges in which +a parser will operate. +

    +
    +
    Function: treesit-parser-set-included-ranges parser ranges
    +

    This function sets the ranges of parser to ranges. Then +parser will only read the text covered by each range. Each +range in ranges is a cons (beg +. end). +

    +

    Each range in ranges must come in order and not overlap. That +is, in pseudo code: +

    +
    +
    (cl-loop for idx from 1 to (1- (length ranges))
    +         for prev = (nth (1- idx) ranges)
    +         for next = (nth idx ranges)
    +         should (<= (car prev) (cdr prev)
    +                    (car next) (cdr next)))
    +
    + + +

    If ranges violates this constraint, or something else goes +wrong, this function signals a treesit-range-invalid error. The +signal data contains a specific error message and the ranges we are +trying to set. +

    +

    This function can also be used for disabling ranges. If ranges +is nil, the parser is set to parse the whole buffer. +

    +

    Example: +

    +
    +
    (treesit-parser-set-included-ranges
    + parser '((1 . 9) (16 . 24) (24 . 25)))
    +
    +
    + +
    +
    Function: treesit-parser-included-ranges parser
    +

    This function returns the ranges set for parser. The return +value is the same as the ranges argument of +treesit-parser-set-included-ranges: a list of cons +(beg . end). And if parser doesn’t have any +ranges, the return value is nil. +

    +
    +
    (treesit-parser-included-ranges parser)
    +    ⇒ ((1 . 9) (16 . 24) (24 . 25))
    +
    +
    + +
    +
    Function: treesit-set-ranges parser-or-lang ranges
    +

    Like treesit-parser-set-included-ranges, this function sets +the ranges of parser-or-lang to ranges. Conveniently, +parser-or-lang could be either a parser or a language. If it is +a language, this function looks for the first parser in +(treesit-parser-list) for that language in the current buffer, +and sets the ranges for it. +

    + +
    +
    Function: treesit-get-ranges parser-or-lang
    +

    This function returns the ranges of parser-or-lang, like +treesit-parser-included-ranges. And like +treesit-set-ranges, parser-or-lang can be a parser or +a language symbol. +

    + +
    +
    Function: treesit-query-range source query &optional beg end
    +

    This function matches source with query and returns the +ranges of captured nodes. The return value has the same shape as that of +the other range functions: a list of (beg . end). +

    +

    For convenience, source can be a language symbol, a parser, or a +node. If a language symbol, this function matches in the root node of +the first parser using that language; if a parser, this function +matches in the root node of that parser; if a node, this function +matches in that node. +

    +

    Parameter query is the query used to capture nodes +(see Pattern Matching Tree-sitter Nodes). The capture names don’t matter. Parameters +beg and end, if both non-nil, limit the range in which +this function queries. +

    +

    Like other query functions, this function raises a +treesit-query-error if query is malformed. +

    + +
    +
    Function: treesit-language-at point
    +

    This function tries to figure out which language is responsible for +the text at point. It goes over each parser in +(treesit-parser-list) and sees whether that parser’s range covers +point. +

    + +
    +
    Variable: treesit-range-functions
    +

    A list of range functions. Font-locking and indenting code uses +functions in this list to set correct ranges for a language parser +before using it. +

    +

    The signature of each function should be +

    +
    +
    (start end &rest _)
    +
    + +

    where start and end mark the region that is about to be +used. A range function only needs to (but is not limited to) update +ranges in that region. +

    +

    Each function in the list is called in-order. +

    + +
    +
    Function: treesit-update-ranges &optional start end
    +

    This function is used by font-lock and indent to update ranges before +using any parser. Each range function in +treesit-range-functions is called in-order. Arguments +start and end are passed to each range function. +

    + +

    An example

    + +

    Normally, in a set of languages that can be mixed together, there is a +major language and several embedded languages. We first parse the +whole document with the major language’s parser, set ranges for the +embedded languages, then parse the embedded languages. +

    +

    Suppose we want to parse a very simple document that mixes HTML, CSS +and JavaScript: +

    +
    +
    <html>
    +  <script>1 + 2</script>
    +  <style>body { color: "blue"; }</style>
    +</html>
    +
    + +

    We first parse with HTML, then set ranges for CSS and JavaScript: +

    +
    +
    ;; Create parsers.
    +(setq html (treesit-get-parser-create 'html))
    +(setq css (treesit-get-parser-create 'css))
    +(setq js (treesit-get-parser-create 'javascript))
    +
    +;; Set CSS ranges.
    +(setq css-range
    +      (treesit-query-range
    +       'html
    +       "(style_element (raw_text) @capture)"))
    +(treesit-parser-set-included-ranges css css-range)
    +
    +;; Set JavaScript ranges.
    +(setq js-range
    +      (treesit-query-range
    +       'html
    +       "(script_element (raw_text) @capture)"))
    +(treesit-parser-set-included-ranges js js-range)
    +
    + +

    We use a query pattern (style_element (raw_text) @capture) to +find CSS nodes in the HTML parse tree. For how to write query +patterns, see Pattern Matching Tree-sitter Nodes. +

    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html new file mode 100644 index 0000000000..ec89b7749c --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html @@ -0,0 +1,160 @@ + + + + + + +Parser-based Font Lock (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    24.6.10 Parser-based Font Lock

    + + +

    Besides simple syntactic font lock and regexp-based font lock, Emacs +also provides complete syntactic font lock with the help of a parser, +currently provided by the tree-sitter library (see Parsing Program Source). +

    +
    +
    Function: treesit-font-lock-enable
    +

    This function enables parser-based font lock in the current buffer. +

    + +

    Parser-based font lock and other font lock mechanisms are not mutually +exclusive. By default, if enabled, parser-based font lock runs first, +then the simple syntactic font lock (if enabled), then regexp-based +font lock. +

    +

    Although parser-based font lock doesn’t share the same customization +variables with regexp-based font lock, parser-based font lock uses +similar customization schemes. The tree-sitter counterpart of +font-lock-keywords is treesit-font-lock-settings. +

    +
    +
    Function: treesit-font-lock-rules :keyword value query...
    +

    This function is used to set treesit-font-lock-settings. It +takes care of compiling queries and other post-processing and outputs +a value that treesit-font-lock-settings accepts. An example: +

    +
    +
    (treesit-font-lock-rules
    + :language 'javascript
    + :override t
    + '((true) @font-lock-constant-face
    +   (false) @font-lock-constant-face)
    + :language 'html
    + "(script_element) @font-lock-builtin-face")
    +
    + +

    This function takes a list of text or s-exp queries. Before each +query, there are :keyword and value pairs that configure +that query. The :language keyword sets the query’s language and +every query must specify the language. Other keywords are optional: +

    + + + + + + + +
    KeywordValueDescription
    :overridenilIf the region already has a face, discard the new face
    tAlways apply the new face
    appendAppend the new face to existing ones
    prependPrepend the new face to existing ones
    keepFill-in regions without an existing face
    + +

    Capture names in query should be face names like +font-lock-keyword-face. The captured node will be fontified +with that face. Capture names can also be function names, in which +case the function is called with (start end node), +where start and end are the start and end positions of the +node in the buffer, and node is the node itself. If a capture name +is both a face and a function, the face takes priority. If a capture +name is neither a face name nor a function name, it is ignored. +

    + +
    +
    Variable: treesit-font-lock-settings
    +

    A list of settings for tree-sitter font lock. The exact format +of this variable is considered internal. One should always use +treesit-font-lock-rules to set this variable. +

    +

    Each setting is of form +

    +
    +
    (language query)
    +
    + +

    Each setting controls one parser (often of different language). +And language is the language symbol (see Tree-sitter Language Definitions); query is the query (see Pattern Matching Tree-sitter Nodes). +

    + +

    Multi-language major modes should provide range functions in +treesit-range-functions, and Emacs will set the ranges +accordingly before fontifying a region (see Parsing Text in Multiple Languages). +

    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html new file mode 100644 index 0000000000..691c8fba8c --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html @@ -0,0 +1,244 @@ + + + + + + +Parser-based Indentation (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    24.7.2 Parser-based Indentation

    + + +

    When built with the tree-sitter library (see Parsing Program Source), Emacs could parse program source and produce a syntax tree. +And this syntax tree can be used for indentation. For maximum +flexibility, we could write a custom indent function that queries the +syntax tree and indents accordingly for each language, but that would +be a lot of work. It is more convenient to use the simple indentation +engine described below: we only need to write some indentation rules +and the engine takes care of the rest. +

    +

    To enable the indentation engine, set the value of +indent-line-function to treesit-indent. +

    +
    +
    Variable: treesit-indent-function
    +

    This variable stores the actual function called by +treesit-indent. By default, its value is +treesit-simple-indent. In the future we might add other +more complex indentation engines. +

    + +

    Writing indentation rules

    + +
    +
    Variable: treesit-simple-indent-rules
    +

    This local variable stores indentation rules for every language. It is +a list of +

    +
    +
    (language . rules)
    +
    + +

    where language is a language symbol, and rules is a list +of +

    +
    +
    (matcher anchor offset)
    +
    + +

    First Emacs passes the node at point to matcher; if it returns +non-nil, this rule applies. Then Emacs passes the node to +anchor, which returns a point. Emacs takes the column number of +that point, adds offset to it, and the result is the indent for +the current line. +

    +

    The matcher and anchor are functions, and Emacs provides +convenient presets for them. You can skip over to +treesit-simple-indent-presets below, those presets should be +more than enough. +

    +

    A matcher or an anchor is a function that takes three +arguments (node parent bol). Argument bol is +the point at where we are indenting: the position of the first +non-whitespace character from the beginning of line; node is the +largest (highest-in-tree) node that starts at that point; parent +is the parent of node. A matcher returns nil/non-nil, and +anchor returns a point. +

    + +
    +
    Variable: treesit-simple-indent-presets
    +

    This is a list of presets for matchers and anchors in +treesit-simple-indent-rules. Each of them represents a function +that takes node, parent and bol as arguments. +

    +
    +
    no-node
    +
    + +

    This matcher matches the case where node is nil, i.e., there is +no node that starts at bol. This is the case when bol is +at an empty line or inside a multi-line string, etc. +

    +
    +
    (parent-is type)
    +
    + +

    This matcher matches if parent’s type is type. +

    +
    +
    (node-is type)
    +
    + +

    This matcher matches if node’s type is type. +

    +
    +
    (query query)
    +
    + +

    This matcher matches if querying parent with query +captures node. The capture name does not matter. +

    +
    +
    (match node-type parent-type
    +       node-field node-index-min node-index-max)
    +
    + +

    This matcher checks if node’s type is node-type, +parent’s type is parent-type, node’s field name in +parent is node-field, and node’s index among its +siblings is between node-index-min and node-index-max. If +the value of a constraint is nil, this matcher doesn’t check for that +constraint. For example, to match the first child where parent is +argument_list, use +

    +
    +
    (match nil "argument_list" nil 0 0)
    +
    + +
    +
    first-sibling
    +
    + +

    This anchor returns the start of the first child of parent. +

    +
    +
    parent
    +
    + +

    This anchor returns the start of parent. +

    +
    +
    parent-bol
    +
    + +

    This anchor returns the beginning of non-space characters on the line +that parent is on. +

    +
    +
    prev-sibling
    +
    + +

    This anchor returns the start of the previous sibling of node. +

    +
    +
    no-indent
    +
    + +

    This anchor returns the start of node, i.e., no indent. +

    +
    +
    prev-line
    +
    + +

    This anchor returns the first non-whitespace character on the previous +line. +

    + +

    Indentation utilities

    + +

    Here are some utility functions that can help writing indentation +rules. +

    +
    +
    Function: treesit-check-indent mode
    +

    This function checks current buffer’s indentation against major mode +mode. It indents the current buffer in mode and compares +the indentation with the current indentation. Then it pops up a diff +buffer showing the difference. Correct indentation (target) is in +green, current indentation is in red. +

    + +

    It is also helpful to use treesit-inspect-mode when writing +indentation rules. +

    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html new file mode 100644 index 0000000000..7b6e51468a --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html @@ -0,0 +1,125 @@ + + + + + + +Parsing Program Source (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37 Parsing Program Source

    + +

    Emacs provides various ways to parse program source text and produce a +syntax tree. In a syntax tree, text is no longer a +one-dimensional stream but a structured tree of nodes, where each node +represents a piece of text. Thus a syntax tree can enable +interesting features like precise fontification, indentation, +navigation, structured editing, etc. +

    +

    Emacs has a simple facility for parsing balanced expressions +(see Parsing Expressions). There is also the SMIE library for generic +navigation and indentation (see Simple Minded Indentation Engine). +

    +

    Emacs also provides integration with the tree-sitter library +(https://tree-sitter.github.io/tree-sitter) if compiled with +it. The tree-sitter library implements an incremental parser and has +support for a wide range of programming languages. +

    +
    +
    Function: treesit-available-p
    +

    This function returns non-nil if tree-sitter features are available +for this Emacs instance. +

    + +

    For tree-sitter integration with existing Emacs features, +see Parser-based Font Lock, Parser-based Indentation, and +Moving over Balanced Expressions. +

    +

    To access the syntax tree of the text in a buffer, we need to first +load a language definition and create a parser with it. Next, we can +query the parser for specific nodes in the syntax tree. Then, we can +access various information about the node, and we can pattern-match a +node with a powerful syntax. Finally, we explain how to work with +source files that mix multiple languages. The following sections +explain how to do each of the tasks in detail. +

    + + +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Pattern-Matching.html b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html new file mode 100644 index 0000000000..e14efe7162 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html @@ -0,0 +1,430 @@ + + + + + + +Pattern Matching (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.5 Pattern Matching Tree-sitter Nodes

    + +

    Tree-sitter lets us pattern match with a small declarative language. +Pattern matching consists of two steps: first tree-sitter matches a +pattern against nodes in the syntax tree, then it captures +specific nodes in that pattern and returns the captured nodes. +

    +

    We describe first how to write the most basic query pattern and how to +capture nodes in a pattern, then the pattern-match function, finally +more advanced pattern syntax. +

    +

    Basic query syntax

    + + + +

    A query consists of multiple patterns. Each pattern is an +s-expression that matches a certain node in the syntax tree. A +pattern has the following shape: +

    +
    +
    (type child...)
    +
    + +

    For example, a pattern that matches a binary_expression node that +contains number_literal child nodes would look like +

    +
    +
    (binary_expression (number_literal))
    +
    + +

    To capture a node in the query pattern above, append +@capture-name after the node pattern you want to capture. For +example, +

    +
    +
    (binary_expression (number_literal) @number-in-exp)
    +
    + +

    captures number_literal nodes that are inside a +binary_expression node with capture name number-in-exp. +

    +

    We can capture the binary_expression node too, with capture +name biexp: +

    +
    +
    (binary_expression
    + (number_literal) @number-in-exp) @biexp
    +
    + +

    Query function

    + +

    Now we can introduce the query functions. +

    +
    +
    Function: treesit-query-capture node query &optional beg end node-only
    +

    This function matches patterns in query in node. +Parameter query can be a string, an s-expression, or a +compiled query object. For now, we focus on the string syntax; +s-expression syntax and compiled queries are described at the end of the +section. +

    +

    Parameter node can also be a parser or a language symbol. A +parser means using its root node, a language symbol means find or +create a parser for that language in the current buffer, and use the +root node. +

    +

    The function returns all captured nodes in a list of +(capture_name . node). If node-only is +non-nil, a list of nodes is returned instead. If beg and +end are both non-nil, this function only pattern matches nodes +in that range. +

    + +

    This function raises a treesit-query-error if query is +malformed. The signal data contains a description of the specific +error. You can use treesit-query-validate to debug the query. +

    + +

    For example, suppose node’s content is 1 + 2, and +query is +

    +
    +
    (setq query
    +      "(binary_expression
    +        (number_literal) @number-in-exp) @biexp")
    +
    + +

    Querying that query would return +

    +
    +
    (treesit-query-capture node query)
    +    ⇒ ((biexp . <node for "1 + 2">)
    +       (number-in-exp . <node for "1">)
    +       (number-in-exp . <node for "2">))
    +
    + +

    As we mentioned earlier, a query could contain multiple +patterns. For example, it could have two top-level patterns: +

    +
    +
    (setq query
    +      "(binary_expression) @biexp
    +       (number_literal)  @number @biexp")
    +
    + +
    +
    Function: treesit-query-string string query language
    +

    This function parses string with language, pattern matches +its root node with query, and returns the result. +

    + +

    More query syntax

    + +

    Besides node type and capture, tree-sitter’s query syntax can express +anonymous node, field name, wildcard, quantification, grouping, +alternation, anchor, and predicate. +

    +

    Anonymous node

    + +

    An anonymous node is written verbatim, surrounded by quotes. A +pattern matching (and capturing) keyword return would be +

    +
    +
    "return" @keyword
    +
    + +

    Wild card

    + +

    In a query pattern, ‘(_)’ matches any named node, and ‘_’ +matches any named and anonymous node. For example, to capture any +named child of a binary_expression node, the pattern would be +

    +
    +
    (binary_expression (_) @in_biexp)
    +
    + +

    Field name

    + +

    We can capture child nodes that have specific field names: +

    +
    +
    (function_definition
    +  declarator: (_) @func-declarator
    +  body: (_) @func-body)
    +
    + +

    We can also capture a node that doesn’t have a certain field, say, a +function_definition without a body field. +

    +
    +
    (function_definition !body) @func-no-body
    +
    + +

    Quantify node

    + +

    Tree-sitter recognizes quantification operators ‘*’, ‘+’ and +‘?’. Their meanings are the same as in regular expressions: +‘*’ matches the preceding pattern zero or more times, ‘+’ +matches one or more times, and ‘?’ matches zero or one time. +

    +

    For example, this pattern matches type_declaration nodes +that have zero or more long keywords. +

    +
    +
    (type_declaration "long"*) @long-type
    +
    + +

    And this pattern matches a type declaration that has zero or one +long keyword: +

    +
    +
    (type_declaration "long"?) @long-type
    +
    + +

    Grouping

    + +

    Similar to groups in regular expression, we can bundle patterns into a +group and apply quantification operators to it. For example, to +express a comma separated list of identifiers, one could write +

    +
    +
    (identifier) ("," (identifier))*
    +
    + +

    Alternation

    + +

    Again, similar to regular expressions, we can express “match anyone +from this group of patterns” in the query pattern. The syntax is a +list of patterns enclosed in square brackets. For example, to capture +some keywords in C, the query pattern would be +

    +
    +
    [
    +  "return"
    +  "break"
    +  "if"
    +  "else"
    +] @keyword
    +
    + +

    Anchor

    + +

    The anchor operator ‘.’ can be used to enforce juxtaposition, +i.e., to enforce two things to be directly next to each other. The +two “things” can be two nodes, or a child and the end of its parent. +For example, to capture the first child, the last child, or two +adjacent children: +

    +
    +
    ;; Anchor the child with the end of its parent.
    +(compound_expression (_) @last-child .)
    +
    +;; Anchor the child with the beginning of its parent.
    +(compound_expression . (_) @first-child)
    +
    +;; Anchor two adjacent children.
    +(compound_expression
    + (_) @prev-child
    + .
    + (_) @next-child)
    +
    + +

    Note that the enforcement of juxtaposition ignores any anonymous +nodes. +

    +

    Predicate

    + +

    We can add predicate constraints to a pattern. For example, if we use +the following query pattern +

    +
    +
    (
    + (array . (_) @first (_) @last .)
    + (#equal @first @last)
    +)
    +
    + +

    Then tree-sitter only matches arrays where the first element equals +the last element. To attach a predicate to a pattern, we need to +group them together. A predicate always starts with a ‘#’. +Currently there are two predicates, #equal and #match. +

    +
    +
    Predicate: equal arg1 arg2
    +

    Matches if arg1 equals arg2. Arguments can be either a +string or a capture name. Capture names represent the text that the +captured node spans in the buffer. +

    + +
    +
    Predicate: match regexp capture-name
    +

    Matches if the text that capture-name’s node spans in the buffer +matches regular expression regexp. Matching is case-sensitive. +

    + +

    Note that a predicate can only refer to capture names that appear in the +same pattern. Indeed, it makes little sense to refer to capture names +in other patterns anyway. +

    +

    S-expression patterns

    + +

    Besides strings, Emacs provides an s-expression-based syntax for query +patterns. It largely resembles the string-based syntax. For example, +the following pattern +

    +
    +
    (treesit-query-capture
    + node "(addition_expression
    +        left: (_) @left
    +        \"+\" @plus-sign
    +        right: (_) @right) @addition
    +
    +        [\"return\" \"break\"] @keyword")
    +
    + +

    is equivalent to +

    +
    +
    (treesit-query-capture
    + node '((addition_expression
    +         left: (_) @left
    +         "+" @plus-sign
    +         right: (_) @right) @addition
    +
    +         ["return" "break"] @keyword))
    +
    + +

    Most pattern syntax can be written directly as strange but +nevertheless valid s-expressions. Only a few of them need +modification: +

    +
      +
    • Anchor ‘.’ is written as :anchor. +
    • ?’ is written as ‘:?’. +
    • *’ is written as ‘:*’. +
    • +’ is written as ‘:+’. +
    • #equal is written as :equal. In general, predicates +change their ‘#’ to ‘:’. +
    + +

    For example, +

    +
    +
    "(
    +  (compound_expression . (_) @first (_)* @rest)
    +  (#match \"love\" @first)
    +  )"
    +
    + +

    is written in s-expression as +

    +
    +
    '((
    +   (compound_expression :anchor (_) @first (_) :* @rest)
    +   (:match "love" @first)
    +   ))
    +
    + +

    Compiling queries

    + +

    If a query will be used repeatedly, especially in tight loops, it is +important to compile that query, because a compiled query is much +faster than an uncompiled one. A compiled query can be used anywhere +a query is accepted. +

    +
    +
    Function: treesit-query-compile language query
    +

    This function compiles query for language into a compiled +query object and returns it. +

    +

    This function raises a treesit-query-error if query is +malformed. The signal data contains a description of the specific +error. You can use treesit-query-validate to debug the query. +

    + +
    +
    Function: treesit-query-expand query
    +

    This function expands the s-expression query into a string +query. +

    + +
    +
    Function: treesit-pattern-expand pattern
    +

    This function expands the s-expression pattern into a string +pattern. +

    + +

    Finally, tree-sitter project’s documentation about +pattern-matching can be found at +https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries. +

    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Retrieving-Node.html b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html new file mode 100644 index 0000000000..1bea0dde76 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html @@ -0,0 +1,362 @@ + + + + + + +Retrieving Node (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.3 Retrieving Node

    + + + +

    Before we continue, let’s go over some conventions of tree-sitter +functions. +

    +

    We talk about a node being “smaller” or “larger”, and “lower” or +“higher”. A smaller and lower node is lower in the syntax tree and +therefore spans a smaller piece of text; a larger and higher node is +higher up in the syntax tree, containing many smaller nodes as its +children, and therefore spans a larger piece of text. +

    +

    When a function cannot find a node, it returns nil. And for the +convenience of function chaining, all the functions that take a node +as argument and return a node accept the node to be nil; in that +case, the function just returns nil. +

    + +

    Nodes are not automatically updated when the associated buffer is +modified. And there is no way to update a node once it is retrieved. +Using an outdated node throws treesit-node-outdated error. +

    +

    Retrieving node from syntax tree

    + +
    +
    Function: treesit-node-at pos &optional parser-or-lang named
    +

    This function returns the smallest node that starts at or after +position pos. In other words, the start of the node is equal to or +greater than pos. +

    +

    When parser-or-lang is nil, this function uses the first parser +in (treesit-parser-list) in the current buffer. If +parser-or-lang is a parser object, it uses that parser; if +parser-or-lang is a language, it finds the first parser using +that language in (treesit-parser-list) and uses that. +

    +

    If named is non-nil, this function looks for a named node +only (see named node). +

    +

    Example: +

    +
    ;; Find the node at point in a C parser's syntax tree.
    +(treesit-node-at (point) 'c)
    +    
    +
    + +
    +
    Function: treesit-node-on beg end &optional parser-or-lang named
    +

    This function returns the smallest node that covers the span +from beg to end. In other words, the start of the node is +less than or equal to beg, and the end of the node is greater than or +equal to end. +

    +

    Beware that calling this function on an empty line that is not +inside any top-level construct (function definition, etc) most +probably will give you the root node, because the root node is the +smallest node that covers that empty line. Most of the time, you want +to use treesit-node-at. +

    +

    When parser-or-lang is nil, this function uses the first parser +in (treesit-parser-list) in the current buffer. If +parser-or-lang is a parser object, it uses that parser; if +parser-or-lang is a language, it finds the first parser using +that language in (treesit-parser-list) and uses that. +

    +

    If named is non-nil, this function looks for a named node only +(see named node). +

    + +
    +
    Function: treesit-parser-root-node parser
    +

    This function returns the root node of the syntax tree generated by +parser. +

    + +
    +
    Function: treesit-buffer-root-node &optional language
    +

    This function finds the first parser that uses language in +(treesit-parser-list) in the current buffer, and returns the +root node of that parser. If it cannot find an appropriate parser, +nil is returned. +

    + +

    Once we have a node, we can retrieve other nodes from it, or query for +information about this node. +

    +

    Retrieving node from other nodes

    + +

    By kinship

    + +
    +
    Function: treesit-node-parent node
    +

    This function returns the immediate parent of node. +

    + +
    +
    Function: treesit-node-child node n &optional named
    +

    This function returns the n’th child of node. If +named is non-nil, then it only counts named nodes +(see named node). For example, in a node +that represents a string: "text", there are three child +nodes: the opening quote ", the string content text, and +the closing quote ". Among these nodes, the first child is +the opening quote ", the first named child is the string +content text. +

    + +
    +
    Function: treesit-node-children node &optional named
    +

    This function returns all of node’s children in a list. If +named is non-nil, then it only retrieves named nodes. +

    + +
    +
    Function: treesit-next-sibling node &optional named
    +

    This function finds the next sibling of node. If named is +non-nil, it finds the next named sibling. +

    + +
    +
    Function: treesit-prev-sibling node &optional named
    +

    This function finds the previous sibling of node. If +named is non-nil, it finds the previous named sibling. +

    + +

    By field name

    + +

    To make the syntax tree easier to analyze, many language definitions +assign field names to child nodes (see field name). For example, a function_definition node +could have a declarator and a body. +

    +
    +
    Function: treesit-child-by-field-name node field-name
    +

    This function finds the child of node that has field-name +as its field name. +

    +
    +
    ;; Get the child that has "body" as its field name.
    +(treesit-child-by-field-name node "body")
    +    
    +
    + +

    By position

    + +
    +
    Function: treesit-first-child-for-pos node pos &optional named
    +

    This function finds the first child of node that extends beyond +pos. “Extend beyond” means the end of the child node >= +pos. This function only looks for immediate children of +node, and doesn’t look in its grandchildren. If named is +non-nil, it only looks for named children (see named node). +

    + +
    +
    Function: treesit-node-descendant-for-range node beg end &optional named
    +

    This function finds the smallest child/grandchild... of +node that spans the range from beg to end. It is +similar to treesit-node-at. If named is non-nil, it only +looks for named child. +

    + +

    Searching for node

    + +
    +
    Function: treesit-search-subtree node predicate &optional all backward limit
    +

    This function traverses the subtree of node (including +node), and matches predicate with each node along the way. +And predicate is a regexp that matches (case-insensitively) +against each node’s type, or a function that takes a node and returns +nil/non-nil. If a node matches, that node is returned; if no node +ever matches, nil is returned. +

    +

    By default, this function only traverses named nodes, if all is +non-nil, it traverses all nodes. If backward is non-nil, it +traverses backwards. If limit is non-nil, it only traverses +that number of levels down in the tree. +

    + +
    +
    Function: treesit-search-forward start predicate &optional all backward up
    +

    This function is somewhat similar to treesit-search-subtree. +It also traverses the parse tree and matches each node with +predicate (except for start), where predicate can be +a (case-insensitive) regexp or a function. For a tree like the one below +where start is marked 1, this function traverses as numbered: +

    +
    +
                  o
    +              |
    +     3--------4-----------8
    +     |        |           |
    +o--o-+--1  5--+--6    9---+-----12
    +|  |    |        |    |         |
    +o  o    2        7  +-+-+    +--+--+
    +                    |   |    |  |  |
    +                    10  11   13 14 15
    +
    + +

    Same as in treesit-search-subtree, this function only searches +for named nodes by default. But if all is non-nil, it searches +for all nodes. If backward is non-nil, it searches backwards. +

    +

    If up is non-nil, this function will only traverse to siblings +and parents. In that case, only 1 3 4 8 would be traversed. +

    + +
    +
    Function: treesit-search-forward-goto predicate side &optional all backward up
    +

    This function jumps to the start or end of the next node in buffer +that matches predicate. Parameters predicate, all, +backward, and up are the same as in +treesit-search-forward. And side controls which side of +the matched node we stop at; it can be start or end. +

    + +
    +
    Function: treesit-induce-sparse-tree root predicate &optional process-fn limit
    +

    This function creates a sparse tree from root’s subtree. +

    +

    Basically, it takes the subtree under root, and combs it so only +the nodes that match predicate are left, like picking out grapes +on the vine. Like previous functions, predicate can be a regexp +string that matches against each node’s type case-insensitively, or a +function that takes a node and returns nil/non-nil. +

    +

    For example, for a subtree on the left that consists of both numbers +and letters, if predicate is “letter only”, the returned tree +is the one on the right. +

    +
    +
        a                 a              a
    +    |                 |              |
    ++---+---+         +---+---+      +---+---+
    +|   |   |         |   |   |      |   |   |
    +b   1   2         b   |   |      b   c   d
    +    |   |     =>      |   |  =>      |
    +    c   +--+          c   +          e
    +    |   |  |          |   |
    + +--+   d  4       +--+   d
    + |  |              |
    + e  5              e
    +
    + +

    If process-fn is non-nil, instead of returning the matched +nodes, this function passes each node to process-fn and uses the +returned value instead. If non-nil, limit is the number of +levels to go down from root. +

    +

    Each node in the returned tree looks like (tree-sitter +node . (child ...)). The tree-sitter node of the root +of this tree will be nil if root doesn’t match predicate. If +no node matches predicate, this function returns nil. +

    + +

    More convenient functions

    + +
    +
    Function: treesit-filter-child node pred &optional named
    +

    This function finds immediate children of node that satisfy +pred. +

    +

    Function pred takes the child node as the argument and should +return non-nil to indicate keeping the child. If named is +non-nil, this function only searches for named nodes. +

    + +
    +
    Function: treesit-parent-until node pred
    +

    This function repeatedly finds the parent of node, and returns +the parent if it satisfies pred (which takes the parent as the +argument). If no parent satisfies pred, this function returns +nil. +

    + +
    +
    Function: treesit-parent-while node pred
    +

    This function repeatedly finds the parent of node, and keeps +doing so as long as the parent satisfies pred (which takes the +parent as the single argument). I.e., this function returns the +farthest parent that still satisfies pred. +

    + +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html new file mode 100644 index 0000000000..77cea6b3f9 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html @@ -0,0 +1,212 @@ + + + + + + +Tree-sitter C API (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.7 Tree-sitter C API Correspondence

    + +

    Emacs’ tree-sitter integration doesn’t expose every feature +tree-sitter’s C API provides. Missing features include: +

    +
      +
    • Creating a tree cursor and navigating the syntax tree with it. +
    • Setting timeout and cancellation flag for a parser. +
    • Setting the logger for a parser. +
    • Printing a DOT graph of the syntax tree to a file. +
    • Copying and modifying a syntax tree. (Emacs doesn’t expose a tree +object.) +
    • Using (row, column) coordinates as position. +
    • Updating a node with changes. (In Emacs, retrieve a new node instead +of updating the existing one.) +
    • Querying statistics of a language definition. +
    + +

    In addition, Emacs makes some changes to the C API to make the API more +convenient and idiomatic: +

    +
      +
    • Instead of using byte positions, the ELisp API uses character +positions. +
    • Null nodes are converted to nil. +
    + +

    Below is the correspondence between all C API functions and their +ELisp counterparts. Sometimes one ELisp function corresponds to +multiple C functions, and many C functions don’t have an ELisp +counterpart. +

    +
    +
    ts_parser_new                           treesit-parser-create
    +ts_parser_delete
    +ts_parser_set_language
    +ts_parser_language                      treesit-parser-language
    +ts_parser_set_included_ranges           treesit-parser-set-included-ranges
    +ts_parser_included_ranges               treesit-parser-included-ranges
    +ts_parser_parse
    +ts_parser_parse_string                  treesit-parse-string
    +ts_parser_parse_string_encoding
    +ts_parser_reset
    +ts_parser_set_timeout_micros
    +ts_parser_timeout_micros
    +ts_parser_set_cancellation_flag
    +ts_parser_cancellation_flag
    +ts_parser_set_logger
    +ts_parser_logger
    +ts_parser_print_dot_graphs
    +ts_tree_copy
    +ts_tree_delete
    +ts_tree_root_node
    +ts_tree_language
    +ts_tree_edit
    +ts_tree_get_changed_ranges
    +ts_tree_print_dot_graph
    +ts_node_type                            treesit-node-type
    +ts_node_symbol
    +ts_node_start_byte                      treesit-node-start
    +ts_node_start_point
    +ts_node_end_byte                        treesit-node-end
    +ts_node_end_point
    +ts_node_string                          treesit-node-string
    +ts_node_is_null
    +ts_node_is_named                        treesit-node-check
    +ts_node_is_missing                      treesit-node-check
    +ts_node_is_extra                        treesit-node-check
    +ts_node_has_changes                     treesit-node-check
    +ts_node_has_error                       treesit-node-check
    +ts_node_parent                          treesit-node-parent
    +ts_node_child                           treesit-node-child
    +ts_node_field_name_for_child            treesit-node-field-name-for-child
    +ts_node_child_count                     treesit-node-child-count
    +ts_node_named_child                     treesit-node-child
    +ts_node_named_child_count               treesit-node-child-count
    +ts_node_child_by_field_name             treesit-node-child-by-field-name
    +ts_node_child_by_field_id
    +ts_node_next_sibling                    treesit-next-sibling
    +ts_node_prev_sibling                    treesit-prev-sibling
    +ts_node_next_named_sibling              treesit-next-sibling
    +ts_node_prev_named_sibling              treesit-prev-sibling
    +ts_node_first_child_for_byte            treesit-first-child-for-pos
    +ts_node_first_named_child_for_byte      treesit-first-child-for-pos
    +ts_node_descendant_for_byte_range       treesit-node-descendant-for-range
    +ts_node_descendant_for_point_range
    +ts_node_named_descendant_for_byte_range treesit-node-descendant-for-range
    +ts_node_named_descendant_for_point_range
    +ts_node_edit
    +ts_node_eq                              treesit-node-eq
    +ts_tree_cursor_new
    +ts_tree_cursor_delete
    +ts_tree_cursor_reset
    +ts_tree_cursor_current_node
    +ts_tree_cursor_current_field_name
    +ts_tree_cursor_current_field_id
    +ts_tree_cursor_goto_parent
    +ts_tree_cursor_goto_next_sibling
    +ts_tree_cursor_goto_first_child
    +ts_tree_cursor_goto_first_child_for_byte
    +ts_tree_cursor_goto_first_child_for_point
    +ts_tree_cursor_copy
    +ts_query_new
    +ts_query_delete
    +ts_query_pattern_count
    +ts_query_capture_count
    +ts_query_string_count
    +ts_query_start_byte_for_pattern
    +ts_query_predicates_for_pattern
    +ts_query_step_is_definite
    +ts_query_capture_name_for_id
    +ts_query_string_value_for_id
    +ts_query_disable_capture
    +ts_query_disable_pattern
    +ts_query_cursor_new
    +ts_query_cursor_delete
    +ts_query_cursor_exec                    treesit-query-capture
    +ts_query_cursor_did_exceed_match_limit
    +ts_query_cursor_match_limit
    +ts_query_cursor_set_match_limit
    +ts_query_cursor_set_byte_range
    +ts_query_cursor_set_point_range
    +ts_query_cursor_next_match
    +ts_query_cursor_remove_match
    +ts_query_cursor_next_capture
    +ts_language_symbol_count
    +ts_language_symbol_name
    +ts_language_symbol_for_name
    +ts_language_field_count
    +ts_language_field_name_for_id
    +ts_language_field_id_for_name
    +ts_language_symbol_type
    +ts_language_version
    +
    +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/Using-Parser.html b/admin/notes/tree-sitter/html-manual/Using-Parser.html new file mode 100644 index 0000000000..438e3858f1 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Using-Parser.html @@ -0,0 +1,186 @@ + + + + + + +Using Parser (GNU Emacs Lisp Reference Manual) + + + + + + + + + + + + + + + + + + + + + +
    + +
    +

    37.2 Using Tree-sitter Parser

    + + +

    This section describes how to create and configure a tree-sitter +parser. In Emacs, each tree-sitter parser is associated with a +buffer. As we edit the buffer, the associated parser and the syntax +tree are automatically kept up-to-date. +

    +
    +
    Variable: treesit-max-buffer-size
    +

    This variable contains the maximum size of buffers in which +tree-sitter can be activated. Major modes should check this value +when deciding whether to enable tree-sitter features. +

    + +
    +
    Function: treesit-can-enable-p
    +

    This function checks whether the current buffer is suitable for +activating tree-sitter features. It basically checks +treesit-available-p and treesit-max-buffer-size. +

    + + +
    +
    Function: treesit-parser-create language &optional buffer no-reuse
    +

    To create a parser, we provide a buffer and the language +to use (see Tree-sitter Language Definitions). If buffer is nil, the +current buffer is used. +

    +

    By default, this function reuses a parser if one already exists for +language in buffer; if no-reuse is non-nil, this +function always creates a new parser. +

    + +

    Given a parser, we can query information about it: +

    +
    +
    Function: treesit-parser-buffer parser
    +

    Returns the buffer associated with parser. +

    + +
    +
    Function: treesit-parser-language parser
    +

    Returns the language that parser uses. +

    + +
    +
    Function: treesit-parser-p object
    +

    Checks if object is a tree-sitter parser. Return non-nil if it +is, return nil otherwise. +

    + +

    There is no need to explicitly parse a buffer, because parsing is done +automatically and lazily. A parser only parses when we query for a +node in its syntax tree. Therefore, when a parser is first created, +it doesn’t parse the buffer; it waits until we query for a node for +the first time. Similarly, when some change is made in the buffer, a +parser doesn’t re-parse immediately. +

    + +

    When a parser does parse, it checks for the size of the buffer. +Tree-sitter can only handle buffers no larger than about 4GB. If the +size exceeds that, Emacs signals treesit-buffer-too-large +with signal data being the buffer size. +

    +

    Once a parser is created, Emacs automatically adds it to the +internal parser list. Every time a change is made to the buffer, +Emacs updates parsers in this list so they can update their syntax +tree incrementally. +

    +
    +
    Function: treesit-parser-list &optional buffer
    +

    This function returns the parser list of buffer. And +buffer defaults to the current buffer. +

    + +
    +
    Function: treesit-parser-delete parser
    +

    This function deletes parser. +

    + + +

    Normally, a parser “sees” the whole +buffer, but when the buffer is narrowed (see Narrowing), the +parser will only see the visible region. As far as the parser can +tell, the hidden region is deleted. And when the buffer is later +widened, the parser thinks text is inserted in the beginning and in +the end. Although parsers respect narrowing, narrowing shouldn’t be +the means to handle a multi-language buffer; instead, set the ranges in +which a parser should operate. See Parsing Text in Multiple Languages. +

    +

    Because a parser parses lazily, when we narrow the buffer, the parser +is not affected immediately; as long as we don’t query for a node +while the buffer is narrowed, the parser is oblivious of the +narrowing. +

    + +
    +
    Function: treesit-parse-string string language
    +

    Besides creating a parser for a buffer, we can also just parse a +string. Unlike a buffer, parsing a string is a one-time deal, and +there is no way to update the result. +

    +

    This function parses string with language, and returns the +root node of the generated syntax tree. +

    + +
    +
    + + + + + + diff --git a/admin/notes/tree-sitter/html-manual/build-manual.sh b/admin/notes/tree-sitter/html-manual/build-manual.sh new file mode 100755 index 0000000000..adde3f2a2a --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/build-manual.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +MANUAL_DIR="../../../doc/lispref" +THIS_DIR=$(pwd) + +echo "Build manual" +cd "${MANUAL_DIR}" +make elisp.html HTML_OPTS="--html --css-ref=./manual.css" + +cd "${THIS_DIR}" + +echo "Copy manual" +cp -f "${MANUAL_DIR}/elisp.html/Parsing-Program-Source.html" . +cp -f "${MANUAL_DIR}/elisp.html/Language-Definitions.html" . +cp -f "${MANUAL_DIR}/elisp.html/Using-Parser.html" . +cp -f "${MANUAL_DIR}/elisp.html/Retrieving-Node.html" . +cp -f "${MANUAL_DIR}/elisp.html/Accessing-Node.html" . +cp -f "${MANUAL_DIR}/elisp.html/Pattern-Matching.html" . +cp -f "${MANUAL_DIR}/elisp.html/Multiple-Languages.html" . +cp -f "${MANUAL_DIR}/elisp.html/Tree_002dsitter-C-API.html" . + +cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Font-Lock.html" . +cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Indentation.html" . diff --git a/admin/notes/tree-sitter/html-manual/manual.css b/admin/notes/tree-sitter/html-manual/manual.css new file mode 100644 index 0000000000..5a6790a345 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/manual.css @@ -0,0 +1,374 @@ +/* Style-sheet to use for Emacs manuals */ + +/* Copyright (C) 2013-2014 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without any warranty. +*/ + +/* style.css begins here */ + +/* This stylesheet is used by manuals and a few older resources. */ + +/* reset.css begins here */ + +/* +Software License Agreement (BSD License) + +Copyright (c) 2006, Yahoo! Inc. +All rights reserved. 
+ +Redistribution and use of this software in source and +binary forms, with or without modification, arepermitted +provided that the following conditions are met: + +* Redistributions of source code must retain the above +copyright notice, this list of conditions and the +following disclaimer. + +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other +materials provided with the distribution. + +* Neither the name of Yahoo! Inc. nor the names of its +contributors may be used to endorse or promote products +derived from this software without specific prior +written permission of Yahoo! Inc. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
+*/ + +html { + color: #000; + background: #FFF; +} + +body, div, dl, dt, dd, ul, ol, li, h1, h2, h3, h4, +h5, h6, pre, code, form, fieldset, legend, input, +button, textarea, p, blockquote, th, td { + margin: 0; + padding: 0; +} + +table { + border-collapse: collapse; + border-spacing: 0; +} + +fieldset, img { + border: 0; +} + +address, caption, cite, code, dfn, em, strong, +th, var, optgroup { + font-style: inherit; + font-weight: inherit; +} + +del, ins { + text-decoration: none; +} + +li { + list-style:none; +} + +caption, th { + text-align: left; +} + +h1, h2, h3, h4, h5, h6 { + font-size: 100%; + font-weight: normal; +} + +q:before, q:after { + content:''; +} + +abbr, acronym { + border: 0; + font-variant: normal; +} + +sup { + vertical-align: baseline; +} +sub { + vertical-align: baseline; +} + +legend { + color: #000; +} + +input, button, textarea, select, optgroup, option { + font-family: inherit; + font-size: inherit; + font-style: inherit; + font-weight: inherit; +} + +input, button, textarea, select { + *font-size: 100%; +} + + +/* reset.css ends here */ + +/*** PAGE LAYOUT ***/ + +html, body { + font-size: 1em; + text-align: left; + text-decoration: none; +} +html { background-color: #e7e7e7; } + +body { + max-width: 74.92em; + margin: 0 auto; + padding: .5em 1em 1em 1em; + background-color: white; + border: .1em solid #c0c0c0; +} + + +/*** BASIC ELEMENTS ***/ + +/* Size and positioning */ + +p, pre, li, dt, dd, table, code, address { line-height: 1.3em; } + +h1 { font-size: 2em; margin: 1em 0 } +h2 { font-size: 1.50em; margin: 1.0em 0 0.87em 0; } +h3 { font-size: 1.30em; margin: 1.0em 0 0.87em 0; } +h4 { font-size: 1.13em; margin: 1.0em 0 0.88em 0; } +h5 { font-size: 1.00em; margin: 1.0em 0 1.00em 0; } + +p, pre { margin: 1em 0; } +pre { overflow: auto; padding-bottom: .3em; } + +ul, ol, blockquote { margin-left: 1.5%; margin-right: 1.5%; } +hr { margin: 1em 0; } +/* Lists of underlined links are difficult to read. 
The top margin + gives a little more spacing between entries. */ +ul li { margin: .5em 1em; } +ol li { margin: 1em; } +ol ul li { margin: .5em 1em; } +ul li p, ul ul li { margin-top: .3em; margin-bottom: .3em; } +ul ul, ol ul { margin-top: 0; margin-bottom: 0; } + +/* Separate description lists from preceding text */ +dl { margin: 1em 0 0 0; } +/* separate the "term" from subsequent "description" */ +dt { margin: .5em 0; } +/* separate the "description" from subsequent list item + when the final
    child is an anonymous box */ +dd { margin: .5em 3% 1em 3%; } +/* separate anonymous box (used to be the first element in
    ) + from subsequent

    */ +dd p { margin: .5em 0; } + +table { + display: block; overflow: auto; + margin-top: 1.5em; margin-bottom: 1.5em; +} +th { padding: .3em .5em; text-align: center; } +td { padding: .2em .5em; } + +address { margin-bottom: 1em; } +caption { margin-bottom: .5em; text-align: center; } +sup { vertical-align: super; } +sub { vertical-align: sub; } + +/* Style */ + +h1, h2, h3, h4, h5, h6, strong, dt, th { font-weight: bold; } + +/* The default color (black) is too dark for large text in + bold font. */ +h1, h2, h3, h4 { color: #333; } +h5, h6, dt { color: #222; } + +a[href] { color: #005090; } +a[href]:visited { color: #100070; } +a[href]:active, a[href]:hover { + color: #100070; + text-decoration: none; +} + +h1 a[href]:visited, h2 a[href]:visited, h3 a[href]:visited, +h4 a[href]:visited { color: #005090; } +h1 a[href]:hover, h2 a[href]:hover, h3 a[href]:hover, +h4 a[href]:hover { color: #100070; } + +ol { list-style: decimal outside;} +ul { list-style: square outside; } +ul ul, ol ul { list-style: circle; } +li { list-style: inherit; } + +hr { background-color: #ede6d5; } +table { border: 0; } + +abbr,acronym { + border-bottom:1px dotted #000; + text-decoration: none; + cursor:help; +} +del { text-decoration: line-through; } +em { font-style: italic; } +small { font-size: .9em; } + +img { max-width: 100%} + + +/*** SIMPLE CLASSES ***/ + +.center, .c { text-align: center; } +.nocenter{ text-align: left; } + +.underline { text-decoration: underline; } +.nounderline { text-decoration: none; } + +.no-bullet { list-style: none; } +.inline-list li { display: inline } + +.netscape4, .no-display { display: none; } + + +/*** MANUAL PAGES ***/ + +/* This makes the very long tables of contents in Gnulib and other + manuals easier to read. 
*/ +.contents ul, .shortcontents ul { font-weight: bold; } +.contents ul ul, .shortcontents ul ul { font-weight: normal; } +.contents ul { list-style: none; } + +/* For colored navigation bars (Emacs manual): make the bar extend + across the whole width of the page and give it a decent height. */ +.header, .node { margin: 0 -1em; padding: 0 1em; } +.header p, .node p { line-height: 2em; } + +/* For navigation links */ +.node a, .header a { display: inline-block; line-height: 2em; } +.node a:hover, .header a:hover { background: #f2efe4; } + +/* Inserts */ +table.cartouche td { padding: 1.5em; } + +div.display, div.lisp, div.smalldisplay, +div.smallexample, div.smalllisp { margin-left: 3%; } + +div.example { padding: .8em 1.2em .4em; } +pre.example { padding: .8em 1.2em; } +div.example, pre.example { + margin: 1em 0 1em 3% ; + -webkit-border-radius: .3em; + -moz-border-radius: .3em; + border-radius: .3em; + border: 1px solid #d4cbb6; + background-color: #f2efe4; +} +div.example > pre.example { + padding: 0 0 .4em; + margin: 0; + border: none; +} + +pre.menu-comment { padding-top: 1.3em; margin: 0; } + + +/*** FOR WIDE SCREENS ***/ + +@media (min-width: 40em) { + body { padding: .5em 3em 1em 3em; } + div.header, div.node { margin: 0 -3em; padding: 0 3em; } +} + +/* style.css ends here */ + +/* makeinfo convert @deffn and similar functions to something inside +

    . style.css uses italic for blockquote. This looks poor + in the Emacs manuals, which make extensive use of @defun (etc). + In particular, references to function arguments appear as + inside
    . Since is also italic, it makes it + impossible to distinguish variables. We could change to + e.g. bold-italic, or normal, or a different color, but that does + not look as good IMO. So we just override blockquote to be non-italic. + */ +blockquote { font-style: normal; } + +var { font-style: italic; } + +div.header { + background-color: #DDDDFF; + padding-top: 0.2em; +} + + +/*** Customization ***/ + +body { + font-family: Charter, serif; + font-size: 14pt; + line-height: 1.4; + background-color: #fefefc; + color: #202010; +} + +pre.menu-comment { + font-family: Charter, serif; + font-size: 14pt; +} + +body > *, body > div.display, body > div.lisp, body > div.smalldisplay, +body > div.example, body > div.smallexample, body > div.smalllisp { + width: 700px; + margin-left: auto; + margin-right: auto; +} + +div.header { + width: 100%; + min-height: 3em; + font-size: 13pt; +} + +/* Documentation block for functions and variables. Make then + narrower*/ +dd { + margin: .5em 6% 1em 6% +} + +code, pre, kbd, samp, tt { + font-size: 12pt; + font-family: monospace; +} + +/* In each node we have index table to all sub-nodes. Make more space + for the first column, which is the name to each sub-node. */ +table.menu tbody tr td:nth-child(1) { + white-space: nowrap; +} + +div.header p { + text-align: center; + margin: 0.5em auto 0.5em auto; +} diff --git a/admin/notes/tree-sitter/starter-guide b/admin/notes/tree-sitter/starter-guide new file mode 100644 index 0000000000..6cf8cf8a23 --- /dev/null +++ b/admin/notes/tree-sitter/starter-guide @@ -0,0 +1,442 @@ +STARTER GUIDE ON WRITTING MAJOR MODE WITH TREE-SITTER -*- org -*- + +This document guides you on adding tree-sitter support to a major +mode. + +TOC: + +- Building Emacs with tree-sitter +- Install language definitions +- Setup +- Font-lock +- Indent +- Imenu +- Navigation +- Which-func +- More features? 
+- Common tasks (code snippets) +- Manual + +* Building Emacs with tree-sitter + +You can either install tree-sitter by your package manager, or from +source: + + git clone https://github.com/tree-sitter/tree-sitter.git + cd tree-sitter + make + make install + +Then pull the tree-sitter branch (or the master branch, if it has +merged) and rebuild Emacs. + +* Install language definitions + +Tree-sitter by itself doesn’t know how to parse any particular +language. We need to install language definitions (or “grammars”) for +a language to be able to parse it. There are a couple of ways to get +them. + +You can use this script that I put together here: + + https://github.com/casouri/tree-sitter-module + +You can also find them under this directory in /build-modules. + +This script automatically pulls and builds language definitions for C, +C++, Rust, JSON, Go, HTML, Javascript, CSS, Python, Typescript, +and C#. Better yet, I pre-built these language definitions for +GNU/Linux and macOS, they can be downloaded here: + + https://github.com/casouri/tree-sitter-module/releases/tag/v2.1 + +To build them yourself, run + + git clone git@github.com:casouri/tree-sitter-module.git + cd tree-sitter-module + ./batch.sh + +and language definitions will be in the /dist directory. You can +either copy them to standard dynamic library locations of your system, +eg, /usr/local/lib, or leave them in /dist and later tell Emacs where +to find language definitions by setting ‘treesit-extra-load-path’. + +Language definition sources can be found on GitHub under +tree-sitter/xxx, like tree-sitter/tree-sitter-python. 
The tree-sitter +organization has all the "official" language definitions: + + https://github.com/tree-sitter + +* Setting up for adding major mode features + +Start Emacs, and load tree-sitter with + + (require 'treesit) + +Now check if Emacs is built with tree-sitter library + + (treesit-available-p) + +For your major mode, first create a tree-sitter switch: + +#+begin_src elisp +(defcustom python-use-tree-sitter nil + "If non-nil, `python-mode' tries to use tree-sitter. +Currently `python-mode' can utilize tree-sitter for font-locking, +imenu, and movement functions." + :type 'boolean) +#+end_src + +Then in other places, we decide on whether to enable tree-sitter by + +#+begin_src elisp +(and python-use-tree-sitter + (treesit-can-enable-p)) +#+end_src + +* Font-lock + +Tree-sitter works like this: You provide a query made of patterns and +capture names, tree-sitter finds the nodes that match these patterns, +tag the corresponding capture names onto the nodes and return them to +you. The query function returns a list of (capture-name . node). For +font-lock, we use face names as capture names. And the captured node +will be fontified in their capture name. The capture name could also +be a function, in which case (START END NODE) is passed to the +function for font-lock. START and END is the start and end the +captured NODE. + +** Query syntax + +There are two types of nodes, named, like (identifier), +(function_definition), and anonymous, like "return", "def", "(", +"}". 
Parent-child relationship is expressed as + + (parent (child) (child) (child (grand_child))) + +Eg, an argument list (1, "3", 1) could be: + + (argument_list "(" (number) (string) (number) ")") + +Children could have field names in their parent: + + (function_definition name: (identifier) type: (identifier)) + +Match any of the list: + + ["true" "false" "none"] + +Capture names can come after any node in the pattern: + + (parent (child) @child) @parent + +The query above captures both parent and child. + + ["return" "continue" "break"] @keyword + +The query above captures all the keywords with capture name +"keyword". + +These are the common syntax, see all of them in the manual +("Parsing Program Source" section). + +** Query references + +But how does one come up with the queries? Take python for an +example, open any python source file, evaluate + + (treesit-parser-create 'python) + +so there is a parser available, then enable ‘treesit-inspect-mode’. +Now you should see information of the node under point in +mode-line. Move around and you should be able to get a good +picture. Besides this, you can consult the grammar of the language +definition. For example, Python’s grammar file is at + + https://github.com/tree-sitter/tree-sitter-python/blob/master/grammar.js + +Neovim also has a bunch of queries to reference: + + https://github.com/nvim-treesitter/nvim-treesitter/tree/master/queries + +The manual explains how to read grammar files in the bottom of section +"Tree-sitter Language Definitions". + +** Debugging queries + +If your query has problems, it usually cannot compile. In that case +use ‘treesit-query-validate’ to debug the query. It will pop a buffer +containing the query (in text format) and mark the offending part in +red. + +** Code + +To enable tree-sitter font-lock, set ‘treesit-font-lock-settings’ +buffer-locally and call ‘treesit-font-lock-enable’. For example, see +‘python--treesit-settings’ in python.el. Below I paste a snippet of +it.
+ +Note that like the current font-lock, if the to-be-fontified region +already has a face (ie, an earlier match fontified part/all of the +region), the new face is discarded rather than applied. If you want +later matches always override earlier matches, use the :override +keyword. + +#+begin_src elisp +(defvar python--treesit-settings + (treesit-font-lock-rules + :language 'python + :override t + `(;; Queries for def and class. + (function_definition + name: (identifier) @font-lock-function-name-face) + + (class_definition + name: (identifier) @font-lock-type-face) + + ;; Comment and string. + (comment) @font-lock-comment-face + + ...))) +#+end_src + +Then in ‘python-mode’, enable tree-sitter font-lock: + +#+begin_src elisp +(treesit-parser-create 'python) +;; This turns off the syntax-based font-lock for comments and +;; strings. So it doesn’t override tree-sitter’s fontification. +(setq-local font-lock-keywords-only t) +(setq-local treesit-font-lock-settings + python--treesit-settings) +(treesit-font-lock-enable) +#+end_src + +Concretely, something like this: + +#+begin_src elisp +(define-derived-mode python-mode prog-mode "Python" + ... + + (treesit-parser-create 'python) + + (if (and python-use-tree-sitter + (treesit-can-enable-p)) + ;; Tree-sitter. + (progn + (setq-local font-lock-keywords-only t) + (setq-local treesit-font-lock-settings + python--treesit-settings) + (treesit-font-lock-enable)) + ;; No tree-sitter + (setq-local font-lock-defaults ...)) + + ...) +#+end_src + +You’ll notice that tree-sitter’s font-lock doesn’t respect +‘font-lock-maximum-decoration’, major modes are free to set +‘treesit-font-lock-settings’ based on the value of +‘font-lock-maximum-decoration’, or provide more fine-grained control +through other mode-specific means. 
+ +* Indent + +Indent works like this: We have a bunch of rules that look like this: + + (MATCHER ANCHOR OFFSET) + +At the beginning point is at the BOL of a line, we want to know which +column to indent this line to. Let NODE be the node at point, we pass +this node to the MATCHER of each rule, one of them will match the node +("this node is a closing bracket!"). Then we pass the node to the +ANCHOR, which returns a point, eg, the BOL of the previous line. We +find the column number of that point (eg, 4), add OFFSET to it (eg, +0), and that is the column we want to indent the current line to (4 + +0 = 4). + +For MATCHER we have + + (parent-is TYPE) + (node-is TYPE) + (query QUERY) => matches if querying PARENT with QUERY + captures NODE. + + (match NODE-TYPE PARENT-TYPE NODE-FIELD + NODE-INDEX-MIN NODE-INDEX-MAX) + + => checks everything. If an argument is nil, don’t match that. Eg, + (match nil TYPE) is the same as (parent-is TYPE) + +For ANCHOR we have + + first-sibling => start of the first sibling + parent => start of parent + parent-bol => BOL of the line parent is on. + prev-sibling + no-indent => don’t indent + prev-line => same indent as previous line + +There is also a manual section for indent: "Parser-based Indentation". + +When writing indent rules, you can use ‘treesit-check-indent’ to +check if your indentation is correct. To debug what went wrong, set +‘treesit--indent-verbose’ to non-nil. Then when you indent, Emacs +tells you which rule is applied in the echo area. + +#+begin_src elisp +(defvar typescript-mode-indent-rules + (let ((offset typescript-indent-offset)) + `((typescript + ;; This rule matches if node at point is "}", ANCHOR is the + ;; parent node’s BOL, and offset is 0.
+ ((node-is "}") parent-bol 0) + ((node-is ")") parent-bol 0) + ((node-is "]") parent-bol 0) + ((node-is ">") parent-bol 0) + ((node-is ".") parent-bol ,offset) + ((parent-is "ternary_expression") parent-bol ,offset) + ((parent-is "named_imports") parent-bol ,offset) + ((parent-is "statement_block") parent-bol ,offset) + ((parent-is "type_arguments") parent-bol ,offset) + ((parent-is "variable_declarator") parent-bol ,offset) + ((parent-is "arguments") parent-bol ,offset) + ((parent-is "array") parent-bol ,offset) + ((parent-is "formal_parameters") parent-bol ,offset) + ((parent-is "template_substitution") parent-bol ,offset) + ((parent-is "object_pattern") parent-bol ,offset) + ((parent-is "object") parent-bol ,offset) + ((parent-is "object_type") parent-bol ,offset) + ((parent-is "enum_body") parent-bol ,offset) + ((parent-is "arrow_function") parent-bol ,offset) + ((parent-is "parenthesized_expression") parent-bol ,offset) + ...)))) +#+end_src + +Then you set ‘treesit-simple-indent-rules’ to your rules, and set +‘indent-line-function’: + +#+begin_src elisp +(setq-local treesit-simple-indent-rules typescript-mode-indent-rules) +(setq-local indent-line-function #'treesit-indent) +#+end_src + +* Imenu + +Not much to say except for utilizing ‘treesit-induce-sparse-tree’. +See ‘python--imenu-treesit-create-index-1’ in python.el for an +example. + +Once you have the index builder, set ‘imenu-create-index-function’. + +* Navigation + +Mainly ‘beginning-of-defun-function’ and ‘end-of-defun-function’. +You can find the end of a defun with something like + +(treesit-search-forward-goto "function_definition" 'end) + +where "function_definition" matches the node type of a function +definition node, and ’end means we want to go to the end of that +node. + +Something like this should suffice: + +#+begin_src elisp +(defun xxx-beginning-of-defun (&optional arg) + (if (> arg 0) + ;; Go backward. 
+ (while (and (> arg 0) + (treesit-search-forward-goto + "function_definition" 'start nil t)) + (setq arg (1- arg))) + ;; Go forward. + (while (and (< arg 0) + (treesit-search-forward-goto + "function_definition" 'start)) + (setq arg (1+ arg))))) + +(setq-local beginning-of-defun-function #'xxx-beginning-of-defun) +#+end_src + +And the same for end-of-defun. + +* Which-func + +You can find the current function by going up the tree and looking for +the function_definition node. See ‘python-info-treesit-current-defun’ +in python.el for an example. Since Python allows nested function +definitions, that function keeps going until it reaches the root node, +and records all the function names along the way. + +#+begin_src elisp +(defun python-info-treesit-current-defun (&optional include-type) + "Identical to `python-info-current-defun' but use tree-sitter. +For INCLUDE-TYPE see `python-info-current-defun'." + (let ((node (treesit-node-at (point))) + (name-list ()) + (type nil)) + (cl-loop while node + if (pcase (treesit-node-type node) + ("function_definition" + (setq type 'def)) + ("class_definition" + (setq type 'class)) + (_ nil)) + do (push (treesit-node-text + (treesit-node-child-by-field-name node "name") + t) + name-list) + do (setq node (treesit-node-parent node)) + finally return (concat (if include-type + (format "%s " type) + "") + (string-join name-list "."))))) +#+end_src + +* More features? + +Obviously this list is just a starting point, if there are features in +the major mode that would benefit a parse tree, adding tree-sitter +support for that would be great. But in the minimal case, just adding +font-lock is awesome. + +* Common tasks + +How to... + +** Get the buffer text corresponding to a node? + +(treesit-node-text node) + +BTW ‘treesit-node-string’ does different things. + +** Scan the whole tree for stuff? + +(treesit-search-subtree) +(treesit-search-forward) +(treesit-induce-sparse-tree) + +** Move to next node that...? 
+ +(treesit-search-forward-goto) + +** Get the root node? + +(treesit-buffer-root-node) + +** Get the node at point? + +(treesit-node-at (point)) + +* Manual + +I suggest you read the manual section for tree-sitter in Info. The +section is Parsing Program Source. Typing + + C-h i d m elisp RET g Parsing Program Source RET + +will bring you to that section. You can also read the HTML version +under /html-manual in this directory. I find the HTML version easier +to read. You don’t need to read through every sentence, just read the +text paragraphs and glance over function names. commit 1ea503ed4b3a14b3dc0a597cfbfe57d73b871422 Author: Yuan Fu Date: Tue Oct 4 13:30:00 2022 -0700 Add :override flag for python tree-sitter font-lock settings * lisp/progmodes/python.el (python--treesit-settings): Add :override. diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index 3f85201c10..b498baec60 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -987,6 +987,7 @@ python--treesit-fontify-string (defvar python--treesit-settings (treesit-font-lock-rules :language 'python + :override t `(;; Queries for def and class. (function_definition name: (identifier) @font-lock-function-name-face) commit 23bb724c922de95573f73b22ae311696ae08464e Author: Yuan Fu Date: Tue Oct 4 13:28:46 2022 -0700 Add :override flag for tree-sitter font-lock * doc/lispref/modes.texi (Parser-based Font Lock): Update manual. * lisp/treesit.el (treesit-font-lock-settings): Update docstring. (treesit-font-lock-rules): Handle :override. (treesit-font-lock-fontify-region): Handle :override. Also set inhibit-point-motion-hooks to t. 
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 0d58c28e27..883f9d8491 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -3911,6 +3911,7 @@ Parser-based Font Lock @group (treesit-font-lock-rules :language 'javascript + :override t '((true) @@font-lock-constant-face (false) @@font-lock-constant-face) :language 'html @@ -3919,9 +3920,19 @@ Parser-based Font Lock @end example This function takes a list of text or s-exp queries. Before each -query, there are @var{:keyword} and @var{value} pairs that configures -that query. The @code{:lang} keyword sets the query’s language, and is -currently the only recognized keyword. +query, there are @var{:keyword} and @var{value} pairs that configure +that query. The @code{:lang} keyword sets the query’s language and +every query must specify the language. Other keywords are optional: + +@multitable @columnfractions .15 .15 .6 +@headitem Keyword @tab Value @tab Description +@item @code{:override} @tab nil +@tab If the region already has a face, discard the new face +@item @tab t @tab Always apply the new face +@item @tab @code{append} @tab Append the new face to existing ones +@item @tab @code{prepend} @tab Prepend the new face to existing ones +@item @tab @code{keep} @tab Fill-in regions without an existing face +@end multitable Capture names in @var{query} should be face names like @code{font-lock-keyword-face}. The captured node will be fontified diff --git a/lisp/treesit.el b/lisp/treesit.el index 100bf9ac67..bb13021a27 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -277,6 +277,10 @@ treesit-update-ranges ;;; Font-lock +(define-error 'treesit-font-lock-error + "Generic tree-sitter font-lock error" + 'treesit-error) + (defvar-local treesit-font-lock-settings nil "A list of SETTINGs for treesit-based fontification. 
@@ -285,7 +289,7 @@ treesit-font-lock-settings Each SETTING is of form - (LANGUAGE QUERY) + (LANGUAGE QUERY OVERRIDE) Each SETTING controls one parser (often of different language). LANGUAGE is the language symbol. See Info node `(elisp)Language @@ -296,7 +300,11 @@ treesit-font-lock-settings a query in either string or s-expression form. When using repeatedly, a compiled query is much faster than a string or sexp one, so it is recommend to compile your queries if it will be -used over and over.") +used over and over. + +OVERRIDE is the override flag for this query. Its value can be +t, nil, append, prepend, keep. See more in +`treesit-font-lock-rules'.") (defun treesit-font-lock-rules (&rest args) "Return a value suitable for `treesit-font-lock-settings'. @@ -311,13 +319,22 @@ treesit-font-lock-rules (treesit-font-lock-rules :language \\='javascript + :override t \\='((true) @font-lock-constant-face (false) @font-lock-constant-face) :language \\='html \"(script_element) @font-lock-builtin-face\") -For each QUERY, a :language keyword is required. Currently the -only recognized keyword is :language. +For each QUERY, a :language keyword is required. Other keywords +include: + + KEYWORD VALUE DESCRIPTION + :override nil If the region already has a face, + discard the new face + t Always apply the new face + append Append the new face to existing ones + prepend Prepend the new face to existing ones + keep Fill-in regions without an existing face Capture names in QUERY should be face names like `font-lock-keyword-face'. The captured node will be fontified @@ -333,6 +350,8 @@ treesit-font-lock-rules (let (;; Tracks the current language that following queries will ;; apply to. (current-language nil) + ;; Tracks :override flag. + (current-override nil) ;; The list this function returns. 
(result nil)) (while args @@ -343,30 +362,41 @@ treesit-font-lock-rules (when (or (not (symbolp lang)) (null lang)) (signal 'wrong-type-argument `(symbolp ,lang))) (setq current-language lang))) + (:override + (let ((flag (pop args))) + (when (not (memq flag '(t nil append prepend keep))) + (signal 'wrong-type-argument + `((or t nil append prepend keep) + ,flag))) + (setq current-override flag))) ((pred treesit-query-p) (when (null current-language) - (signal 'treesit-error + (signal 'treesit-font-lock-error `("Language unspecified, use :language keyword to specify a language for this query" ,token))) (if (treesit-compiled-query-p token) (push `(,current-language token) result) (push `(,current-language - ,(treesit-query-compile current-language token)) + ,(treesit-query-compile current-language token) + ,current-override) result)) ;; Clears any configurations set for this query. - (setq current-language nil)) - (_ (signal 'treesit-error + (setq current-language nil + current-override nil)) + (_ (signal 'treesit-font-lock-error `("Unexpected value" token)))))) (nreverse result))) -(defun treesit-font-lock-fontify-region (start end &optional loudly) +(defun treesit-font-lock-fontify-region + (start end &optional loudly) "Fontify the region between START and END. If LOUDLY is non-nil, message some debugging information." 
(treesit-update-ranges start end) (font-lock-unfontify-region start end) (dolist (setting treesit-font-lock-settings) - (when-let* ((language (nth 0 setting)) - (match-pattern (nth 1 setting)) - (parser (treesit-parser-create language))) + (let* ((language (nth 0 setting)) + (match-pattern (nth 1 setting)) + (override (nth 2 setting)) + (parser (treesit-parser-create language))) (when-let ((node (treesit-node-on start end parser))) (let ((captures (treesit-query-capture node match-pattern @@ -374,17 +404,33 @@ treesit-font-lock-fontify-region ;; often than not, NODE will be the root ;; node, and if we don't specify the range, ;; we are basically querying the whole file. - start end))) + start end)) + (inhibit-point-motion-hooks t)) (with-silent-modifications (dolist (capture captures) (let* ((face (car capture)) (node (cdr capture)) (start (treesit-node-start node)) (end (treesit-node-end node))) - (cond ((facep face) - (put-text-property start end 'face face)) - ((functionp face) - (funcall face start end node))) + (cond + ((facep face) + (pcase override + ('nil (unless (text-property-not-all + start end 'face nil) + (put-text-property start end 'face face))) + ('t (put-text-property start end 'face face)) + ('append (font-lock-append-text-property + start end 'face face)) + ('prepend (font-lock-prepend-text-property + start end 'face face)) + ('keep (font-lock-fillin-text-property + start end 'face face)) + (_ (signal 'treesit-font-lock-error + (list + "Unrecognized value of :override option" + override))))) + ((functionp face) + (funcall face start end node))) ;; Don't raise an error if FACE is neither a face nor ;; a function. This is to allow intermediate capture ;; names used for #match and #eq. commit 7a4380b9051ddd0bcc4d5c90abe0f826a9b922dc Author: Yuan Fu Date: Tue Oct 4 13:02:07 2022 -0700 ; * doc/lispref/parsing.texi (Language Definitions): Fix typo. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 0511b85bd7..3784531fe5 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -83,7 +83,7 @@ Language Definitions @code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the system-specific extension for dynamic libraries. Also by convention, the function provided by that library is named -@code{tree_sitter_@var{language}. If a language definition doesn't +@code{tree_sitter_@var{language}}. If a language definition doesn't follow this convention, you should add an entry @example commit 253126bf339a16589e86acd51c0f96da07e658d9 Author: Yuan Fu Date: Tue Oct 4 12:18:53 2022 -0700 Fix python tree-sitter font-lock Add fontification for lhs attribute and some operators. * lisp/progmodes/python.el (python--treesit-keywords): Add operators. (python--treesit-settings): Add lhs attribute. diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index 5b70c63f7b..3f85201c10 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -915,7 +915,10 @@ python--treesit-keywords '("as" "assert" "async" "await" "break" "class" "continue" "def" "del" "elif" "else" "except" "exec" "finally" "for" "from" "global" "if" "import" "lambda" "nonlocal" "pass" "print" - "raise" "return" "try" "while" "with" "yield")) + "raise" "return" "try" "while" "with" "yield" + ;; These are technically operators, but we fontify them as + ;; keywords. + "and" "in" "is" "not" "or")) (defvar python--treesit-builtins '("abs" "all" "any" "ascii" "bin" "bool" "breakpoint" "bytearray" @@ -934,6 +937,8 @@ python--treesit-constants "copyright" "credits" "exit" "license" "quit")) (defvar python--treesit-operators + ;; This is not used. And and, or, not, is, in are fontified as + ;; keywords. 
'("-" "-=" "!=" "*" "**" "**=" "*=" "/" "//" "//=" "/=" "&" "%" "%=" "^" "+" "+=" "<" "<<" "<=" "<>" "=" "==" ">" ">=" ">>" "|" "~" "and" "in" "is" "not" "or")) @@ -1019,6 +1024,9 @@ python--treesit-settings ;; Variable names. (assignment left: (identifier) @font-lock-variable-name-face) + (assignment left: (attribute + attribute: (identifier) + @font-lock-variable-name-face)) (pattern_list (identifier) @font-lock-variable-name-face) (tuple_pattern (identifier) commit 96d44c4321724556819e2f718f1d94470d753d07 Author: Yuan Fu Date: Tue Oct 4 12:16:47 2022 -0700 Add treesit-language-version * doc/lispref/parsing.texi (Language Definitions): Update manual. * src/treesit.c (Ftreesit_language_version): New function diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 05f4050b54..0511b85bd7 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -102,6 +102,15 @@ Language Definitions for a language too cool to abide by conventions. +@defun treesit-language-version &optional min-compatible +Tree-sitter library has a @dfn{language version}, a language +definition's version needs to match this version to be compatible. + +This function returns tree-sitter library’s language version. If +@var{min-compatible} is non-nil, it returns the minimal compatible +version. +@end defun + @heading Concrete syntax tree A syntax tree is what a parser generates. In a syntax tree, each node diff --git a/src/treesit.c b/src/treesit.c index d72c99a974..77b48133ba 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -316,6 +316,20 @@ DEFUN ("treesit-language-available-p", return Qt; } +DEFUN ("treesit-language-version", + Ftreesit_language_version, + Streesit_language_version, + 0, 1, 0, + doc: /* Return the language version of tree-sitter library. +If MIN-COMPATIBLE non-nil, return the minimal compatible version. 
*/) + (Lisp_Object min_compatible) +{ + if (NILP (min_compatible)) + return make_fixnum (TREE_SITTER_LANGUAGE_VERSION); + else + return make_fixnum (TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION); +} + /*** Parsing functions */ static void commit 10f96a8c57b031b521ae8ba5a02413b94ece423b Author: Yuan Fu Date: Sat Oct 1 20:29:35 2022 -0700 Improve documentation of tree-sitter search functions * doc/lispref/parsing.texi (Retrieving Node): Update manual. * src/treesit.c (treesit-search-subtree) (treesit-search-forwardn) (treesit-induce-sparse-tree): Mention case-insensitivity. * lisp/treesit.el (treesit-simple-indent): Mention case-insensitivity. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 4ae2c4e341..05f4050b54 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -575,10 +575,10 @@ Retrieving Node @defun treesit-search-subtree node predicate &optional all backward limit This function traverses the subtree of @var{node} (including @var{node}), and match @var{predicate} with each node along the way. -And @var{predicate} is a regexp that matches against each node's type, -or a function that takes a node and returns nil/non-nil. If a node -matches, that node is returned, if no node ever matches, nil is -returned. +And @var{predicate} is a regexp that matches (case-insensitively) +against each node's type, or a function that takes a node and returns +nil/non-nil. If a node matches, that node is returned, if no node +ever matches, nil is returned. By default, this function only traverses named nodes, if @var{all} is non-nil, it traverses all nodes. If @var{backward} is non-nil, it @@ -590,8 +590,8 @@ Retrieving Node This function is somewhat similar to @code{treesit-search-subtree}. It also traverse the parse tree and match each node with @var{predicate} (except for @var{start}), where @var{predicate} can be -a regexp or a function. 
For a tree like the below where @var{start} -is marked 1, this function traverses as numbered: +a (case-insensitive) regexp or a function. For a tree like the below +where @var{start} is marked 1, this function traverses as numbered: @example @group @@ -629,8 +629,8 @@ Retrieving Node Basically, it takes the subtree under @var{root}, and combs it so only the nodes that match @var{predicate} are left, like picking out grapes on the vine. Like previous functions, @var{predicate} can be a regexp -string that matches against each node's type, or a function that takes -a node and return nil/non-nil. +string that matches against each node's type case-insensitively, or a +function that takes a node and return nil/non-nil. For example, for a subtree on the left that consist of both numbers and letters, if @var{predicate} is ``letter only'', the returned tree diff --git a/lisp/treesit.el b/lisp/treesit.el index 4f56a14387..100bf9ac67 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -703,8 +703,8 @@ treesit-search-forward-goto Stops at the first node after point that matches PREDICATE. PREDICATE can be either a regexp that matches against each node's -type, or a function that takes a node and returns nil/non-nil for -match/no match. +type case-insensitively, or a function that takes a node and +returns nil/non-nil for match/no match. If a node matches, move to that node and return the node, otherwise return nil. SIDE controls whether we move to the start diff --git a/src/treesit.c b/src/treesit.c index c9bccb123f..d72c99a974 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1973,7 +1973,8 @@ DEFUN ("treesit-search-subtree", Traverse the subtree of NODE, and match PREDICATE with each node along the way. PREDICATE is a regexp string that matches against each -node's type, or a function that takes a node and returns nil/non-nil. +node's type case-insensitively, or a function that takes a node and +returns nil/non-nil. 
By default, only traverse named nodes, if ALL is non-nil, traverse all nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is @@ -2018,8 +2019,8 @@ DEFUN ("treesit-search-forward", Start traversing the tree from node START, and match PREDICATE with each node along the way (except START). PREDICATE is a regexp string -that matches against each node's type, or a function that takes a node -and returns nil/non-nil. +that matches against each node's type case-insensitively, or a +function that takes a node and returns nil/non-nil. By default, only search for named nodes, if ALL is non-nil, search for all nodes. If BACKWARD is non-nil, search backwards. @@ -2122,7 +2123,8 @@ DEFUN ("treesit-induce-sparse-tree", Basically, take the subtree under ROOT, and comb it so only the nodes that match PREDICATE are left, like picking out grapes on the vine. -PREDICATE is a regexp string that matches against each node's type. +PREDICATE is a regexp string that matches against each node's type +case-insensitively. For a subtree on the left that consist of both numbers and letters, if PREDICATE is "is letter", the returned tree is the one on the right. commit 9b5ecffeb00f22ca6663aa14e7807c9886ed1716 Author: Yuan Fu Date: Sat Oct 1 20:25:25 2022 -0700 Ignore some capture name in treesit-font-lock-fontify-region * doc/lispref/modes.texi (Parser-based Font Lock): Update manual. * lisp/treesit.el: (treesit-font-lock-fontify-region): Ignore names that are not face nor function. (treesit-font-lock-rules): Update docstring. diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index ab83d8712b..0d58c28e27 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -3929,7 +3929,8 @@ Parser-based Font Lock case the function is called with (@var{start} @var{end} @var{node}), where @var{start} and @var{end} are the start and end position of the node in buffer, and @var{node} is the node itself. If a capture name -is both a face and a function, the face takes priority. 
+is both a face and a function, the face takes priority. If a capture +name is not a face name nor a function name, it is ignored. @end defun @defvar treesit-font-lock-settings diff --git a/lisp/treesit.el b/lisp/treesit.el index 91e3d05a51..4f56a14387 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -325,7 +325,9 @@ treesit-font-lock-rules which case the function is called with (START END NODE), where START and END are the start and end position of the node in buffer, and NODE is the tree-sitter node object. If a capture -name is both a face and a function, the face takes priority. +name is both a face and a function, the face takes priority. If +a capture name is not a face name nor a function name, it is +ignored. \(fn :KEYWORD VALUE QUERY...)" (let (;; Tracks the current language that following queries will @@ -382,8 +384,10 @@ treesit-font-lock-fontify-region (cond ((facep face) (put-text-property start end 'face face)) ((functionp face) - (funcall face start end node)) - (t (error "Capture name %s is neither a face nor a function" face))) + (funcall face start end node))) + ;; Don't raise an error if FACE is neither a face nor + ;; a function. This is to allow intermediate capture + ;; names used for #match and #eq. (when loudly (message "Fontifying text from %d to %d, Face: %s Language: %s" start end face language))))))))) commit 6a3caeab580f61f88c4ad49ff32cdf74eea4fb97 Author: Yuan Fu Date: Fri Sep 30 17:20:34 2022 -0700 Remove treesit-query-in Because treesit-query-capture can now do everything it does. * doc/lispref/parsing.texi (Pattern Matching): Update manual. * lisp/treesit.el (treesit-query-in): Remove function. * src/treesit.c (Ftreesit_query_capture): Accept parser and language symbol as NODE. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 32d151d45b..4ae2c4e341 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -867,6 +867,11 @@ Pattern Matching s-expression syntax and compiled query are described at the end of the section. +Parameter @var{node} can also be a parser or a language symbol. A +parser means using its root node, a language symbol means find or +create a parser for that language in the current buffer, and use the +root node. + The function returns all captured nodes in a list of @code{(@var{capture_name} . @var{node})}. If @var{node-only} is non-nil, a list of node is returned instead. If @var{beg} and @@ -879,22 +884,6 @@ Pattern Matching error. You can use @code{treesit-query-validate} to debug the query. @end defun -@defun treesit-query-in source query &optional beg end node-only -This function matches patterns in @var{query} in @var{source}, and -returns all captured nodes in a list of @code{(@var{capture_name} -. @var{node})}. If @var{node-only} is non-nil, a list of node is -returned instead. If @var{beg} and @var{end} are both non-nil, it -only pattern match nodes in that range. - -Argument @var{source} designates a node, it can be a language symbol, -a parser, or simply a node. If a language symbol, @var{source} -represents the root node of the first parser for that language in the -current buffer; if a parser, @var{source} represents the root node of -that parser. - -This function also raises @var{treesit-query-error}. -@end defun - For example, suppose @var{node}'s content is @code{1 + 2}, and @var{query} is diff --git a/lisp/treesit.el b/lisp/treesit.el index cf586f9978..91e3d05a51 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -20,11 +20,12 @@ ;;; Commentary: ;; ;; Note to self: we don't create parsers automatically in any provided -;; functions. +;; functions if we don't know what language to use. 
;;; Code: (eval-when-compile (require 'cl-lib)) +(eval-when-compile (require 'subr-x)) ; For `string-join'. (require 'cl-seq) (require 'font-lock) @@ -227,33 +228,6 @@ treesit-node-field-name ;;; Query API supplement -(defun treesit-query-in (source query &optional beg end node-only) - "Query the current buffer with QUERY. - -SOURCE can be a language symbol, a parser, or a node. If a -language symbol, use the root node of the first parser for that -language; if a parser, use the root node of that parser; if a -node, use that node. - -QUERY is either a string query, a sexp query, or a compiled -query. See Info node `(elisp)Pattern Matching' for how to write -a query in either string or s-expression form. When using -repeatedly, a compiled query is much faster than a string or sexp -one, so it is recommend to compile your queries if it will be -used over and over. - -BEG and END, if _both_ non-nil, specifies the range in which the query -is executed. If NODE-ONLY non-nil, return a list of nodes. - -Raise an treesit-query-error if QUERY is malformed." - (treesit-query-capture - (cond ((symbolp source) (treesit-buffer-root-node source)) - ((treesit-parser-p source) - (treesit-parser-root-node source)) - ((treesit-node-p source) source)) - query - beg end node-only)) - (defun treesit-query-string (string query language) "Query STRING with QUERY in LANGUAGE. See `treesit-query-capture' for QUERY." @@ -272,7 +246,7 @@ treesit-query-range of (START . END), where START and END specifics the range of each captured node. Capture names don't matter." 
(cl-loop for capture - in (treesit-query-in source query beg end) + in (treesit-query-capture source query beg end) for node = (cdr capture) collect (cons (treesit-node-start node) (treesit-node-end node)))) @@ -847,7 +821,7 @@ treesit-query-validate (with-temp-buffer (treesit-parser-create language) (condition-case err - (progn (treesit-query-in language query) + (progn (treesit-query-capture language query) (message "QUERY is valid")) (treesit-query-error (with-current-buffer buf diff --git a/src/treesit.c b/src/treesit.c index 1a61e354cf..c9bccb123f 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1691,13 +1691,17 @@ DEFUN ("treesit-query-capture", BEG and END, if both non-nil, specifies the range in which the query is executed. If NODE-ONLY is non-nil, return a list of nodes. +Besides a node, NODE can also be a parser, then the root node of that +parser is used; NODE can be a language symbol, then the root node of a +parser for that language is used. If such a parser doesn't exist, it +is created. + Signals treesit-query-error if QUERY is malformed or something else goes wrong. You can use `treesit-query-validate' to debug the query. */) (Lisp_Object node, Lisp_Object query, Lisp_Object beg, Lisp_Object end, Lisp_Object node_only) { - ts_check_node (node); if (!NILP (beg)) CHECK_INTEGER (beg); if (!NILP (end)) @@ -1707,11 +1711,29 @@ DEFUN ("treesit-query-capture", || CONSP (query) || STRINGP (query))) wrong_type_argument (Qtreesit_query_p, query); + + Lisp_Object lisp_node; + if (TS_NODEP (node)) + lisp_node = node; + else if (TS_PARSERP (node)) + lisp_node = Ftreesit_parser_root_node (node); + else if (SYMBOLP (node)) + { + Lisp_Object parser + = Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil); + lisp_node = Ftreesit_parser_root_node (parser); + } + else + xsignal2 (Qwrong_type_argument, + list4 (Qor, Qtreesit_node_p, + Qtreesit_parser_p, Qsymbolp), + node); + /* Extract C values from Lisp objects. 
*/ - TSNode ts_node = XTS_NODE (node)->node; - Lisp_Object lisp_parser = XTS_NODE (node)->parser; + TSNode ts_node = XTS_NODE (lisp_node)->node; + Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser; ptrdiff_t visible_beg = - XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg; const TSLanguage *lang = ts_parser_language (XTS_PARSER (lisp_parser)->parser); commit e504eabe88727141e70ae9793be71d4285f839a5 Author: Yuan Fu Date: Fri Sep 30 16:54:42 2022 -0700 Change tree-sitter indent anchor preset 'prev-line' * doc/lispref/modes.texi: Update manual. * lisp/treesit.el (treesit-simple-indent-presets): Change prev-line to mean bol of prev-line. diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index d6797fe04f..ab83d8712b 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -4820,8 +4820,8 @@ Parser-based Indentation prev-line @end example -This anchor returns the start of the first named node on the previous -line. This can be used for indenting an empty line. +This anchor returns the first non-whitespace charater on the previous +line. @end defvar @heading Indentation utilities diff --git a/lisp/treesit.el b/lisp/treesit.el index 001404d88d..cf586f9978 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -522,9 +522,7 @@ treesit-simple-indent-presets (save-excursion (goto-char bol) (forward-line -1) - (skip-chars-forward " \t") - (treesit-node-start - (treesit-node-at (point) nil t)))))) + (skip-chars-forward " \t"))))) "A list of presets. These presets that can be used as MATHER and ANCHOR in `treesit-simple-indent-rules'. @@ -585,8 +583,7 @@ treesit-simple-indent-presets prev-line - Find the named node on the previous line. This can be used when - indenting an empty line: just indent like the previous node.") + The first non-whitespace charater on the previous line.") (defun treesit--simple-apply (fn args) "Apply ARGS to FN. 
commit 6e60d464188994bdce97f1aef3b81fae3fd28583 Author: Yuan Fu Date: Fri Sep 30 16:51:21 2022 -0700 Fix tree-sitter manuals * doc/lispref/modes.texi: Fix typos, reword, etc. * doc/lispref/parsing.texi: Fix typos, reword, etc. diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 2d80a9db2f..d6797fe04f 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -3886,10 +3886,7 @@ Parser-based Font Lock Besides simple syntactic font lock and regexp-based font lock, Emacs also provides complete syntactic font lock with the help of a parser, currently provided by the tree-sitter library (@pxref{Parsing Program -Source}). Because it is an optional feature, parser-based font lock -is less integrated with Emacs. Most variables introduced in previous -sections only apply to regexp-based font lock, except for -@var{font-lock-maximum-decoration}. +Source}). @defun treesit-font-lock-enable This function enables parser-based font lock in the current buffer. @@ -3923,7 +3920,7 @@ Parser-based Font Lock This function takes a list of text or s-exp queries. Before each query, there are @var{:keyword} and @var{value} pairs that configures -that query. The @var{:lang} keyword sets the query’s language, and is +that query. The @code{:lang} keyword sets the query’s language, and is currently the only recognized keyword. Capture names in @var{query} should be face names like @@ -3931,8 +3928,8 @@ Parser-based Font Lock with that face. Capture names can also be function names, in which case the function is called with (@var{start} @var{end} @var{node}), where @var{start} and @var{end} are the start and end position of the -node in buffer, and @var{node} is the tree-sitter node object. If a -capture name is both a face and a function, the face takes priority. +node in buffer, and @var{node} is the node itself. If a capture name +is both a face and a function, the face takes priority. 
@end defun @defvar treesit-font-lock-settings @@ -3948,12 +3945,11 @@ Parser-based Font Lock Each @var{setting} controls one parser (often of different language). And @var{language} is the language symbol (@pxref{Language -Definitions}); @var{query} is either a string query or a sexp query -(@pxref{Pattern Matching}). +Definitions}); @var{query} is the query (@pxref{Pattern Matching}). @end defvar Multi-language major modes should provide range functions in -@var{treesit-range-functions}, and Emacs will set the ranges +@code{treesit-range-functions}, and Emacs will set the ranges accordingly before fontifing a region (@pxref{Multiple Languages}). @node Auto-Indentation @@ -4700,8 +4696,7 @@ Parser-based Indentation This variable stores the actual function called by @code{treesit-indent}. By default, its value is @code{treesit-simple-indent}. In the future we might add other -more complex indentation engines, if @code{treesit-simple-indent} -proves to be insufficient. +more complex indentation engines. @end defvar @heading Writing indentation rules @@ -4714,57 +4709,37 @@ Parser-based Indentation (@var{language} . @var{rules}) @end example -where @var{language} is a language symbol, @var{rules} is a list of +where @var{language} is a language symbol, and @var{rules} is a list +of @example (@var{matcher} @var{anchor} @var{offset}) @end example -The @var{matcher} determines whether this rule applies, @var{anchor} -and @var{offset} together determines which column to indent to. - -A @var{matcher} is a function that takes three arguments (@var{node} -@var{parent} @var{bol}). 
Argument @var{bol} is the point at where we -are indenting: the position of the first non-whitespace character from -the beginning of line; @var{node} is the largest (highest-in-tree) -node that starts at that point; @var{parent} is the parent of -@var{node}; - -If @var{matcher} returns non-nil, meaning the rule matches, Emacs then -uses @var{anchor} to find an anchor, it should be a function that -takes the same argument (@var{node} @var{parent} @var{bol}) and -returns a point. - -Finally Emacs computes the column of that point returned by -@var{anchor} and adds @var{offset} to it, and indents to that column. - -For @var{matcher} and @var{anchor}, Emacs provides some convenient -presets to spare us from writing these functions ourselves. They are -stored in @var{treesit-simple-indent-presets}, see below. +First Emacs passes the node at point to @var{matcher}, if it return +non-nil, this rule applies. Then Emacs passes the node to +@var{anchor}, it returns a point. Emacs takes the column number of +that point, add @var{offset} to it, and the result is the indent for +the current line. + +The @var{matcher} and @var{anchor} are functions, and Emacs provides +convenient presets for them. You can skip over to +@code{treesit-simple-indent-presets} below, those presets should be +more than enough. + +A @var{matcher} or an @var{anchor} is a function that takes three +arguments (@var{node} @var{parent} @var{bol}). Argument @var{bol} is +the point at where we are indenting: the position of the first +non-whitespace character from the beginning of line; @var{node} is the +largest (highest-in-tree) node that starts at that point; @var{parent} +is the parent of @var{node}. A @var{matcher} returns nil/non-nil, and +@var{anchor} returns a point. @end defvar @defvar treesit-simple-indent-presets This is a list of presets for @var{matcher}s and @var{anchor}s in -@var{treesit-simple-indent-rules}. 
Each of them represent a -function that takes @var{node}, @var{parent} and @var{bol} as -arguments. - -@example -(match @var{node-type} @var{parent-type} - @var{node-field} @var{node-index-min} @var{node-index-max}) -@end example - -This matcher checks if @var{node}'s type is @var{node-type}, -@var{parent}'s type is @var{parent-type}, @var{node}'s field name in -@var{parent} is @var{node-field}, and @var{node}'s index among its -siblings is between @var{node-index-min} and @var{node-index-max}. If -the value of a constraint is nil, this matcher doesn't check for that -constraint. For example, to match the first child where parent is -@code{argument_list}, use - -@example -(match nil "argument_list" nil nil 0 0) -@end example +@code{treesit-simple-indent-rules}. Each of them represent a function +that takes @var{node}, @var{parent} and @var{bol} as arguments. @example no-node @@ -4793,6 +4768,23 @@ Parser-based Indentation This matcher matches if querying @var{parent} with @var{query} captures @var{node}. The capture name does not matter. +@example +(match @var{node-type} @var{parent-type} + @var{node-field} @var{node-index-min} @var{node-index-max}) +@end example + +This matcher checks if @var{node}'s type is @var{node-type}, +@var{parent}'s type is @var{parent-type}, @var{node}'s field name in +@var{parent} is @var{node-field}, and @var{node}'s index among its +siblings is between @var{node-index-min} and @var{node-index-max}. If +the value of a constraint is nil, this matcher doesn't check for that +constraint. For example, to match the first child where parent is +@code{argument_list}, use + +@example +(match nil "argument_list" nil nil 0 0) +@end example + @example first-sibling @end example @@ -4822,7 +4814,7 @@ Parser-based Indentation no-indent @end example -This anchor returns the start of @var{node}, i.e., do not indent. +This anchor returns the start of @var{node}, i.e., no indent. @example prev-line @@ -4838,8 +4830,8 @@ Parser-based Indentation rules. 
@defun treesit-check-indent mode -This function check current buffer's indentation against major mode -@var{mode}. It indents the current line in @var{mode} and compares +This function checks current buffer's indentation against major mode +@var{mode}. It indents the current buffer in @var{mode} and compares the indentation with the current indentation. Then it pops up a diff buffer showing the difference. Correct indentation (target) is in green, current indentation is in red. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 32fc6a69a5..32d151d45b 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -66,11 +66,11 @@ Language Definitions In order to use a language definition in Emacs, you need to make sure that the dynamic library is installed on the system. Emacs looks for language definitions under load paths in -@var{treesit-extra-load-path}, @var{user-emacs-directory}/tree-sitter, +@code{treesit-extra-load-path}, @code{user-emacs-directory}/tree-sitter, and system default locations for dynamic libraries, in that order. -Emacs tries each extensions in @var{treesit-load-suffixes}. If Emacs +Emacs tries each extensions in @code{treesit-load-suffixes}. If Emacs cannot find the library or has problem loading it, Emacs signals -@var{treesit-load-language-error}. The signal data is a list of +@code{treesit-load-language-error}. The signal data is a list of specific error messages. @defun treesit-language-available-p language @@ -83,14 +83,14 @@ Language Definitions @code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the system-specific extension for dynamic libraries. Also by convention, the function provided by that library is named -@code{tree_sitter_}. If a language definition doesn't +@code{tree_sitter_@var{language}. 
If a language definition doesn't follow this convention, you should add an entry @example (@var{language} @var{library-base-name} @var{function-name}) @end example -to @var{treesit-load-name-override-list}, where +to @code{treesit-load-name-override-list}, where @var{library-base-name} is the base filename for the dynamic library (conventionally @code{libtree-sitter-@var{language}}), and @var{function-name} is the function provided by the library @@ -100,14 +100,13 @@ Language Definitions (cool-lang "libtree-sitter-coool" "tree_sitter_cooool") @end example -for a language too cool to abide by the rules. +for a language too cool to abide by conventions. @heading Concrete syntax tree -A syntax tree is what a language definition defines (more or less) and -what a parser generates. In a syntax tree, each node represents a -piece of text, and is connected to each other by a parent-child -relationship. For example, if the source text is +A syntax tree is what a parser generates. In a syntax tree, each node +represents a piece of text, and is connected to each other by a +parent-child relationship. For example, if the source text is @example 1 + 2 @@ -192,7 +191,7 @@ Language Definitions Authors of language definitions define the @dfn{grammar} of a language, and this grammar determines how does a parser construct a -concrete syntax tree out of the text. In order to used the syntax +concrete syntax tree out of the text. In order to use the syntax tree effectively, we need to read the @dfn{grammar file}. The grammar file is usually @code{grammar.js} in a language @@ -319,8 +318,8 @@ Using Parser This section described how to create and configure a tree-sitter parser. In Emacs, each tree-sitter parser is associated with a -buffer. As we edit the buffer, the associated parser is automatically -kept up-to-date. +buffer. As we edit the buffer, the associated parser and the syntax +tree is automatically kept up-to-date. 
@defvar treesit-max-buffer-size This variable contains the maximum size of buffers in which @@ -331,14 +330,14 @@ Using Parser @defun treesit-can-enable-p This function checks whether the current buffer is suitable for activating tree-sitter features. It basically checks -@code{treesit-available-p} and @var{treesit-max-buffer-size}. +@code{treesit-available-p} and @code{treesit-max-buffer-size}. @end defun @cindex Creating tree-sitter parsers @defun treesit-parser-create language &optional buffer no-reuse -To create a parser, we provide a @var{buffer} to keep track of and the -@var{language} to use (@pxref{Language Definitions}). If @var{buffer} -is nil, the current buffer is used. +To create a parser, we provide a @var{buffer} and the @var{language} +to use (@pxref{Language Definitions}). If @var{buffer} is nil, the +current buffer is used. By default, this function reuses a parser if one already exists for @var{language} in @var{buffer}, if @var{no-reuse} is non-nil, this @@ -363,15 +362,14 @@ Using Parser There is no need to explicitly parse a buffer, because parsing is done automatically and lazily. A parser only parses when we query for a node in its syntax tree. Therefore, when a parser is first created, -it doesn't parse the buffer; instead, it waits until we query for a -node for the first time. Similarly, when some change is made in the -buffer, a parser doesn't re-parse immediately and only records some -necessary information to later re-parse when necessary. +it doesn't parse the buffer; it waits until we query for a node for +the first time. Similarly, when some change is made in the buffer, a +parser doesn't re-parse immediately. @vindex treesit-buffer-too-large When a parser do parse, it checks for the size of the buffer. Tree-sitter can only handle buffer no larger than about 4GB. 
If the -size exceeds that, Emacs signals @var{treesit-buffer-too-large} +size exceeds that, Emacs signals @code{treesit-buffer-too-large} with signal data being the buffer size. Once a parser is created, Emacs automatically adds it to the @@ -418,9 +416,8 @@ Retrieving Node @cindex tree-sitter find node @cindex tree-sitter get node -There are two ways to retrieve a node: directly from the syntax tree, -or by traveling from other nodes. But before we continue, lets go -over some conventions of tree-sitter functions. +Before we continue, lets go over some conventions of tree-sitter +functions. We talk about a node being ``smaller'' or ``larger'', and ``lower'' or ``higher''. A smaller and lower node is lower in the syntax tree and @@ -435,11 +432,8 @@ Retrieving Node @vindex treesit-node-outdated Nodes are not automatically updated when the associated buffer is -modified. In fact, there is no way to update a node once it is -retrieved. It is best to use a node and throw it away and not save -it. A node is @dfn{outdated} if the buffer has changed since the node -is retrieved. Using an outdated node throws -@var{treesit-node-outdated} error. +modified. And there is no way to update a node once it is retrieved. +Using an outdated node throws @code{treesit-node-outdated} error. @heading Retrieving node from syntax tree @@ -455,12 +449,13 @@ Retrieving Node that language in @code{(treesit-parser-list)} and use that. If @var{named} is non-nil, this function looks for a named node -instead (@pxref{tree-sitter named node, named node}). +only (@pxref{tree-sitter named node, named node}). +Example: @example @group ;; Find the node at point in a C parser's syntax tree. -(treesit-node-on (point) 'c) +(treesit-node-at (point) 'c) @c @result{} # @end group @end example @@ -472,11 +467,11 @@ Retrieving Node less or equal to @var{beg}, and the end of the node is greater or equal to @var{end}. 
-@emph{Beware}, Calling this function on an empty line that is not +@emph{Beware} that calling this function on an empty line that is not inside any top-level construct (function definition, etc) most probably will give you the root node, because the root node is the -smallest node that covers that empty line. You probably want to use -@code{treesit-node-at} instead. +smallest node that covers that empty line. Most of the time, you want +to use @code{treesit-node-at}. When @var{parser-or-lang} is nil, this function uses the first parser in @code{(treesit-parser-list)} in the current buffer. If @@ -484,8 +479,8 @@ Retrieving Node @var{parser-or-lang} is a language, it finds the first parser using that language in @code{(treesit-parser-list)} and use that. -If @var{named} is non-nil, this function looks for a named node -instead (@pxref{tree-sitter named node, named node}). +If @var{named} is non-nil, this function looks for a named node only +(@pxref{tree-sitter named node, named node}). @end defun @defun treesit-parser-root-node parser @@ -496,8 +491,8 @@ Retrieving Node @defun treesit-buffer-root-node &optional language This function finds the first parser that uses @var{language} in @code{(treesit-parser-list)} in the current buffer, and returns the -root node of that buffer. If it cannot find an appropriate parser, it -returns nil. +root node of that buffer. If it cannot find an appropriate parser, +nil is returned. @end defun Once we have a node, we can retrieve other nodes from it, or query for @@ -524,20 +519,17 @@ Retrieving Node @defun treesit-node-children node &optional named This function returns all of @var{node}'s children in a list. If -@var{named} is non-nil, then it only retrieves named nodes -(@pxref{tree-sitter named node, named node}). +@var{named} is non-nil, then it only retrieves named nodes. @end defun @defun treesit-next-sibling node &optional named This function finds the next sibling of @var{node}. 
If @var{named} is -non-nil, it finds the next named sibling (@pxref{tree-sitter named -node, named node}). +non-nil, it finds the next named sibling. @end defun @defun treesit-prev-sibling node &optional named This function finds the previous sibling of @var{node}. If -@var{named} is non-nil, it finds the previous named sibling -(@pxref{tree-sitter named node, named node}). +@var{named} is non-nil, it finds the previous named sibling. @end defun @subheading By field name @@ -564,33 +556,34 @@ Retrieving Node @defun treesit-first-child-for-pos node pos &optional named This function finds the first child of @var{node} that extends beyond -@var{pos}. ``Extend beyond'' means the end of the child node -@code{>=} @var{pos}. This function only looks for immediate children of +@var{pos}. ``Extend beyond'' means the end of the child node >= +@var{pos}. This function only looks for immediate children of @var{node}, and doesn't look in its grand children. If @var{named} is non-nil, it only looks for named child (@pxref{tree-sitter named node, named node}). @end defun @defun treesit-node-descendant-for-range node beg end &optional named -This function finds the @emph{smallest} (grand)child of @var{node} -that spans the range from @var{beg} to @var{end}. It is similar to -@code{treesit-node-at}. If @var{named} is non-nil, it only looks -for named child (@pxref{tree-sitter named node, named node}). +This function finds the @emph{smallest} child/grandchild... of +@var{node} that spans the range from @var{beg} to @var{end}. It is +similar to @code{treesit-node-at}. If @var{named} is non-nil, it only +looks for named child. @end defun @heading Searching for node @defun treesit-search-subtree node predicate &optional all backward limit -This function traverses the subtree of @var{node}, and match -@var{predicate} with each node along the way. And @var{predicate} is -a regexp that matches against each node's type, or a function that -takes a node and returns nil/non-nil. 
If a node matches, that node is -returned, if no node ever matches, nil is returned. +This function traverses the subtree of @var{node} (including +@var{node}), and match @var{predicate} with each node along the way. +And @var{predicate} is a regexp that matches against each node's type, +or a function that takes a node and returns nil/non-nil. If a node +matches, that node is returned, if no node ever matches, nil is +returned. By default, this function only traverses named nodes, if @var{all} is non-nil, it traverses all nodes. If @var{backward} is non-nil, it -traverse backwards. If @var{limit} is non-nil, it only traverses that -number of levels down in the tree. +traverses backwards. If @var{limit} is non-nil, it only traverses +that number of levels down in the tree. @end defun @defun treesit-search-forward start predicate &optional all backward up @@ -598,7 +591,7 @@ Retrieving Node It also traverse the parse tree and match each node with @var{predicate} (except for @var{start}), where @var{predicate} can be a regexp or a function. For a tree like the below where @var{start} -is marked 1, this function will traverse as numbered: +is marked 1, this function traverses as numbered: @example @group @@ -616,8 +609,7 @@ Retrieving Node Same as in @code{treesit-search-subtree}, this function only searches for named nodes by default. But if @var{all} is non-nil, it searches -for all nodes. And If @var{backward} is non-nil, it searches -backwards. +for all nodes. If @var{backward} is non-nil, it searches backwards. If @var{up} is non-nil, this function will only traverse to siblings and parents. In that case, only 1 3 4 8 would be traversed. @@ -628,11 +620,11 @@ Retrieving Node that matches @var{predicate}. Parameters @var{predicate}, @var{all}, @var{backward}, and @var{up} are the same as in @code{treesit-search-forward}. And @var{side} controls which side of -the matched no do we stop at, it can be @code{'start} or @code{'end}. 
+the matched no do we stop at, it can be @code{start} or @code{end}. @end defun @defun treesit-induce-sparse-tree root predicate &optional process-fn limit -This function creates a sparse tree of @var{root}'s subtree. +This function creates a sparse tree from @var{root}'s subtree. Basically, it takes the subtree under @var{root}, and combs it so only the nodes that match @var{predicate} are left, like picking out grapes @@ -641,8 +633,8 @@ Retrieving Node a node and return nil/non-nil. For example, for a subtree on the left that consist of both numbers -and letters, if @var{predicate} is ``is letter'', the returned tree is -the one on the right. +and letters, if @var{predicate} is ``letter only'', the returned tree +is the one on the right. @example @group @@ -661,20 +653,21 @@ Retrieving Node @end example If @var{process-fn} is non-nil, instead of returning the matched -nodes, pass each node to @var{process-fn} use the return value -instead. If non-nil, @var{limit} is the number of levels to go down -from @var{root}. - -Each node in the returned tree looks like @code{(@var{node} -. (@var{child} ...))}. The root of this tree might be nil, if -@var{root} doesn't match @var{pred}. If no node matches -@var{predicate}, return nil. +nodes, this function passes each node to @var{process-fn} and uses the +returned value instead. If non-nil, @var{limit} is the number of +levels to go down from @var{root}. + +Each node in the returned tree looks like @code{(@var{tree-sitter +node} . (@var{child} ...))}. The @var{tree-sitter node} of the root +of this tree will be nil if @var{ROOT} doesn't match @var{pred}. If +no node matches @var{predicate}, return nil. @end defun @heading More convenient functions @defun treesit-filter-child node pred &optional named -This function finds children of @var{node} that satisfies @var{pred}. +This function finds immediate children of @var{node} that satisfies +@var{pred}. 
Function @var{pred} takes the child node as the argument and should return non-nil to indicated keeping the child. If @var{named} @@ -731,8 +724,7 @@ Accessing Node @defun treesit-node-text node &optional object Returns the buffer text that @var{node} represents. (If @var{node} is -retrieved from parsing a string, it will be the text from that -string.) +retrieved from parsing a string, it will be text from that string.) @end defun Here are some basic checks on tree-sitter nodes. @@ -765,13 +757,12 @@ Accessing Node @cindex tree-sitter node that has changes A node ``has changes'' if the buffer changed since when the node is -retrieved. In this case, the node's start and end position would be -off and we better throw it away and retrieve a new one. +retrieved, i.e., outdated. @cindex tree-sitter node that has error A node ``has error'' if the text it spans contains a syntax error. It -can be the node itself has an error, or one of its (grand)children has -an error. +can be the node itself has an error, or one of its +children/grandchildren... has an error. @defun treesit-node-check node property This function checks if @var{node} has @var{property}. @var{property} @@ -779,12 +770,13 @@ Accessing Node @code{'has-changes}, or @code{'has-error}. @end defun + +@defun treesit-node-type node Named nodes have ``types'' (@pxref{tree-sitter node type, node type}). For example, a named node can be a @code{string_literal} node, where @code{string_literal} is its type. -@defun treesit-node-type node -Return @var{node}'s type as a string. +This function returns @var{node}'s type as a string. @end defun @heading Information as a child or parent @@ -802,8 +794,8 @@ Accessing Node @end defun @defun treesit-node-field-name-for-child node n -This is a more primitive function that returns the field name of the -@var{n}'th child of @var{node}. +This function returns the field name of the @var{n}'th child of +@var{node}. 
@end defun @defun treesit-child-count node &optional named @@ -869,10 +861,11 @@ Pattern Matching Now we can introduce the query functions. @defun treesit-query-capture node query &optional beg end node-only -This function matches patterns in @var{query} in @var{node}. Argument -@var{query} can be either a string, a s-expression, or a compiled -query object. For now, we focus on the string syntax; s-expression -syntax and compiled query are described at the end of the section. +This function matches patterns in @var{query} in @var{node}. +Parameter @var{query} can be either a string, a s-expression, or a +compiled query object. For now, we focus on the string syntax; +s-expression syntax and compiled query are described at the end of the +section. The function returns all captured nodes in a list of @code{(@var{capture_name} . @var{node})}. If @var{node-only} is @@ -913,7 +906,6 @@ Pattern Matching @end group @end example -@noindent Querying that query would return @example @@ -996,15 +988,14 @@ Pattern Matching that has @emph{zero or more} @code{long} keyword. @example -(type_declaration "long"* @@long-in-type) +(type_declaration "long"*) @@long-type @end example -@noindent And this pattern matches a type declaration that has zero or one @code{long} keyword: @example -(type_declaration "long"?) @@type-decl +(type_declaration "long"?) @@long-type @end example @subheading Grouping @@ -1185,12 +1176,12 @@ Pattern Matching error. You can use @code{treesit-query-validate} to debug the query. @end defun -@defun treesit-expand-query query +@defun treesit-query-expand query This function expands the s-expression @var{query} into a string query. @end defun -@defun treesit-expand-pattern pattern +@defun treesit-pattern-expand pattern This function expands the s-expression @var{pattern} into a string pattern. 
@end defun @@ -1231,7 +1222,7 @@ Multiple Languages @vindex treesit-range-invalid If @var{ranges} violates this constraint, or something else went -wrong, this function signals a @var{treesit-range-invalid}. The +wrong, this function signals a @code{treesit-range-invalid}. The signal data contains a specific error message and the ranges we are trying to set. @@ -1279,8 +1270,8 @@ Multiple Languages a language symbol. @end defun -@defun treesit-query-range source pattern &optional beg end -This function matches @var{source} with @var{pattern} and returns the +@defun treesit-query-range source query &optional beg end +This function matches @var{source} with @var{query} and returns the ranges of captured nodes. The return value has the same shape of other functions: a list of @code{(@var{beg} . @var{end})}. @@ -1290,13 +1281,13 @@ Multiple Languages matches in the root node of that parser; if a node, this function matches in that node. -Parameter @var{pattern} is the query pattern used to capture nodes +Parameter @var{query} is the query used to capture nodes (@pxref{Pattern Matching}). The capture names don't matter. Parameter @var{beg} and @var{end}, if both non-nil, limits the range in which this function queries. Like other query functions, this function raises an -@var{treesit-query-error} if @var{pattern} is malformed. +@var{treesit-query-error} if @var{query} is malformed. @end defun @defun treesit-language-at point @@ -1334,12 +1325,9 @@ Multiple Languages @heading An example Normally, in a set of languages that can be mixed together, there is a -major language and several embedded languages. The major language -parses the whole document, and skips the embedded languages. Then the -parser for the major language knows the ranges of the embedded -languages. So we first parse the whole document with the major -language’s parser, set ranges for the embedded languages, then parse -the embedded languages. +major language and several embedded languages. 
We first parse the +whole document with the major language’s parser, set ranges for the +embedded languages, then parse the embedded languages. Suppose we want to parse a very simple document that mixes HTML, CSS and JavaScript: commit 9ed53535f5543910f746662618927052dd718991 Author: Yuan Fu Date: Sat Sep 24 20:54:03 2022 -0700 ; * lisp/progmodes/python.el (python-mode): Fix typo. diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index fb91d00053..5b70c63f7b 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -6143,7 +6143,7 @@ python-mode (if (and python-use-tree-sitter (treesit-can-enable-p)) (setq-local imenu-create-index-function - #'python-treesit-imenu-create-index) + #'python-imenu-treesit-create-index) (setq-local imenu-create-index-function #'python-imenu-create-index)) commit 9e339415b4c068242e7cd785d2ac95420f1934ba Author: Yuan Fu Date: Sat Sep 24 20:52:32 2022 -0700 Fix treesit-induce-sparse-tree Because not-at-all-werid way we implemented ts_build_sparse_tree, it’s return value needs a bit post-processing (i.e., reverse its cdr). * src/treesit.c (Ftreesit_induce_sparse_tree): Reverse the top-level children list. (ts_build_sparse_tree): Add comment. diff --git a/src/treesit.c b/src/treesit.c index 5917931e44..1a61e354cf 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -2041,7 +2041,9 @@ DEFUN ("treesit-search-forward", } /* Recursively traverse the tree under CURSOR, and append the result - subtree to PARENT's cdr. See more in `ts_build_sparse_tree'. */ + subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree. + Note that the top-level children list is reversed, because + reasons. 
*/ static void ts_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred, @@ -2151,6 +2153,7 @@ DEFUN ("treesit-induce-sparse-tree", ts_build_sparse_tree (&cursor, parent, predicate, process_fn, the_limit, no_limit, parser); + Fsetcdr (parent, Fnreverse (Fcdr (parent))); if (NILP (Fcdr (parent))) return Qnil; else commit ef6e18a6b9ab103f3f076b35100d09cff1687396 Author: Yuan Fu Date: Sat Sep 24 20:42:03 2022 -0700 Improve treesit-search-forward-goto * doc/lispref/parsing.texi (Retrieving Node): Update manual. * lisp/treesit.el (treesit-search-forward-goto): Instead of taking a node, use the node at point, and make sure we make progress. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 868b9bc074..32fc6a69a5 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -623,33 +623,12 @@ Retrieving Node and parents. In that case, only 1 3 4 8 would be traversed. @end defun -@defun treesit-search-forward-goto start predicate side &optional all backward up -For those who want to not only search for a node but also move to it, -this is the function to use. Parameter @var{start}, @var{predicate}, -@var{all}, @var{backward}, and @var{up} are the same as in +@defun treesit-search-forward-goto predicate side &optional all backward up +This function jumps to the start or end of the next node in buffer +that matches @var{predicate}. Parameters @var{predicate}, @var{all}, +@var{backward}, and @var{up} are the same as in @code{treesit-search-forward}. And @var{side} controls which side of the matched no do we stop at, it can be @code{'start} or @code{'end}. - -Beware of this common pitfall: - -@example -@group -;; This will not move point forward. -(while (treesit-search-forward-goto - (treesit-node-at (point)) - "xxx" - 'start) - ...) - -;; This is will move point forward. 
-(let ((node (treesit-node-at (point)))) - (while (setq node (treesit-search-forward-goto - node "xxx" 'start)) - ...)) -@end group -@end example - -The exact reason why is left as an exercise for the reader. @end defun @defun treesit-induce-sparse-tree root predicate &optional process-fn limit diff --git a/lisp/treesit.el b/lisp/treesit.el index def2e6259e..001404d88d 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -723,13 +723,13 @@ treesit-check-indent ;;; Search (defun treesit-search-forward-goto - (start predicate side &optional all backward up) - "Search for node in the parse tree and move point to it. + (predicate side &optional all backward up) + "Search forward for a node and move to it. -Start traversing the tree from node START, and match PREDICATE with -each node along the way (except START). PREDICATE can be either a -regexp that matches against each node's type, or a function that takes -a node and returns nil/non-nil for match/no match. +Stops at the first node after point that matches PREDICATE. +PREDICATE can be either a regexp that matches against each node's +type, or a function that takes a node and returns nil/non-nil for +match/no match. If a node matches, move to that node and return the node, otherwise return nil. SIDE controls whether we move to the start @@ -737,11 +737,24 @@ treesit-search-forward-goto \\='end. ALL, BACKWARD, and UP are the same as in `treesit-search-forward'." - (when-let ((node (treesit-search-forward - start predicate all backward up))) - (pcase side - ('start (goto-char (treesit-node-start node))) - ('end (goto-char (treesit-node-end node)))) + (let ((node (treesit-node-at (point))) + (start (point))) + ;; When searching forward, it is possible for (point) < start, + ;; because `treesit-search-forward' goes to parents. 
+ (while (and node (if backward + (>= (point) start) + (<= (point) start))) + (setq node (treesit-search-forward + node predicate all backward up)) + (if-let ((pos (pcase side + ('start (treesit-node-start node)) + ('end (treesit-node-end node))))) + (goto-char pos))) + ;; If we made reverse progress, go back to where we started. + (when (if backward + (>= (point) start) + (<= (point) start)) + (goto-char start)) node)) ;;; Debugging commit a31538ea5b006c3901ab85354725993b689a259d Author: Yuan Fu Date: Sat Sep 24 20:41:30 2022 -0700 Fix treesit-search-forward * src/treesit.c (ts_search_forward): Fix return value. diff --git a/src/treesit.c b/src/treesit.c index f3efcbe596..5917931e44 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1920,7 +1920,7 @@ ts_traverse_sibling_helper (TSNode node, bool forward, bool named) (start, pred, parser, named, forward, 0, true, skip_start)) return true; - TSNode next = ts_traverse_sibling_helper(node, forward, named); + TSNode next = ts_traverse_sibling_helper (node, forward, named); while (ts_node_is_null (next)) { node = ts_node_parent (node); @@ -1930,9 +1930,9 @@ ts_traverse_sibling_helper (TSNode node, bool forward, bool named) if (ts_traverse_match_predicate (node, pred, parser)) { *start = node; - return false; + return true; } - next = ts_traverse_sibling_helper(node, forward, named); + next = ts_traverse_sibling_helper (node, forward, named); } if (ts_search_forward (&next, pred, parser, named, forward, up_only, false)) commit 795e01ac248d389a581589b13a02465a2f99202f Author: Yuan Fu Date: Sat Sep 24 19:41:17 2022 -0700 Update and enable treesit-imenu function in python.el * lisp/progmodes/python.el (python--treesit-settings): Add docstring. (python--imenu-treesit-create-index-1): Rewrite with treesit-induce-sparse-tree. (python-imenu-treesit-create-index): Move main body to python--imenu-treesit-create-index-1. (python-imenu-treesit-create-flat-index): Fix typo. (python-mode): Enable treesit-imenu. 
Also fix indentation for which-func code. diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index 8368f4da51..fb91d00053 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -1035,7 +1035,9 @@ python--treesit-settings `(seq bol (or ,@python--treesit-exceptions) eol)) @font-lock-type-face)) - (type (identifier) @font-lock-type-face)))) + (type (identifier) @font-lock-type-face))) + "Tree-sitter font-lock settings.") + ;;; Indentation @@ -5244,61 +5246,79 @@ python-imenu-create-flat-index (python-imenu-create-index)))))) ;;; Tree-sitter imenu -;; -;; This works, but is slower than the native functions, presumably -;; because traversing the parser tree is slower than scanning the -;; text. Also I'm sure this consumes more memory as we allocate -;; memory for every node in the tree. -(defun python--imenu-treesit-create-index (&optional node) - "Return tree Imenu alist for the current Python buffer. +(defun python--imenu-treesit-create-index-1 (node) + "Given a sparse tree, create an imenu alist. -Change `python-imenu-format-item-label-function', -`python-imenu-format-parent-item-label-function', -`python-imenu-format-parent-item-jump-label-function' to -customize how labels are formatted. +NODE is the root node of the tree returned by +`treesit-induce-sparse-tree' (not a tree-sitter node, its car is +a tree-sitter node). Walk that tree and return an imenu alist. -NODE is the root node of the subtree you want to build an index -of. If nil, use the root node of the whole parse tree. +Return a list of ENTRY where -Similar to `python-imenu-create-index' but use tree-sitter." - (let* ((node (or node (treesit-buffer-root-node 'python))) - (children (treesit-node-children node t)) - (subtrees (mapcan #'python--imenu-treesit-create-index +ENTRY := (NAME . MARKER) + | (NAME . ((JUMP-LABEL . MARKER) + ENTRY + ...) + +NAME is the function/class's name, JUMP-LABEL is like \"*function +definition*\"." 
+ (let* ((ts-node (car node)) + (children (cdr node)) + (subtrees (mapcan #'python--imenu-treesit-create-index-1 children)) - (type (pcase (treesit-node-type node) - ("function_definition" 'def) - ("class_definition" 'class) - (_ nil))) - (name (when type + (type (pcase (treesit-node-type ts-node) + ("function_definition" 'def) + ("class_definition" 'class))) + ;; The root of the tree could have a nil ts-node. + (name (when ts-node (treesit-node-text (treesit-node-child-by-field-name - node "name") t)))) + ts-node "name") t))) + (marker (when ts-node + (set-marker (make-marker) + (treesit-node-start ts-node))))) (cond - ;; 1. This node is a function/class and doesn't have children. - ((and type (not subtrees)) - (let ((label - (funcall python-imenu-format-item-label-function - type name))) - (list (cons label - (set-marker (make-marker) - (treesit-node-start node)))))) - ;; 2. This node is a function/class and has children. - ((and type subtrees) + ((null ts-node) + subtrees) + (subtrees (let ((parent-label (funcall python-imenu-format-parent-item-label-function type name)) (jump-label - (funcall python-imenu-format-parent-item-jump-label-function - type name))) + (funcall + python-imenu-format-parent-item-jump-label-function + type name))) `((,parent-label - ,(cons jump-label (set-marker (make-marker) - (treesit-node-start node))) + ,(cons jump-label marker) ,@subtrees)))) - ;; 3. This node is not a function/class. - ((not type) subtrees)))) + (t (let ((label + (funcall python-imenu-format-item-label-function + type name))) + (list (cons label marker))))))) + +(defun python-imenu-treesit-create-index (&optional node) + "Return tree Imenu alist for the current Python buffer. -(defun python--imenu-treesit-create-flat-index () +Change `python-imenu-format-item-label-function', +`python-imenu-format-parent-item-label-function', +`python-imenu-format-parent-item-jump-label-function' to +customize how labels are formatted. 
+ +NODE is the root node of the subtree you want to build an index +of. If nil, use the root node of the whole parse tree. + +Similar to `python-imenu-create-index' but use tree-sitter." + (let* ((node (or node (treesit-buffer-root-node 'python))) + (tree (treesit-induce-sparse-tree + node + (rx (seq bol + (or "function" "class") + "_definition" + eol))))) + (python--imenu-treesit-create-index-1 tree))) + +(defun python-imenu-treesit-create-flat-index () "Return flat outline of the current Python buffer for Imenu. Change `python-imenu-format-item-label-function', @@ -5309,7 +5329,7 @@ python--imenu-treesit-create-flat-index Similar to `python-imenu-create-flat-index' but use tree-sitter." (python-imenu-create-flat-index - (python--imenu-treesit-create-index))) + (python-imenu-treesit-create-index))) ;;; Misc helpers @@ -6120,14 +6140,18 @@ python-mode (add-hook 'post-self-insert-hook #'python-indent-post-self-insert-function 'append 'local) - (setq-local imenu-create-index-function - #'python-imenu-create-index) + (if (and python-use-tree-sitter + (treesit-can-enable-p)) + (setq-local imenu-create-index-function + #'python-treesit-imenu-create-index) + (setq-local imenu-create-index-function + #'python-imenu-create-index)) (setq-local add-log-current-defun-function #'python-info-current-defun) (if (and python-use-tree-sitter - (treesit-can-enable-p)) + (treesit-can-enable-p)) (add-hook 'which-func-functions #'python-info-treesit-current-defun nil t) (add-hook 'which-func-functions #'python-info-current-defun nil t)) commit f071e61d106e6f3c17b660e3aa1a5b7890ea5d41 Author: Yuan Fu Date: Sat Sep 24 19:39:10 2022 -0700 ; Fix docstrings in treesit.el * lisp/treesit.el (treesit-font-lock-rules) (treesit-inspect-mode): Fix docstrings. diff --git a/lisp/treesit.el b/lisp/treesit.el index 9bdff83da8..def2e6259e 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -336,10 +336,10 @@ treesit-font-lock-rules configure the query (and only that query). 
For example, (treesit-font-lock-rules - :language 'javascript - '((true) @font-lock-constant-face + :language \\='javascript + \\='((true) @font-lock-constant-face (false) @font-lock-constant-face) - :language 'html + :language \\='html \"(script_element) @font-lock-builtin-face\") For each QUERY, a :language keyword is required. Currently the @@ -803,7 +803,7 @@ treesit-inspect-mode The mode-line displays - PARENT FIELD-NAME: (CHILD (GRAND-CHILD (...))) + PARENT FIELD-NAME: (CHILD FIELD_NAME: (GRAND-CHILD (...))) CHILD, GRAND-CHILD, and GRAND-GRAND-CHILD, etc, are nodes that have their beginning at point. And PARENT is the parent of commit eba65824364474bde89bdce5f57a772d74a2c409 Author: Yuan Fu Date: Sat Sep 24 19:35:11 2022 -0700 Add the treesit-search functions that supplant the removed ones The signatures also changed. treesit-traverse-depth-first -> treesit-search-subtree treesit-traverse-breadth-first -> treesit-traverse-forward -> treesit-search-forward treesit-search-forward -> treesit-search-forward-goto treesit-search-beginning/end -> treesit-search-forward-goto -> treesit-induce-sparse-tree * doc/lispref/parsing.texi (Retrieving Node): Add relevant manual sections. * lisp/treesit.el (treesit-search-forward-goto): New function. * src/treesit.c (ts_traverse_sibling_helper) (ts_traverse_match_predicate) (ts_search_dfs) (ts_search_forward) (treesit-search-subtree) (treesit-search-forward) (ts_build_sparse_tree) (Ftreesit_induce_sparse_tree): Add functions. * test/src/treesit-tests.el (treesit-node-supplemental): Add comments. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 0dbc70ce2d..868b9bc074 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -580,11 +580,116 @@ Retrieving Node @heading Searching for node +@defun treesit-search-subtree node predicate &optional all backward limit +This function traverses the subtree of @var{node}, and match +@var{predicate} with each node along the way. 
And @var{predicate} is +a regexp that matches against each node's type, or a function that +takes a node and returns nil/non-nil. If a node matches, that node is +returned, if no node ever matches, nil is returned. + +By default, this function only traverses named nodes, if @var{all} is +non-nil, it traverses all nodes. If @var{backward} is non-nil, it +traverse backwards. If @var{limit} is non-nil, it only traverses that +number of levels down in the tree. +@end defun + +@defun treesit-search-forward start predicate &optional all backward up +This function is somewhat similar to @code{treesit-search-subtree}. +It also traverse the parse tree and match each node with +@var{predicate} (except for @var{start}), where @var{predicate} can be +a regexp or a function. For a tree like the below where @var{start} +is marked 1, this function will traverse as numbered: + +@example +@group + o + | + 3--------4-----------8 + | | | +o--o-+--1 5--+--6 9---+-----12 +| | | | | | +o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15 +@end group +@end example + +Same as in @code{treesit-search-subtree}, this function only searches +for named nodes by default. But if @var{all} is non-nil, it searches +for all nodes. And If @var{backward} is non-nil, it searches +backwards. +If @var{up} is non-nil, this function will only traverse to siblings +and parents. In that case, only 1 3 4 8 would be traversed. @end defun +@defun treesit-search-forward-goto start predicate side &optional all backward up +For those who want to not only search for a node but also move to it, +this is the function to use. Parameter @var{start}, @var{predicate}, +@var{all}, @var{backward}, and @var{up} are the same as in +@code{treesit-search-forward}. And @var{side} controls which side of +the matched no do we stop at, it can be @code{'start} or @code{'end}. + +Beware of this common pitfall: + +@example +@group +;; This will not move point forward. 
+(while (treesit-search-forward-goto + (treesit-node-at (point)) + "xxx" + 'start) + ...) + +;; This is will move point forward. +(let ((node (treesit-node-at (point)))) + (while (setq node (treesit-search-forward-goto + node "xxx" 'start)) + ...)) +@end group +@end example + +The exact reason why is left as an exercise for the reader. @end defun +@defun treesit-induce-sparse-tree root predicate &optional process-fn limit +This function creates a sparse tree of @var{root}'s subtree. + +Basically, it takes the subtree under @var{root}, and combs it so only +the nodes that match @var{predicate} are left, like picking out grapes +on the vine. Like previous functions, @var{predicate} can be a regexp +string that matches against each node's type, or a function that takes +a node and return nil/non-nil. + +For example, for a subtree on the left that consist of both numbers +and letters, if @var{predicate} is ``is letter'', the returned tree is +the one on the right. + +@example +@group + a a a + | | | ++---+---+ +---+---+ +---+---+ +| | | | | | | | | +b 1 2 b | | b c d + | | => | | => | + c +--+ c + e + | | | | | + +--+ d 4 +--+ d + | | | + e 5 e +@end group +@end example + +If @var{process-fn} is non-nil, instead of returning the matched +nodes, pass each node to @var{process-fn} use the return value +instead. If non-nil, @var{limit} is the number of levels to go down +from @var{root}. + +Each node in the returned tree looks like @code{(@var{node} +. (@var{child} ...))}. The root of this tree might be nil, if +@var{root} doesn't match @var{pred}. If no node matches +@var{predicate}, return nil. @end defun @heading More convenient functions diff --git a/lisp/treesit.el b/lisp/treesit.el index 2defd83dc6..9bdff83da8 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -722,9 +722,27 @@ treesit-check-indent ;;; Search - - - +(defun treesit-search-forward-goto + (start predicate side &optional all backward up) + "Search for node in the parse tree and move point to it. 
+ +Start traversing the tree from node START, and match PREDICATE with +each node along the way (except START). PREDICATE can be either a +regexp that matches against each node's type, or a function that takes +a node and returns nil/non-nil for match/no match. + +If a node matches, move to that node and return the node, +otherwise return nil. SIDE controls whether we move to the start +or end of the matches node, it can be either \\='start or +\\='end. + +ALL, BACKWARD, and UP are the same as in `treesit-search-forward'." + (when-let ((node (treesit-search-forward + start predicate all backward up))) + (pcase side + ('start (goto-char (treesit-node-start node))) + ('end (goto-char (treesit-node-end node)))) + node)) ;;; Debugging diff --git a/src/treesit.c b/src/treesit.c index 51261c34a2..f3efcbe596 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1805,6 +1805,358 @@ DEFUN ("treesit-query-capture", return Fnreverse (result); } +/*** Navigation */ + +/* Return the next/previous named/unnamed sibling of NODE. FORWARD + controls the direction and NAMED controls the nameness. + */ +static TSNode +ts_traverse_sibling_helper (TSNode node, bool forward, bool named) +{ + if (forward) + { + if (named) + return ts_node_next_named_sibling (node); + else + return ts_node_next_sibling (node); + } + else + { + if (named) + return ts_node_prev_named_sibling (node); + else + return ts_node_prev_sibling (node); + } +} + +/* Return true if NODE matches PRED. PRED can be a string or a + function. This function doesn't check for PRED's type. 
*/ +static bool +ts_traverse_match_predicate +(TSNode node, Lisp_Object pred, Lisp_Object parser) +{ + if (STRINGP (pred)) + { + const char *type = ts_node_type (node); + return (fast_c_string_match_ignore_case + (pred, type, strlen (type)) >= 0); + } + else + { + Lisp_Object lisp_node = make_ts_node (parser, node); + return !NILP (CALLN (Ffuncall, pred, lisp_node)); + } + +} + +/* Traverse the parse tree starting from ROOT (but ROOT is not + matches against PRED). PRED can be a function (takes a node and + returns nil/non-nil),or a string (treated as regexp matching the + node's type, ignores case, must be all single byte characters). If + the node satisfies PRED , terminate, set ROOT to that node, and + return true. If no node satisfies PRED, return FALSE. PARSER is + the parser of ROOT. + + LIMIT is the number of levels we descend in the tree. If NO_LIMIT + is true, LIMIT is ignored. FORWARD controls the direction in which + we traverse the tree, true means forward, false backward. If NAMED + is true, only traverse named nodes, if false, all nodes. If + SKIP_ROOT is true, don't match ROOT. */ +static bool +ts_search_dfs +(TSNode *root, Lisp_Object pred, Lisp_Object parser, + bool named, bool forward, ptrdiff_t limit, bool no_limit, + bool skip_root) +{ + /* TSTreeCursor doesn't allow us to move backward, so we can't use + it. We could use limit == -1 to indicate no_limit == true, but + separating them is safer. */ + TSNode node = *root; + + if (!skip_root && ts_traverse_match_predicate (node, pred, parser)) + { + *root = node; + return true; + } + + if (!no_limit && limit <= 0) + return false; + else + { + int count = named ? + ts_node_named_child_count( node) + : ts_node_child_count (node); + for (int offset=0; offset < count; offset++) + { + uint32_t idx = forward ? 
offset + : count - offset - 1; + TSNode child = ts_node_child (node, idx); + + if (!ts_node_is_null (child) + && ts_search_dfs (&child, pred, parser, named, + forward, limit - 1, no_limit, false)) + { + *root = child; + return true; + } + } + return false; + } +} + +/* Go thought the whole tree linearly depth first, starting from + START. PRED, PARSER, NAMED, FORWARD are the same as in + ts_search_subtre. If UP_ONLY is true, never go to children, only + sibling and parents. If SKIP_START is true, don'tt match + START. */ +static bool +ts_search_forward +(TSNode *start, Lisp_Object pred, Lisp_Object parser, + bool named, bool forward, bool up_only, bool skip_start) +{ + TSNode node = *start; + + if (!up_only && ts_search_dfs + (start, pred, parser, named, forward, 0, true, skip_start)) + return true; + + TSNode next = ts_traverse_sibling_helper(node, forward, named); + while (ts_node_is_null (next)) + { + node = ts_node_parent (node); + if (ts_node_is_null (node)) + return false; + + if (ts_traverse_match_predicate (node, pred, parser)) + { + *start = node; + return false; + } + next = ts_traverse_sibling_helper(node, forward, named); + } + if (ts_search_forward + (&next, pred, parser, named, forward, up_only, false)) + { + *start = next; + return true; + } + else + return false; +} + +DEFUN ("treesit-search-subtree", + Ftreesit_search_subtree, + Streesit_search_subtree, 2, 5, 0, + doc: /* Traverse the parse tree depth-first. + +Traverse the subtree of NODE, and match PREDICATE with each node along +the way. PREDICATE is a regexp string that matches against each +node's type, or a function that takes a node and returns nil/non-nil. + +By default, only traverse named nodes, if ALL is non-nil, traverse all +nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is +non-nil, we only traverse that number of levels down in the tree. + +Return the first matched node, or nil if none matches. 
*/) + (Lisp_Object node, Lisp_Object predicate, Lisp_Object all, + Lisp_Object backward, Lisp_Object limit) +{ + CHECK_TS_NODE (node); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + CHECK_SYMBOL (all); + CHECK_SYMBOL (backward); + + ptrdiff_t the_limit = 0; + bool no_limit = false; + if (NILP (limit)) + no_limit = true; + else + { + CHECK_FIXNUM (limit); + the_limit = XFIXNUM (limit); + } + + TSNode ts_node = XTS_NODE (node)->node; + Lisp_Object parser = XTS_NODE (node)->parser; + if (ts_search_dfs + (&ts_node, predicate, parser, NILP (all), + NILP (backward), the_limit, no_limit, false)) + { + return make_ts_node (parser, ts_node); + } + else + return Qnil; +} + +DEFUN ("treesit-search-forward", + Ftreesit_search_forward, + Streesit_search_forward, 2, 5, 0, + doc: /* Search for node in the parse tree. + +Start traversing the tree from node START, and match PREDICATE with +each node along the way (except START). PREDICATE is a regexp string +that matches against each node's type, or a function that takes a node +and returns nil/non-nil. + +By default, only search for named nodes, if ALL is non-nil, search for +all nodes. If BACKWARD is non-nil, search backwards. + +Return the first matched node, or nil if none matches. + +For a tree like the below where START is marked 1, traverse as +numbered: + 16 + | + 3--------4-----------8 + | | | + o--o-+--1 5--+--6 9---+-----12 + | | | | | | + o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15 + +If UP is non-nil, only traverse to siblings and parents. In that +case, only 1 3 4 8 16 would be traversed. 
*/) + (Lisp_Object start, Lisp_Object predicate, Lisp_Object all, + Lisp_Object backward, Lisp_Object up) +{ + CHECK_TS_NODE (start); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + CHECK_SYMBOL (all); + CHECK_SYMBOL (backward); + CHECK_SYMBOL (up); + + TSNode ts_start = XTS_NODE (start)->node; + Lisp_Object parser = XTS_NODE (start)->parser; + if (ts_search_forward + (&ts_start, predicate, parser, NILP (all), + NILP (backward), !NILP (up), true)) + { + return make_ts_node (parser, ts_start); + } + else + return Qnil; +} + +/* Recursively traverse the tree under CURSOR, and append the result + subtree to PARENT's cdr. See more in `ts_build_sparse_tree'. */ +static void +ts_build_sparse_tree +(TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred, + Lisp_Object process_fn, ptrdiff_t limit, + bool no_limit, Lisp_Object parser) +{ + + TSNode node = ts_tree_cursor_current_node (cursor); + bool match = ts_traverse_match_predicate (node, pred, parser); + if (match) + { + /* If this node matches pred, add a new node to the parent's + children list. */ + Lisp_Object lisp_node = make_ts_node (parser, node); + if (!NILP (process_fn)) + { + lisp_node = CALLN (Ffuncall, process_fn, lisp_node); + } + Lisp_Object this = Fcons (lisp_node, Qnil); + Fsetcdr (parent, Fcons (this, Fcdr (parent))); + /* Now for children nodes, this is the new parent. */ + parent = this; + } + /* Go through each child. */ + if ((no_limit || limit > 0) + && ts_tree_cursor_goto_first_child (cursor)) + { + do + { + /* Make sure not to use node after the recursive funcall. + Then C compilers should be smart enough not to copy NODE + to stack. */ + ts_build_sparse_tree + (cursor, parent, pred, process_fn, + limit - 1, no_limit, parser); + } + while (ts_tree_cursor_goto_next_sibling (cursor)); + /* Don't forget to come back to this node. */ + ts_tree_cursor_goto_parent (cursor); + } + /* Before we go, reverse children in the sparse tree. 
*/ + if (match) + { + /* When match == true, "parent" is actually the node we added in + this layer (parent = this). */ + Fsetcdr (parent, Fnreverse (Fcdr (parent))); + } +} + +DEFUN ("treesit-induce-sparse-tree", + Ftreesit_induce_sparse_tree, + Streesit_induce_sparse_tree, 2, 4, 0, + doc: /* Create a sparse tree of ROOT's subtree. + +Basically, take the subtree under ROOT, and comb it so only the nodes +that match PREDICATE are left, like picking out grapes on the vine. +PREDICATE is a regexp string that matches against each node's type. + +For a subtree on the left that consist of both numbers and letters, if +PREDICATE is "is letter", the returned tree is the one on the right. + + a a a + | | | + +---+---+ +---+---+ +---+---+ + | | | | | | | | | + b 1 2 b | | b c d + | | => | | => | + c +--+ c + e + | | | | | + +--+ d 4 +--+ d + | | | + e 5 e + +If PROCESS-FN is non-nil, instead of returning the matched nodes, pass +each node to PROCESS-FN use the return value instead. If non-nil, +LIMIT is the number of levels to go down from ROOT. + +Each node in the returned tree looks like (NODE . (CHILD ...)). The +root of this tree might be nil, if ROOT doesn't match PREDICATE. If +no node matches PRED, return nil. + +PREDICATE can also be a function that takes a node and returns +nil/non-nil, but it is slower and more memory consuming than +regexp. 
*/) + (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn, + Lisp_Object limit) +{ + CHECK_TS_NODE (root); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + + if (!NILP (process_fn)) + CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); + ptrdiff_t the_limit = 0; + bool no_limit = false; + if (NILP (limit)) + no_limit = true; + else + { + CHECK_FIXNUM (limit); + the_limit = XFIXNUM (limit); + } + + TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node); + Lisp_Object parser = XTS_NODE (root)->parser; + Lisp_Object parent = Fcons (Qnil, Qnil); + ts_build_sparse_tree + (&cursor, parent, predicate, process_fn, + the_limit, no_limit, parser); + if (NILP (Fcdr (parent))) + return Qnil; + else + return parent; +} + /*** Initialization */ /* Initialize the tree-sitter routines. */ @@ -1835,6 +2187,8 @@ syms_of_treesit (void) "user-emacs-directory"); DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); + DEFSYM (Qor, "or"); + define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); define_error (Qtreesit_query_error, "Query pattern is malformed", Qtreesit_error); @@ -1925,4 +2279,8 @@ syms_of_treesit (void) defsubr (&Streesit_query_expand); defsubr (&Streesit_query_compile); defsubr (&Streesit_query_capture); + + defsubr (&Streesit_search_subtree); + defsubr (&Streesit_search_forward); + defsubr (&Streesit_induce_sparse_tree); } diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index fbf99ff087..6fa891a136 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -434,12 +434,17 @@ treesit-node-supplemental ;; `treesit-parent-while' ;; `treesit-node-children' ;; `treesit-node-field-name' + ;; `treesit-search-forward-goto' )) ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list +;; - treesit-search-subtree ;; - treesit-search-forward +;; - treesit-induce-sparse-tree +;; - treesit-search-forward + (provide 
'treesit-tests) ;;; treesit-tests.el ends here commit c957832cbf3e87e5a25f7c2bdb70abd959391d98 Author: Yuan Fu Date: Sat Sep 24 19:29:15 2022 -0700 Remove treesit-traverse functions Remove before adding the replacements. * doc/lispref/parsing.texi (Retrieving Node): Remove relevant sections. * lisp/treesit.el (treesit-traverse-depth-first) (treesit--traverse-breadth-first-1) (treesit-traverse-breadth-first) (treesit-next-sibling-or-up) (treesit-traverse-forward) (treesit-search-forward) (treesit-search-beginning): (treesit-search-end): Remove functions. (treesit-defun-query): Remove variable. (treesit-beginning-of-defun) (treesit-end-of-defun): Remove functions. * test/src/treesit-tests.el: Remove comments. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index a83ad20281..0dbc70ce2d 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -580,34 +580,11 @@ Retrieving Node @heading Searching for node -@defun treesit-search-beginning query arg &optional lang up-only -This function searches for the next node that @var{query} captures, -starting at point. Use the parser in current buffer that has -@var{lang} as its language, if @var{lang} is nil, use the first parser -in current buffer’s buffer list. - -This function stops at the @var{arg}'th match. If @var{arg} is -negative, search backward. If the search succeeds, stop at the -beginning of the matched node and return the node. Return nil if -search failed. - -By default, this function searches by traversing the parse tree depth -first, starting from the node at point. If @var{up-only} is non-nil, -this function only go to siblings and parents, but never go down into -children nodes. + @end defun -@defun treesit-search-end query arg &optional lang up-only -This function is like @code{treesit-search-beginning}, but stops at -the end of the matched node. 
@end defun -@defun treesit-search-forward pos-fn arg query &optional lang up-only -This function is like @code{treesit-search-beginning} and -@code{treesit-search-end}, but instead of stopping at the beginning or -end of the matched node, it determines where to stop by @var{pos-fn}, -where @var{pos-fn} is a function that takes a node and returns a -position @end defun @heading More convenient functions @@ -634,73 +611,6 @@ Retrieving Node farthest parent that still satisfies @var{pred}. @end defun -@cindex trees-sitter tree traversal -@defun treesit-traverse-depth-first node pred &optional step depth -Traverse the subtree of @var{node} depth-first. Traverse starting from -@var{node} (i.e., @var{node} is passed to @var{pred}). For each node -traversed, we call @var{pred} with the node, and we stop and return -the node if @var{pred} returns non-nil. If no node satisfies -@var{pred}, return nil. - -If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. -(The quantity of @var{step} doesn't matter.) - -@var{depth} can be a positive integer or 0, meaning go @var{depth} -levels deep, counting from @var{node}, or nil, meaning there is no -limit. For example, a value 0 means only traverse @var{node} itself, -a value 1 means traverse @var{node} and its immediate children. -@end defun - -@defun treesit-traverse-breadth-first node pred &optional step -Traverse the subtree of @var{node} breadth-first. Traverse starting -from @var{node} (i.e., @var{node} is passed to @var{pred}). For each -node traversed, call @var{pred} with the node, stop and return the -node if @var{pred} returns non-nil. If no node satisfies @var{pred}, -return nil. - -If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. -(The quantity of @var{step} doesn't matter.) -@end defun - -@defun treesit-traverse-forward node pred &optional step depth -Traverses the whole tree forward from NODE depth-first. 
Traverse -starting from @var{node} (i.e., @var{node} is passed to @var{pred}). -For each node traversed, call @var{pred} with the node, stop and -return the node if @var{pred} returns non-nil. If no node satisfies -@var{pred}, return nil. - -If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. -(The quantity of @var{step} doesn't matter.) - -Traversing forward means that for a tree like the below where -@var{node} is marked 1, traverse as numbered: - -@example -@group - 16 - | - 3--------4-----------8 - | | | - o--o-+--1 5--+--6 9---+-----12 - | | | | | | - o o 2 7 +-+-+ +--+--+ - | | | | | - 10 11 13 14 15 -@end group -@end example - -@var{depth} can be a positive integer, 0, nil, or @code{'up}. A -positive integer or 0 means go @var{depth} deep counting from -@var{node}. A nil means no limit. And a symbol @code{'up} means go -upwards only: only traverse to sibling and parent, never go down to -children. - -The difference between 0 and @code{'up} is subtle: in the above -example, if given 0 as @var{depth}, node 1 3 4 5 6 8 9 12 16 are -visited; if given @code{'up} as @var{depth}, only node 1 3 4 8 16 are -visited. -@end defun - @node Accessing Node @section Accessing Node Information diff --git a/doc/lispref/positions.texi b/doc/lispref/positions.texi index 809ac207d2..7945232bf8 100644 --- a/doc/lispref/positions.texi +++ b/doc/lispref/positions.texi @@ -834,32 +834,6 @@ List Motion of using its normal method. @end defvar -When tree-sitter support is available (@pxref{Parsing Program -Source}), Emacs can find the beginning and end of a function according -to the syntax tree. - -@defvar treesit-defun-query -Set this variable to a tree-sitter query that matches defun -definitions, then @code{treesit-beginning-of-defun} and -@code{treesit-end-of-defun} can find the beginning and end of a defun. - -Make sure to use a compiled query for this variable, otherwise -@code{treesit-beginning-of-defun} and @code{treesit-end-of-defun} will -be very slow. 
-@end defvar - -@defun treesit-beginning-of-defun &optional arg -This function finds the beginning of a defun according to -@var{treesit-defun-query}. This function is suitable for the value of -@var{beginning-of-defun-function}. -@end defun - -@defun treesit-end-of-defun &optional arg -This function finds the end of a defun according to -@var{treesit-defun-query}. This function is suitable for the value of -@var{end-of-defun-function}. -@end defun - @node Skipping Characters @subsection Skipping Characters @cindex skipping characters diff --git a/lisp/treesit.el b/lisp/treesit.el index 28a74657f9..2defd83dc6 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -203,130 +203,6 @@ treesit-parent-while (defalias 'treesit-traverse-parent #'treesit-parent-until) -(defun treesit-traverse-depth-first (node pred &optional step depth) - "Traverse the subtree of NODE depth-first. - -Traverse starting from NODE (i.e., NODE is passed to PRED). For -each node traversed, call PRED with the node, stop and return the -node if PRED returns non-nil. If STEP >= 0 or nil, go forward, -if STEP < 0, go backward. If no node satisfies PRED, return -nil. - -DEPTH can be a positive integer or 0, meaning go DEPTH deep -counting from NODE; or nil, meaning there is no limit." - (if (and (numberp depth) (<= depth 0)) - nil - (if (funcall pred node) - node - (cl-loop for child in (if (or (null step) (>= step 0)) - (treesit-node-children node) - (nreverse (treesit-node-children node))) - if (treesit-traverse-depth-first - child pred step (if (numberp depth) (1- depth) depth)) - return child)))) - -(defun treesit--traverse-breadth-first-1 (pred step queue tail) - "The work horse for `treesit-traverse-breadth-first'. -PRED and STEP are the same as in -`treesit-traverse-breadth-first'. This function simply runes BFS -on QUEUE: pops an element from QUEUE, append children to QUEUE, -process the element, and next iteration. TAIL is the pointer to -the last cons in QUEUE, used for appending elements." 
- (cl-loop while queue - if (funcall pred (car queue)) return (car queue) - else do - (let ((children (if (or (null step) (>= step 0)) - (treesit-node-children (car queue)) - (nreverse (treesit-node-children (car queue)))))) - ;; Append children to the end. - (setcdr tail children) - (setq tail (last tail)) - ;; Pop the head off. - (setq queue (cdr queue))) - finally return nil)) - -(defun treesit-traverse-breadth-first (node pred &optional step) - "Traverse the subtree of NODE breadth-first. - -Traverse starting from NODE (i.e., NODE is passed to PRED). For -each node traversed, call PRED with the node, stop and return the -node if PRED returns non-nil. If STEP >= 0 or nil, go forward, -if STEP < 0, go backward. If no node satisfies PRED, return -nil." - ;; Traverse with a queue. - (let* ((queue (list node)) - (tail (last queue))) - (treesit--traverse-breadth-first-1 pred step queue tail))) - -(defun treesit-next-sibling-or-up (node step) - "Return the next sibling of NODE. - -If there is no next sibling of NODE but NODE has a parent, return -the parent. If there is no parent, return nil. If STEP >= 0 or -nil, return the next sibling, if STEP < 0, return the previous -one. - -Return either ('sibling node) or ('parent node)." - ;; First deplete siblings. - (if-let ((sibling (if (or (null step) (>= step 0)) - (treesit-node-next-sibling node) - (treesit-node-prev-sibling node)))) - (list 'sibling sibling) - ;; When siblings depleted, go up one level. - (when (treesit-node-parent node) - (list 'parent (treesit-node-parent node))))) - -(defun treesit-traverse-forward (node pred &optional step depth) - "Traverse the whole tree forward from NODE depth-first. - -Traverse starting from NODE (i.e., NODE is passed to PRED). For -each node traversed, call PRED with the node, stop and return the -node if PRED returns non-nil. If STEP >= 0 or nil, go forward, -if STEP < 0, go backward. If no node satisfies PRED, return -nil. 
- -Traversing forward depth-first means that for a tree like the -below where NODE is marked 1, traverse as numbered: - - 16 - | - 3--------4-----------8 - | | | - o--o-+--1 5--+--6 9---+-----12 - | | | | | | - o o 2 7 +-+-+ +--+--+ - | | | | | - 10 11 13 14 15 -DEPTH can be a positive integer, 0, nil, or \\='up. A positive -integer or 0 means go DEPTH deep counting from NODE. A nil means -no limit. And a symbol \\='up means go upwards only: only traverse -sibling and parent, never go down to children. - -The difference between 0 and \\='up is subtle: in the above example, -if given 0 as DEPTH, node 1 3 4 5 6 8 9 12 16 are visited; if -given \\='up as DEPTH, only node 1 3 4 8 16 are visited." - ;; First try NODE's subtree, but only under these conditions: if - ;; DEPTH is a number, it has to be greater than 0, if it's a symbol, - ;; it cannot be 'up. - (or (and (if (numberp depth) (> depth 0) (not (eq depth 'up))) - (treesit-traverse-depth-first node pred step depth)) - ;; If no match, try the next node: next sibling, or parent if no - ;; next sibling exists. - (catch 'match - (let ((next (list nil node))) - ;; If NEXT is parent, call PRED on it and keep going. We - ;; can always go to parent, regardless the value of DEPTH. - (while (and (setq next (treesit-next-sibling-or-up - (cadr next) step)) - (eq (car next) 'parent)) - (when (numberp depth) (cl-incf depth)) - (when (funcall pred (cadr next)) - (throw 'match (cadr next)))) - (when next - ;; If NEXT is non-nil, it must be ('sibling node). - (treesit-traverse-forward - (cadr next) pred step depth)))))) - (defun treesit-node-children (node &optional named) "Return a list of NODE's children. If NAMED is non-nil, collect named child only." @@ -846,138 +722,9 @@ treesit-check-indent ;;; Search -;; TODO: It might be more performant if we implement this in C. -(defun treesit-search-forward (pos-fn arg query &optional lang up-only) - "Search forward for nodes that matches QUERY from current point. 
- -QUERY has to capture the node to match. LANG specifies the -language in which we search for nodes. If LANG is nil, use the -first parser in (`treesit-parser-list'). - -Move forward/backward ARG times, positive ARG means go forward, -negative ARG means go backward. - -POS-FN can be either `treesit-node-start' or `treesit-node-end', -or any function that takes a node and returns a position. - -If the search succeeds, stop at the position returned by POS-FN and -return the matched node. Return nil if search failed. - -We search by traversing the parse tree, visiting every node -that's after (or before) the smallest node at point (retrieved by -`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or -parent in the tree, never go down into children when traversing -the tree." - (cl-loop for idx from 1 to (abs arg) - for parser = (if lang - (treesit-parser-create lang) - (car (treesit-parser-list))) - for node = - (if-let ((starting-point (point)) - (node (treesit-node-at (point) parser t))) - (treesit-traverse-forward - node - (lambda (node) - (and (not (eq (funcall pos-fn node) - starting-point)) - (cl-loop - for cap-node in - (mapcar - #'cdr - (treesit-query-capture node query)) - if (and (treesit-node-eq cap-node node) - (if (> arg 0) - ;; Make sure we moved forward. - (> (funcall pos-fn node) - starting-point) - ;; Make sure we moved backward. - (< (funcall pos-fn node) - starting-point))) - return t))) - ;; The AND form converts non-nil/nil into t/nil. - arg (and up-only t))) - for pos = (funcall pos-fn node) - ;; If we can find a match, jump to it. - if pos do (goto-char pos) - else return nil - ;; Return t to indicate that search is successful. - finally return node)) - -(defun treesit-search-beginning (query arg &optional lang up-only) - "Search forward for nodes that matches QUERY. - -Stops at the beginning of matched node. - -QUERY has to capture the node to match. LANG specifies the -language in which we search for nodes. 
If LANG is nil, use the -first parser in current buffer's parser list. - -Move forward/backward ARG times, positive ARG means go forward, -negative ARG means go backward. - -If the search succeeds, return the matched node. Return nil if -search failed. - -We search by traversing the parse tree, visiting every node -that's after (or before) the smallest node at point (retrieved by -`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or -parent in the tree, never go down into children when traversing -the tree." - (treesit-search-forward #'treesit-node-start arg query lang - up-only)) - -(defun treesit-search-end (query arg &optional lang up-only) - "Search forward for nodes that matches QUERY. - -Stops at the end of matched node. - -QUERY has to capture the node to match. LANG specifies the -language in which we search for nodes. If LANG is nil, use the -first parser in (`treesit-parser-list'). - -Move forward/backward ARG times, positive ARG means go forward, -negative ARG means go backward. - -If the search succeeds, return the matched node. Return nil if -search failed. - -We search by traversing the parse tree, visiting every node -that's after (or before) the smallest node at point (retrieved by -`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or -parent in the tree, never go down into children when traversing -the tree." - (treesit-search-forward #'treesit-node-end arg query lang - up-only)) - -;;; Navigation - -(defvar-local treesit-defun-query nil - "A tree-sitter query that matches function/class definitions. -Capture names don't matter. This variable is used by navigation -functions like `treesit-beginning-of-defun'. - -It is recommended to use a compiled query for this variable. See -`treesit-query-in' for what a query should look like.") - -(defun treesit-beginning-of-defun (&optional arg) - "Move backward to the beginning of a defun. - -With ARG, do it that many times. 
Negative ARG means move forward -to the ARGth following beginning of defun. Defun is defined -according to `treesit-defun-query'." - (unless treesit-defun-query - (error "Variable `treesit-defun-query' is unset")) - (treesit-search-beginning treesit-defun-query (- (or arg 1)) nil t)) - -(defun treesit-end-of-defun (&optional arg) - "Move forward to the end of a defun. - -With ARG, do it that many times. Negative ARG means move back to -ARGth preceding end of defun. Defun is defined according to -`treesit-defun-query'." - (unless treesit-defun-query - (error "Variable `treesit-defun-query' is unset")) - (treesit-search-end treesit-defun-query (or arg 1) nil t)) + + + ;;; Debugging diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index aea417d47e..fbf99ff087 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -434,20 +434,12 @@ treesit-node-supplemental ;; `treesit-parent-while' ;; `treesit-node-children' ;; `treesit-node-field-name' - ;; `treesit-next-sibling-or-up' - ;; `treesit-traverse-depth-first' - ;; `treesit-traverse-breadth-first' - ;; `treesit-traverse-forward-depth-first' )) ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list ;; - treesit-search-forward -;; - treesit-search-beginning -;; - treesit-search-end -;; - treesit-beginning-of-defun -;; - treesit-end-of-defun (provide 'treesit-tests) ;;; treesit-tests.el ends here commit 1575ee2eeb1ebb5b73b4b76fc7dc7f5702748540 Author: Yuan Fu Date: Sat Sep 24 19:24:06 2022 -0700 Accept nil as NODE in treesit-node-text * lisp/treesit.el (treesit-node-text): Just wrap function body in (when ...). diff --git a/lisp/treesit.el b/lisp/treesit.el index 9750ac7b7b..28a74657f9 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -172,14 +172,15 @@ treesit-filter-child (defun treesit-node-text (node &optional no-property) "Return the buffer (or string) content corresponding to NODE. If NO-PROPERTY is non-nil, remove text properties." 
- (with-current-buffer (treesit-node-buffer node) - (if no-property - (buffer-substring-no-properties + (when node + (with-current-buffer (treesit-node-buffer node) + (if no-property + (buffer-substring-no-properties + (treesit-node-start node) + (treesit-node-end node)) + (buffer-substring (treesit-node-start node) - (treesit-node-end node)) - (buffer-substring - (treesit-node-start node) - (treesit-node-end node))))) + (treesit-node-end node)))))) (defun treesit-parent-until (node pred) "Return the closest parent of NODE that satisfies PRED. commit 17422c2cfcbf1670ba4dbe3ecdf3c5ff719201cc Author: Yuan Fu Date: Sat Sep 24 19:23:21 2022 -0700 ; * src/treesit.c (Ftreesit_node_field_name_for_child): Doc fix. diff --git a/src/treesit.c b/src/treesit.c index 625a7932ae..51261c34a2 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1147,7 +1147,7 @@ DEFUN ("treesit-node-field-name-for-child", Streesit_node_field_name_for_child, 2, 2, 0, doc: /* Return the field name of the Nth child of NODE. -Return nil if there isn't any child or no field is found. +Return nil if not any child or no field is found. If NODE is nil, return nil. */) (Lisp_Object node, Lisp_Object n) { commit b584569014abb1fe9f59decd86ea097a189d9bb5 Author: Yuan Fu Date: Sat Sep 24 19:22:49 2022 -0700 Change make_string to build_string in treesit.c * src/treesit.c (Ftreesit_node_string) (Ftreesit_node_field_name_for_child): Change make_string to build_string. 
diff --git a/src/treesit.c b/src/treesit.c index eb323e6360..625a7932ae 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1053,7 +1053,7 @@ DEFUN ("treesit-node-string", ts_check_node (node); TSNode ts_node = XTS_NODE (node)->node; char *string = ts_node_string (ts_node); - return make_string (string, strlen (string)); + return build_string (string); } DEFUN ("treesit-node-parent", @@ -1163,7 +1163,7 @@ DEFUN ("treesit-node-field-name-for-child", if (name == NULL) return Qnil; - return make_string (name, strlen (name)); + return build_string (name); } DEFUN ("treesit-node-child-count", commit 914f68da059d5aeed3459e3944769aa4370a4075 Author: Yuan Fu Date: Sat Sep 24 19:20:59 2022 -0700 ; Minor tree-sitter manual fix * doc/lispref/parsing.texi (Retrieving Node): Remove the quote. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index ac156d9996..a83ad20281 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -617,7 +617,7 @@ Retrieving Node Function @var{pred} takes the child node as the argument and should return non-nil to indicated keeping the child. If @var{named} -non-nil, this function only searches for named nodes." +non-nil, this function only searches for named nodes. @end defun @defun treesit-parent-until node pred commit c5147882a91e51b59c7da035e9ef38a4731b943d Author: Yuan Fu Date: Sat Sep 24 19:20:06 2022 -0700 ; Minor manual fix for tree-sitter indent * doc/lispref/modes.texi (Parser-based Indentation): Change var to code. diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 45a44acf54..2d80a9db2f 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -4694,7 +4694,7 @@ Parser-based Indentation and the engine takes care of the rest. To enable the indentation engine, set the value of -@var{indent-line-function} to @code{treesit-indent}. +@code{indent-line-function} to @code{treesit-indent}. 
@defvar treesit-indent-function This variable stores the actual function called by commit 08a1c32d0bcaa9369a34e7f7d6d01c3885f62e21 Author: Yuan Fu Date: Sat Sep 24 19:19:03 2022 -0700 Improve printing treesit nodes * src/print.c (print_vectorlike): Instead of position, print the type of the node. diff --git a/src/print.c b/src/print.c index 12b5087435..4f41448d86 100644 --- a/src/print.c +++ b/src/print.c @@ -2024,12 +2024,15 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag, printchar ('>', printcharfun); break; case PVEC_TS_NODE: - print_c_string ("#> or + #>. */ + print_c_string ("#node); + const char *delim1 = named ? "(" : "\""; + const char *delim2 = named ? ")" : "\""; + print_c_string (delim1, printcharfun); + print_string (Ftreesit_node_type (obj), printcharfun); + print_c_string (delim2, printcharfun); print_c_string (" in ", printcharfun); print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer, printcharfun, escapeflag); commit 013c7d6aaef5f90730b1cfe42a01534d891e895a Author: Yuan Fu Date: Tue Sep 20 18:01:15 2022 -0700 Rename treesit-expand-query/pattern * src/treesit.c (treesit-expand-pattern): Rename to treesit-pattern-expand. (treesit-expand-query): Rename to treesit-query-expand. (make_ts_query): Use new name. * test/src/treesit-tests.el (treesit-query-api): Fix name. diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index ebfe650dc0..aea417d47e 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -163,11 +163,11 @@ treesit-query-api (treesit-node-text (cdr entry)))) (treesit-query-capture root-node query)))))) - ;; Test `treesit-expand-query'. + ;; Test `treesit-query-expand'. (should (equal "(type field: (_) @capture .) ? * + \"return\"" - (treesit-expand-query + (treesit-query-expand '((type field: (_) @capture :anchor) :?
:* :+ "return"))))))) commit 1cdb24fe35a9ff2e4f92c5acc93a5a5b0e70d93f Author: Yuan Fu Date: Thu Sep 22 11:24:32 2022 -0700 Utilize tree-sitter in python.el * lisp/progmodes/python.el (python-use-tree-sitter): New option. (python--treesit-keywords, python--treesit-builtins) (python--treesit-constants, python--treesit-operators) (python--treesit-special-attributes, python--treesit-exceptions): New variables. (python--treesit-fontify-string, python--treesit-settings) (python--imenu-treesit-create-index) (python--imenu-treesit-create-flat-index) (python-info-treesit-current-defun): New functions. (python-mode): Enable tree-sitter font-lock and which-func. diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index d3ffc2db2c..8368f4da51 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -245,7 +245,9 @@ (require 'ansi-color) (require 'cl-lib) (require 'comint) -(eval-when-compile (require 'subr-x)) ;For `string-empty-p'. +(eval-when-compile (require 'subr-x)) ;For `string-empty-p' and `string-join'. +(require 'treesit) +(require 'pcase) ;; Avoid compiler warnings (defvar compilation-error-regexp-alist) @@ -265,6 +267,11 @@ python :version "24.3" :link '(emacs-commentary-link "python")) +(defcustom python-use-tree-sitter nil + "If non-nil, `python-mode' tries to use tree-sitter. +Currently `python-mode' uses tree-sitter for font-locking, imenu, +and movement functions." + :type 'boolean) ;;; Bindings @@ -899,6 +906,136 @@ python-dotty-syntax-table "Dotty syntax table for Python files. It makes underscores and dots word constituent chars.") +;;; Tree-sitter font-lock + +;; NOTE: Tree-sitter and font-lock works differently so this can't +;; merge with `python-font-lock-keywords-level-2'. 
+ +(defvar python--treesit-keywords + '("as" "assert" "async" "await" "break" "class" "continue" "def" + "del" "elif" "else" "except" "exec" "finally" "for" "from" + "global" "if" "import" "lambda" "nonlocal" "pass" "print" + "raise" "return" "try" "while" "with" "yield")) + +(defvar python--treesit-builtins + '("abs" "all" "any" "ascii" "bin" "bool" "breakpoint" "bytearray" + "bytes" "callable" "chr" "classmethod" "compile" "complex" + "delattr" "dict" "dir" "divmod" "enumerate" "eval" "exec" + "filter" "float" "format" "frozenset" "getattr" "globals" + "hasattr" "hash" "help" "hex" "id" "input" "int" "isinstance" + "issubclass" "iter" "len" "list" "locals" "map" "max" + "memoryview" "min" "next" "object" "oct" "open" "ord" "pow" + "print" "property" "range" "repr" "reversed" "round" "set" + "setattr" "slice" "sorted" "staticmethod" "str" "sum" "super" + "tuple" "type" "vars" "zip" "__import__")) + +(defvar python--treesit-constants + '("Ellipsis" "False" "None" "NotImplemented" "True" "__debug__" + "copyright" "credits" "exit" "license" "quit")) + +(defvar python--treesit-operators + '("-" "-=" "!=" "*" "**" "**=" "*=" "/" "//" "//=" "/=" "&" "%" "%=" + "^" "+" "+=" "<" "<<" "<=" "<>" "=" "==" ">" ">=" ">>" "|" "~" + "and" "in" "is" "not" "or")) + +(defvar python--treesit-special-attributes + '("__annotations__" "__closure__" "__code__" + "__defaults__" "__dict__" "__doc__" "__globals__" + "__kwdefaults__" "__name__" "__module__" "__package__" + "__qualname__" "__all__")) + +(defvar python--treesit-exceptions + '(;; Python 2 and 3: + "ArithmeticError" "AssertionError" "AttributeError" "BaseException" + "BufferError" "BytesWarning" "DeprecationWarning" "EOFError" + "EnvironmentError" "Exception" "FloatingPointError" "FutureWarning" + "GeneratorExit" "IOError" "ImportError" "ImportWarning" + "IndentationError" "IndexError" "KeyError" "KeyboardInterrupt" + "LookupError" "MemoryError" "NameError" "NotImplementedError" + "OSError" "OverflowError" 
"PendingDeprecationWarning" + "ReferenceError" "RuntimeError" "RuntimeWarning" "StopIteration" + "SyntaxError" "SyntaxWarning" "SystemError" "SystemExit" "TabError" + "TypeError" "UnboundLocalError" "UnicodeDecodeError" + "UnicodeEncodeError" "UnicodeError" "UnicodeTranslateError" + "UnicodeWarning" "UserWarning" "ValueError" "Warning" + "ZeroDivisionError" + ;; Python 2: + "StandardError" + ;; Python 3: + "BlockingIOError" "BrokenPipeError" "ChildProcessError" + "ConnectionAbortedError" "ConnectionError" "ConnectionRefusedError" + "ConnectionResetError" "FileExistsError" "FileNotFoundError" + "InterruptedError" "IsADirectoryError" "NotADirectoryError" + "PermissionError" "ProcessLookupError" "RecursionError" + "ResourceWarning" "StopAsyncIteration" "TimeoutError" + ;; OS specific + "VMSError" "WindowsError" + )) + +(defun python--treesit-fontify-string (beg end _) + "Fontify string between BEG and END. +Do not fontify the initial f for f-strings." + (let ((beg (if (eq (char-after beg) ?f) + (1+ beg) beg))) + (put-text-property beg end 'face 'font-lock-string-face))) + +(defvar python--treesit-settings + (treesit-font-lock-rules + :language 'python + `(;; Queries for def and class. + (function_definition + name: (identifier) @font-lock-function-name-face) + + (class_definition + name: (identifier) @font-lock-type-face) + + ;; Comment and string. + (comment) @font-lock-comment-face + + (string) @python--treesit-fontify-string + ((string) @font-lock-doc-face + (:match "^\"\"\"" @font-lock-doc-face)) + (interpolation (identifier) @font-lock-variable-name-face) + + ;; Keywords, builtins, and constants. 
+ [,@python--treesit-keywords] @font-lock-keyword-face + + ((identifier) @font-lock-keyword-face + (:match "^self$" @font-lock-keyword-face)) + + ((identifier) @font-lock-builtin-face + (:match ,(rx-to-string + `(seq bol + (or ,@python--treesit-builtins + ,@python--treesit-special-attributes) + eol)) + @font-lock-builtin-face)) + + [(true) (false) (none)] @font-lock-constant-face + + ;; Escape sequences + (escape_sequence) @font-lock-constant-face + + ;; Variable names. + (assignment left: (identifier) + @font-lock-variable-name-face) + (pattern_list (identifier) + @font-lock-variable-name-face) + (tuple_pattern (identifier) + @font-lock-variable-name-face) + (list_pattern (identifier) + @font-lock-variable-name-face) + (list_splat_pattern (identifier) + @font-lock-variable-name-face) + + ;; Types and decorators. + (decorator) @font-lock-type-face + ((identifier) @font-lock-type-face + (:match ,(rx-to-string + `(seq bol (or ,@python--treesit-exceptions) + eol)) + @font-lock-type-face)) + (type (identifier) @font-lock-type-face)))) ;;; Indentation @@ -5106,6 +5243,73 @@ python-imenu-create-flat-index (python-imenu-format-parent-item-jump-label-function fn)) (python-imenu-create-index)))))) +;;; Tree-sitter imenu +;; +;; This works, but is slower than the native functions, presumably +;; because traversing the parser tree is slower than scanning the +;; text. Also I'm sure this consumes more memory as we allocate +;; memory for every node in the tree. + +(defun python--imenu-treesit-create-index (&optional node) + "Return tree Imenu alist for the current Python buffer. + +Change `python-imenu-format-item-label-function', +`python-imenu-format-parent-item-label-function', +`python-imenu-format-parent-item-jump-label-function' to +customize how labels are formatted. + +NODE is the root node of the subtree you want to build an index +of. If nil, use the root node of the whole parse tree. + +Similar to `python-imenu-create-index' but use tree-sitter." 
+ (let* ((node (or node (treesit-buffer-root-node 'python))) + (children (treesit-node-children node t)) + (subtrees (mapcan #'python--imenu-treesit-create-index + children)) + (type (pcase (treesit-node-type node) + ("function_definition" 'def) + ("class_definition" 'class) + (_ nil))) + (name (when type + (treesit-node-text + (treesit-node-child-by-field-name + node "name") t)))) + (cond + ;; 1. This node is a function/class and doesn't have children. + ((and type (not subtrees)) + (let ((label + (funcall python-imenu-format-item-label-function + type name))) + (list (cons label + (set-marker (make-marker) + (treesit-node-start node)))))) + ;; 2. This node is a function/class and has children. + ((and type subtrees) + (let ((parent-label + (funcall python-imenu-format-parent-item-label-function + type name)) + (jump-label + (funcall python-imenu-format-parent-item-jump-label-function + type name))) + `((,parent-label + ,(cons jump-label (set-marker (make-marker) + (treesit-node-start node))) + ,@subtrees)))) + ;; 3. This node is not a function/class. + ((not type) subtrees)))) + +(defun python--imenu-treesit-create-flat-index () + "Return flat outline of the current Python buffer for Imenu. + +Change `python-imenu-format-item-label-function', +`python-imenu-format-parent-item-label-function', +`python-imenu-format-parent-item-jump-label-function' to +customize how labels are formatted. + +Similar to `python-imenu-create-flat-index' but use +tree-sitter." + (python-imenu-create-flat-index + (python--imenu-treesit-create-index))) ;;; Misc helpers @@ -5171,6 +5375,29 @@ python-info-current-defun (concat (and type (format "%s " type)) (mapconcat #'identity names "."))))))) +(defun python-info-treesit-current-defun (&optional include-type) + "Identical to `python-info-current-defun' but use tree-sitter. +For INCLUDE-TYPE see `python-info-current-defun'." 
+ (let ((node (treesit-node-at (point))) + (name-list ()) + (type 'def)) + (cl-loop while node + if (pcase (treesit-node-type node) + ("function_definition" + (setq type 'def)) + ("class_definition" + (setq type 'class)) + (_ nil)) + do (push (treesit-node-text + (treesit-node-child-by-field-name node "name") + t) + name-list) + do (setq node (treesit-node-parent node)) + finally return (concat (if include-type + (format "%s " type) + "") + (string-join name-list "."))))) + (defun python-info-current-symbol (&optional replace-self) "Return current symbol using dotty syntax. With optional argument REPLACE-SELF convert \"self\" to current @@ -5851,13 +6078,20 @@ python-mode (setq-local forward-sexp-function python-forward-sexp-function) - (setq-local font-lock-defaults + (if (and python-use-tree-sitter + (treesit-can-enable-p)) + (progn + (setq-local font-lock-defaults '(nil t)) + (setq-local treesit-font-lock-settings + python--treesit-settings) + (treesit-font-lock-enable)) + (setq-local font-lock-defaults `(,python-font-lock-keywords nil nil nil nil (font-lock-syntactic-face-function . python-font-lock-syntactic-face-function) (font-lock-extend-after-change-region-function - . python-font-lock-extend-region))) + . python-font-lock-extend-region)))) (setq-local syntax-propertize-function python-syntax-propertize-function) @@ -5892,7 +6126,11 @@ python-mode (setq-local add-log-current-defun-function #'python-info-current-defun) - (add-hook 'which-func-functions #'python-info-current-defun nil t) + (if (and python-use-tree-sitter + (treesit-can-enable-p)) + (add-hook 'which-func-functions + #'python-info-treesit-current-defun nil t) + (add-hook 'which-func-functions #'python-info-current-defun nil t)) (setq-local skeleton-further-elements '((abbrev-mode nil) commit 361eaae71e30d9554bb78d9ccaeddf3e006115c0 Author: Yuan Fu Date: Tue Sep 20 18:10:01 2022 -0700 Improve treesit-query-capture * src/treesit.c (Ftreesit_query_capture): Add a suggestion in the signal message. 
diff --git a/src/treesit.c b/src/treesit.c index 775f823fb3..eb323e6360 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1727,10 +1727,11 @@ DEFUN ("treesit-query-capture", &error_offset, &error_type); if (lisp_query == NULL) { - xsignal2 (Qtreesit_query_error, + xsignal3 (Qtreesit_query_error, build_string (ts_query_error_to_string (error_type)), - make_fixnum (error_offset + 1)); + make_fixnum (error_offset + 1), + build_pure_c_string("Debug the query with `treesit-query-validate'")); } /* We don't need need to free TS_QUERY and CURSOR, they are stored in a lisp object, which is tracked by gc. */ commit cb0464bf4248a8c48e3da2dd0c502091e4ec489a Author: Yuan Fu Date: Tue Sep 20 18:02:16 2022 -0700 Fix treesit-query-validate * lisp/treesit.el (treesit-query-validate): Add a call to treesit-query-expand so this function works on both sexp and string query, as expected. diff --git a/lisp/treesit.el b/lisp/treesit.el index 709f826f32..9750ac7b7b 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1079,7 +1079,7 @@ treesit-query-validate (message (nth 0 data)) (start (nth 1 data))) (erase-buffer) - (insert query) + (insert (treesit-query-expand query)) (goto-char start) (search-forward " " nil t) (put-text-property start (point) 'face 'error) commit 55e01229dafad75affbe49446f2aef3fbca2f3a8 Author: Yuan Fu Date: Tue Sep 20 18:01:15 2022 -0700 Rename treesit-expand-query/pattern * src/treesit.c (treesit-expand-pattern): Rename to treesit-patter-expand. (treesit-expand-query): Rename to treesit-query-expand. (make_ts_query): Use new name. 
diff --git a/src/treesit.c b/src/treesit.c index 2b3ab643fa..775f823fb3 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -627,7 +627,7 @@ make_ts_query (Lisp_Object query, const TSLanguage *language, uint32_t *error_offset, TSQueryError *error_type) { if (CONSP (query)) - query = Ftreesit_expand_query (query); + query = Ftreesit_query_expand (query); char *source = SSDATA (query); TSQuery *ts_query = ts_query_new (language, source, strlen (source), @@ -1360,11 +1360,9 @@ DEFUN ("treesit-node-eq", /*** Query functions */ -/* If we decide to pre-load tree-sitter.el, maybe we can implement - this function in Lisp. */ -DEFUN ("treesit-expand-pattern", - Ftreesit_expand_pattern, - Streesit_expand_pattern, 1, 1, 0, +DEFUN ("treesit-pattern-expand", + Ftreesit_pattern_expand, + Streesit_pattern_expand, 1, 1, 0, doc: /* Expand PATTERN to its string form. PATTERN can be @@ -1406,16 +1404,16 @@ DEFUN ("treesit-expand-pattern", if (VECTORP (pattern) || CONSP (pattern)) return concat3 (opening_delimeter, Fmapconcat (intern_c_string - ("treesit-expand-pattern"), + ("treesit-pattern-expand"), pattern, build_pure_c_string (" ")), closing_delimiter); return CALLN (Fformat, build_pure_c_string("%S"), pattern); } -DEFUN ("treesit-expand-query", - Ftreesit_expand_query, - Streesit_expand_query, 1, 1, 0, +DEFUN ("treesit-query-expand", + Ftreesit_query_expand, + Streesit_query_expand, 1, 1, 0, doc: /* Expand sexp QUERY to its string form. A PATTERN in QUERY can be @@ -1438,7 +1436,7 @@ DEFUN ("treesit-expand-query", explanation. 
*/) (Lisp_Object query) { - return Fmapconcat (intern_c_string ("treesit-expand-pattern"), + return Fmapconcat (intern_c_string ("treesit-pattern-expand"), query, build_pure_c_string (" ")); } @@ -1922,8 +1920,8 @@ syms_of_treesit (void) defsubr (&Streesit_node_descendant_for_range); defsubr (&Streesit_node_eq); - defsubr (&Streesit_expand_pattern); - defsubr (&Streesit_expand_query); + defsubr (&Streesit_pattern_expand); + defsubr (&Streesit_query_expand); defsubr (&Streesit_query_compile); defsubr (&Streesit_query_capture); } commit 1d3234988a32d32570729b4dfcc00712636ec450 Author: Yuan Fu Date: Thu Sep 8 12:52:25 2022 -0700 Add node-only parameter to treesit-query-capture * doc/lispref/parsing.texi (Pattern Matching): Mention the new parameter. * lisp/treesit.el (treesit-query-in): Add node-only. * src/treesit.c (Ftreesit_query_capture): Add node-only. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 0a025bd249..ac156d9996 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -874,15 +874,17 @@ Pattern Matching Now we can introduce the query functions. -@defun treesit-query-capture node query &optional beg end +@defun treesit-query-capture node query &optional beg end node-only This function matches patterns in @var{query} in @var{node}. Argument @var{query} can be either a string, a s-expression, or a compiled query object. For now, we focus on the string syntax; s-expression syntax and compiled query are described at the end of the section. The function returns all captured nodes in a list of -@code{(@var{capture_name} . @var{node})}. If @var{beg} and @var{end} -are both non-nil, it only pattern matches nodes in that range. +@code{(@var{capture_name} . @var{node})}. If @var{node-only} is +non-nil, a list of node is returned instead. If @var{beg} and +@var{end} are both non-nil, this function only pattern matches nodes +in that range. 
@vindex treesit-query-error This function raise a @var{treesit-query-error} if @var{query} is @@ -890,11 +892,12 @@ Pattern Matching error. You can use @code{treesit-query-validate} to debug the query. @end defun -@defun treesit-query-in source query &optional beg end +@defun treesit-query-in source query &optional beg end node-only This function matches patterns in @var{query} in @var{source}, and returns all captured nodes in a list of @code{(@var{capture_name} -. @var{node})}. If @var{beg} and @var{end} are both non-nil, it only -pattern match nodes in that range. +. @var{node})}. If @var{node-only} is non-nil, a list of node is +returned instead. If @var{beg} and @var{end} are both non-nil, it +only pattern match nodes in that range. Argument @var{source} designates a node, it can be a language symbol, a parser, or simply a node. If a language symbol, @var{source} diff --git a/lisp/treesit.el b/lisp/treesit.el index a374ceda6d..709f826f32 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -350,7 +350,7 @@ treesit-node-field-name ;;; Query API supplement -(defun treesit-query-in (source query &optional beg end) +(defun treesit-query-in (source query &optional beg end node-only) "Query the current buffer with QUERY. SOURCE can be a language symbol, a parser, or a node. If a @@ -366,7 +366,7 @@ treesit-query-in used over and over. BEG and END, if _both_ non-nil, specifies the range in which the query -is executed. +is executed. If NODE-ONLY non-nil, return a list of nodes. Raise an treesit-query-error if QUERY is malformed." (treesit-query-capture @@ -375,7 +375,7 @@ treesit-query-in (treesit-parser-root-node source)) ((treesit-node-p source) source)) query - beg end)) + beg end node-only)) (defun treesit-query-string (string query language) "Query STRING with QUERY in LANGUAGE. 
diff --git a/src/treesit.c b/src/treesit.c index 48de9436d2..2b3ab643fa 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1678,7 +1678,7 @@ DEFUN ("treesit-query-compile", DEFUN ("treesit-query-capture", Ftreesit_query_capture, - Streesit_query_capture, 2, 4, 0, + Streesit_query_capture, 2, 5, 0, doc: /* Query NODE with patterns in QUERY. Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name @@ -1691,13 +1691,13 @@ DEFUN ("treesit-query-capture", compile your queries if it will be used over and over. BEG and END, if both non-nil, specifies the range in which the query -is executed. +is executed. If NODE-ONLY is non-nil, return a list of nodes. Signals treesit-query-error if QUERY is malformed or something else goes wrong. You can use `treesit-query-validate' to debug the query. */) (Lisp_Object node, Lisp_Object query, - Lisp_Object beg, Lisp_Object end) + Lisp_Object beg, Lisp_Object end, Lisp_Object node_only) { ts_check_node (node); if (!NILP (beg)) @@ -1775,11 +1775,20 @@ DEFUN ("treesit-query-capture", TSQueryCapture capture = captures[idx]; Lisp_Object captured_node = make_ts_node(lisp_parser, capture.node); - const char *capture_name = ts_query_capture_name_for_id - (ts_query, capture.index, &capture_name_len); - Lisp_Object cap = - Fcons (intern_c_string_1 (capture_name, capture_name_len), - captured_node); + + Lisp_Object cap; + if (NILP (node_only)) + { + const char *capture_name = ts_query_capture_name_for_id + (ts_query, capture.index, &capture_name_len); + cap = + Fcons (intern_c_string_1 (capture_name, capture_name_len), + captured_node); + } + else + { + cap = captured_node; + } result = Fcons (cap, result); } /* Get predicates. */ commit 56dbb5db3b10d031826f02d3cf77449d81a2b682 Author: Yuan Fu Date: Thu Sep 8 12:50:47 2022 -0700 * lisp/treesit.el (treesit-can-enable-p): Minor fix. 
diff --git a/lisp/treesit.el b/lisp/treesit.el index 83d80ac6da..a374ceda6d 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -48,7 +48,7 @@ treesit-can-enable-p Currently this function checks whether tree-sitter is available and the buffer size." (and (treesit-available-p) - (< (buffer-size) treesit-maximum-size))) + (< (buffer-size) treesit-max-buffer-size))) ;;; Parser API supplement commit c2a9fe0c1dfa6e36514d4c02e4134b84d9cceb46 Author: Yuan Fu Date: Wed Sep 7 16:15:01 2022 -0700 Fix tree-sitter manual entry for treesit-should-enable-p * doc/lispref/parsing.texi: Move two entries in the front to "Using Parser" section, replacing the old entry. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 7777ce9360..0a025bd249 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -26,18 +26,6 @@ Parsing Program Source for this Emacs instance. @end defun -@defvar treesit-max-buffer-size -This variable contains the maximum size of buffers in which -tree-sitter can be activated. Major modes should check this value -when deciding whether to enable tree-sitter features. -@end defvar - -@defun treesit-can-enable-p -This function checks whether the current buffer is suitable for -activating tree-sitter features. It basically checks -@code{treesit-available-p} and @var{treesit-max-buffer-size}. -@end defun - For tree-sitter integration with existing Emacs features, @pxref{Parser-based Font Lock}, @ref{Parser-based Indentation}, and @ref{List Motion}. @@ -334,25 +322,18 @@ Using Parser buffer. As we edit the buffer, the associated parser is automatically kept up-to-date. -@defvar treesit-maximum-size -If users want to turn off tree-sitter for buffers larger than a -particular size (because tree-sitter consumes memory ~10 times the -buffer size for storing the syntax tree), they set this variable to -that size. +@defvar treesit-max-buffer-size +This variable contains the maximum size of buffers in which +tree-sitter can be activated. 
Major modes should check this value +when deciding whether to enable tree-sitter features. @end defvar -@defun treesit-should-enable-p &optional mode -This function returns non-nil if @var{mode} (default to the current -major mode) should activate tree-sitter features. The result depends -on the value of @var{treesit-disabled-modes} and -@var{treesit-maximum-size} described above. The result also -depends on, of course, the result of @code{treesit-avaliabe-p}. - -Writer of major modes or other packages are responsible for calling -this function and determine whether to activate tree-sitter features. +@defun treesit-can-enable-p +This function checks whether the current buffer is suitable for +activating tree-sitter features. It basically checks +@code{treesit-available-p} and @var{treesit-max-buffer-size}. @end defun - @cindex Creating tree-sitter parsers @defun treesit-parser-create language &optional buffer no-reuse To create a parser, we provide a @var{buffer} to keep track of and the commit a23aec59b3a6ed2e96a89dab18f51a6310f1ac7c Author: Yuan Fu Date: Wed Sep 7 13:20:37 2022 -0700 Remove treesit-disabled-modes and change treesit-should-enable-p Per emacs-devel discussion, remove treesit-disabled-modes and let major modes to provide tree-sitter switches. I also decided to add treesit-max-buffer-size to elisp manual despite it being a user option. Though we should still add it to the user manual. * doc/lispref/parsing.texi (Parsing Program Source): Update manual to remove entries for treesit-diabled-modes and add treesit-max-buffer-size. Also update treesit-should-enable-p. * lisp/treesit.el (treesit-disabled-modes): Remove user option. (treesit-maximum-size): Change to treesit-max-buffer-size. (treesit-should-enable-p): Change to treesit-can-enable-p and remove checks of treesit-disabled-modes. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 6d5c7b8dc2..7777ce9360 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -26,8 +26,21 @@ Parsing Program Source for this Emacs instance. @end defun -For using tree-sitter features in font-lock and indentation, -@pxref{Parser-based Font Lock}, @pxref{Parser-based Indentation}. +@defvar treesit-max-buffer-size +This variable contains the maximum size of buffers in which +tree-sitter can be activated. Major modes should check this value +when deciding whether to enable tree-sitter features. +@end defvar + +@defun treesit-can-enable-p +This function checks whether the current buffer is suitable for +activating tree-sitter features. It basically checks +@code{treesit-available-p} and @var{treesit-max-buffer-size}. +@end defun + +For tree-sitter integration with existing Emacs features, +@pxref{Parser-based Font Lock}, @ref{Parser-based Indentation}, and +@ref{List Motion}. To access the syntax tree of the text in a buffer, we need to first load a language definition and create a parser with it. Next, we can @@ -321,13 +334,6 @@ Using Parser buffer. As we edit the buffer, the associated parser is automatically kept up-to-date. -@defvar treesit-disabled-modes -Before creating a parser, it is perhaps good to check whether we -should use tree-sitter at all. Sometimes a user don't want to use -tree-sitter features for a major mode. To turn-off tree-sitter for a -mode, they add that mode to this variable. -@end defvar - @defvar treesit-maximum-size If users want to turn off tree-sitter for buffers larger than a particular size (because tree-sitter consumes memory ~10 times the diff --git a/lisp/treesit.el b/lisp/treesit.el index 9c66f32ec2..83d80ac6da 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -35,11 +35,7 @@ treesit "Tree-sitter is an incremental parser." 
:group 'tools) -(defcustom treesit-disabled-modes nil - "A list of major-modes for which tree-sitter support is disabled." - :type '(list symbol)) - -(defcustom treesit-maximum-size (* 4 1024 1024) +(defcustom treesit-max-buffer-size (* 4 1024 1024) "Maximum buffer size for enabling tree-sitter parsing." :type 'integer) @@ -47,21 +43,12 @@ treesit-available-p "Return non-nil if tree-sitter features are available." (fboundp 'treesit-parser-create)) -(defun treesit-should-enable-p (&optional mode) - "Return non-nil if MODE should activate tree-sitter support. -MODE defaults to the value of `major-mode'. The result depends -on the value of `treesit-disabled-modes', -`treesit-maximum-size', and of course, whether tree-sitter is -available on the system at all." - (let* ((mode (or mode major-mode)) - (disabled (cl-loop - for disabled-mode in treesit-disabled-modes - if (provided-mode-derived-p mode disabled-mode) - return t - finally return nil))) - (and (treesit-available-p) - (not disabled) - (< (buffer-size) treesit-maximum-size)))) +(defun treesit-can-enable-p () + "Return non-nil if current buffer can activate tree-sitter. +Currently this function checks whether tree-sitter is available +and the buffer size." + (and (treesit-available-p) + (< (buffer-size) treesit-maximum-size))) ;;; Parser API supplement commit 31ad906bd00a3c624ce024f7caa62e9f0b381b37 Author: Yuan Fu Date: Wed Sep 7 11:52:13 2022 -0700 Add manual entry for tree-sitter search functions * doc/lispref/parsing.texi (Retrieving Node): New subsection "Searching for node". * doc/lispref/positions.texi (List Motion): Add entries for treesit-defun-query, treesit-beginning-of-defun, treesit-end-of-defun. * lisp/treesit.el (treesit-search-forward, treesit-search-beginning) (treesit-search-end): Minor docstring fix-up. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 917779f78a..6d5c7b8dc2 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -591,6 +591,38 @@ Retrieving Node for named child (@pxref{tree-sitter named node, named node}). @end defun +@heading Searching for node + +@defun treesit-search-beginning query arg &optional lang up-only +This function searches for the next node that @var{query} captures, +starting at point. Use the parser in current buffer that has +@var{lang} as its language, if @var{lang} is nil, use the first parser +in current buffer’s buffer list. + +This function stops at the @var{arg}'th match. If @var{arg} is +negative, search backward. If the search succeeds, stop at the +beginning of the matched node and return the node. Return nil if +search failed. + +By default, this function searches by traversing the parse tree depth +first, starting from the node at point. If @var{up-only} is non-nil, +this function only go to siblings and parents, but never go down into +children nodes. +@end defun + +@defun treesit-search-end query arg &optional lang up-only +This function is like @code{treesit-search-beginning}, but stops at +the end of the matched node. +@end defun + +@defun treesit-search-forward pos-fn arg query &optional lang up-only +This function is like @code{treesit-search-beginning} and +@code{treesit-search-end}, but instead of stopping at the beginning or +end of the matched node, it determines where to stop by @var{pos-fn}, +where @var{pos-fn} is a function that takes a node and returns a +position +@end defun + @heading More convenient functions @defun treesit-filter-child node pred &optional named diff --git a/doc/lispref/positions.texi b/doc/lispref/positions.texi index 7945232bf8..809ac207d2 100644 --- a/doc/lispref/positions.texi +++ b/doc/lispref/positions.texi @@ -834,6 +834,32 @@ List Motion of using its normal method. 
@end defvar +When tree-sitter support is available (@pxref{Parsing Program +Source}), Emacs can find the beginning and end of a function according +to the syntax tree. + +@defvar treesit-defun-query +Set this variable to a tree-sitter query that matches defun +definitions, then @code{treesit-beginning-of-defun} and +@code{treesit-end-of-defun} can find the beginning and end of a defun. + +Make sure to use a compiled query for this variable, otherwise +@code{treesit-beginning-of-defun} and @code{treesit-end-of-defun} will +be very slow. +@end defvar + +@defun treesit-beginning-of-defun &optional arg +This function finds the beginning of a defun according to +@var{treesit-defun-query}. This function is suitable for the value of +@var{beginning-of-defun-function}. +@end defun + +@defun treesit-end-of-defun &optional arg +This function finds the end of a defun according to +@var{treesit-defun-query}. This function is suitable for the value of +@var{end-of-defun-function}. +@end defun + @node Skipping Characters @subsection Skipping Characters @cindex skipping characters diff --git a/lisp/treesit.el b/lisp/treesit.el index b969f18514..9c66f32ec2 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -862,9 +862,6 @@ treesit-check-indent (defun treesit-search-forward (pos-fn arg query &optional lang up-only) "Search forward for nodes that matches QUERY from current point. -This is a more primitive function, you might want to use -`treesit-search-beginning' or `treesit-search-end' instead. - QUERY has to capture the node to match. LANG specifies the language in which we search for nodes. If LANG is nil, use the first parser in (`treesit-parser-list'). @@ -875,7 +872,7 @@ treesit-search-forward POS-FN can be either `treesit-node-start' or `treesit-node-end', or any function that takes a node and returns a position. -If search succeeds, stop at the position returned by POS-FN and +If the search succeeds, stop at the position returned by POS-FN and return the matched node. 
Return nil if search failed. We search by traversing the parse tree, visiting every node @@ -925,12 +922,12 @@ treesit-search-beginning QUERY has to capture the node to match. LANG specifies the language in which we search for nodes. If LANG is nil, use the -first parser in (`treesit-parser-list'). +first parser in current buffer's parser list. Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. -If search succeeds, return the matched node. Return nil if +If the search succeeds, return the matched node. Return nil if search failed. We search by traversing the parse tree, visiting every node @@ -953,7 +950,7 @@ treesit-search-end Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. -If search succeeds, return the matched node. Return nil if +If the search succeeds, return the matched node. Return nil if search failed. We search by traversing the parse tree, visiting every node commit 47a6c23751ba2eb097f0d4d61976eefa19425ba1 Author: Yuan Fu Date: Wed Sep 7 11:46:07 2022 -0700 Add tree-sitter font-lock settings helper function/macro 1. Add treesit-font-lock-rules that helps with settings treesit-font-lock-settings. 2. Remove treesit-font-lock-defaults and with it, decoration levels. Now major modes should set treesit-font-lock-settings with the output of treesit-font-lock-rules rather than setting treesit-font-lock-defaults. * lisp/treesit.el (treesit-font-lock-settings): Update docstring. (treesit-font-lock-rules): New function. (treesit-font-lock-defaults): Remove variable. (treesit-font-lock-enable): Remove code that interacts treesit-font-lock-defaults. * doc/lispref/modes.texi: Update manual for treesit-font-lock-settings, treesit-font-lock-rules, treesit-font-lock-defaults. 
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index ba8b548554..45a44acf54 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -3883,12 +3883,12 @@ Parser-based Font Lock @c if in the future more parser are supported, feel free to reorganize @c and rewrite this node to describe multiple parsers in parallel. -Besides simple syntactic font lock and search-based font lock, Emacs +Besides simple syntactic font lock and regexp-based font lock, Emacs also provides complete syntactic font lock with the help of a parser, currently provided by the tree-sitter library (@pxref{Parsing Program Source}). Because it is an optional feature, parser-based font lock is less integrated with Emacs. Most variables introduced in previous -sections only apply to search-based font lock, except for +sections only apply to regexp-based font lock, except for @var{font-lock-maximum-decoration}. @defun treesit-font-lock-enable @@ -3897,29 +3897,34 @@ Parser-based Font Lock Parser-based font lock and other font lock mechanism are not mutually exclusive. By default, if enabled, parser-based font lock runs first, -then the simple syntactic font lock (if enabled), then search-based +then the simple syntactic font lock (if enabled), then regexp-based font lock. Although parser-based font lock doesn't share the same customization -variables with search-based font lock, parser-based font lock uses -similar customization schemes. Just like @var{font-lock-keywords} and -@var{font-lock-defaults}, parser-based font lock has -@var{treesit-font-lock-settings} and -@var{treesit-font-lock-defaults}. +variables with regexp-based font lock, parser-based font lock uses +similar customization schemes. The tree-sitter counterpart of +@var{font-lock-keywords} is @var{treesit-font-lock-settings}. -@defvar treesit-font-lock-settings -A list of @var{setting}s for tree-sitter font lock. - -Each @var{setting} should look like +@defun treesit-font-lock-rules :keyword value query... 
+This function is used to set @var{treesit-font-lock-settings}. It +takes care of compiling queries and other post-processing and outputs +a value that @var{treesit-font-lock-settings} accepts. An example: @example -(@var{language} @var{query}) +@group +(treesit-font-lock-rules + :language 'javascript + '((true) @@font-lock-constant-face + (false) @@font-lock-constant-face) + :language 'html + "(script_element) @@font-lock-builtin-face") +@end group @end example -Each @var{setting} controls one parser (often of different language). -And @var{language} is the language symbol (@pxref{Language -Definitions}); @var{query} is either a string query or a sexp query -(@pxref{Pattern Matching}). +This function takes a list of text or s-exp queries. Before each +query, there are @var{:keyword} and @var{value} pairs that configures +that query. The @var{:lang} keyword sets the query’s language, and is +currently the only recognized keyword. Capture names in @var{query} should be face names like @code{font-lock-keyword-face}. The captured node will be fontified @@ -3927,32 +3932,24 @@ Parser-based Font Lock case the function is called with (@var{start} @var{end} @var{node}), where @var{start} and @var{end} are the start and end position of the node in buffer, and @var{node} is the tree-sitter node object. If a -capture name is both a face and a function, face takes priority. +capture name is both a face and a function, the face takes priority. +@end defun -Generally, major modes should set @var{treesit-font-lock-defaults}, -and let Emacs automatically populate this variable. -@end defvar +@defvar treesit-font-lock-settings +A list of @var{setting}s for tree-sitter font lock. The exact format +of this variable is considered internal. One should always use +@code{treesit-font-lock-rules} to set this variable. -@defvar treesit-font-lock-defaults -This variable stores defaults for tree-sitter font Lock. 
It is a list -of +Each @var{setting} is of form @example -(@var{default} @var{:keyword} @var{value}...) +(@var{language} @var{query}) @end example -A @var{default} may be a symbol or a list of symbols (for different -levels of fontification). The symbol(s) can be a variable or a -function. If a symbol is both a variable and a function, it is used -as a function. Different levels of fontification can be controlled by -@var{font-lock-maximum-decoration}. - -The symbol(s) in @var{default} should contain or return a -@var{setting} as described in @var{treesit-font-lock-settings}. - -The rest @var{keyword}s and @var{value}s are additional settings that -could be used to alter the fontification behavior. Currently there -aren't any. +Each @var{setting} controls one parser (often of different language). +And @var{language} is the language symbol (@pxref{Language +Definitions}); @var{query} is either a string query or a sexp query +(@pxref{Pattern Matching}). @end defvar Multi-language major modes should provide range functions in diff --git a/lisp/treesit.el b/lisp/treesit.el index cf5001eebc..b969f18514 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -442,7 +442,10 @@ treesit-update-ranges (defvar-local treesit-font-lock-settings nil "A list of SETTINGs for treesit-based fontification. -Each SETTING should look like +The exact format of this variable is considered internal. One +should always use `treesit-font-lock-rules' to set this variable. + +Each SETTING is of form (LANGUAGE QUERY) @@ -455,7 +458,28 @@ treesit-font-lock-settings a query in either string or s-expression form. When using repeatedly, a compiled query is much faster than a string or sexp one, so it is recommend to compile your queries if it will be -used over and over. +used over and over.") + +(defun treesit-font-lock-rules (&rest args) + "Return a value suitable for `treesit-font-lock-settings'. + +Take a series of QUERIES in either string, s-expression or +compiled form. 
Same as in `treesit-font-lock-settings', for each +query, captured nodes are highlighted with the capture name as +its face. + +Before each QUERY there could be :KEYWORD VALUE pairs that +configure the query (and only that query). For example, + + (treesit-font-lock-rules + :language 'javascript + '((true) @font-lock-constant-face + (false) @font-lock-constant-face) + :language 'html + \"(script_element) @font-lock-builtin-face\") + +For each QUERY, a :language keyword is required. Currently the +only recognized keyword is :language. Capture names in QUERY should be face names like `font-lock-keyword-face'. The captured node will be fontified @@ -463,39 +487,36 @@ treesit-font-lock-settings which case the function is called with (START END NODE), where START and END are the start and end position of the node in buffer, and NODE is the tree-sitter node object. If a capture -name is both a face and a function, face takes priority. - -Generally, major modes should set -`treesit-font-lock-defaults', and let Emacs automatically -populate this variable.") - -(defvar-local treesit-font-lock-defaults nil - "Defaults for tree-sitter Font Lock specified by the major mode. - -This variable should be a list of - - (DEFAULT :KEYWORD VALUE...) - -A DEFAULT may be a symbol or a list of symbols (specifying -different levels of fontification). The symbol(s) can be of a -variable or a function. If a symbol is both a variable and a -function, it is used as a function. Different levels of -fontification can be controlled by -`font-lock-maximum-decoration'. - -The symbol(s) in DEFAULT should contain or return a SETTING as -explained in `treesit-font-lock-settings', which looks like - - (LANGUAGE QUERY) - -KEYWORD and VALUE are additional settings could be used to alter -fontification behavior. Currently there aren't any. 
- -Multi-language major-modes should provide a range function for -eacn language it supports in `treesit-range-functions', and -Emacs will set the ranges accordingly before fontifing a region. -See Info node `(elisp)Multiple Languages' for what does it mean -to set ranges for a parser.") +name is both a face and a function, the face takes priority. + +\(fn :KEYWORD VALUE QUERY...)" + (let (;; Tracks the current language that following queries will + ;; apply to. + (current-language nil) + ;; The list this function returns. + (result nil)) + (while args + (let ((token (pop args))) + (pcase token + (:language + (let ((lang (pop args))) + (when (or (not (symbolp lang)) (null lang)) + (signal 'wrong-type-argument `(symbolp ,lang))) + (setq current-language lang))) + ((pred treesit-query-p) + (when (null current-language) + (signal 'treesit-error + `("Language unspecified, use :language keyword to specify a language for this query" ,token))) + (if (treesit-compiled-query-p token) + (push `(,current-language token) result) + (push `(,current-language + ,(treesit-query-compile current-language token)) + result)) + ;; Clears any configurations set for this query. + (setq current-language nil)) + (_ (signal 'treesit-error + `("Unexpected value" token)))))) + (nreverse result))) (defun treesit-font-lock-fontify-region (start end &optional loudly) "Fontify the region between START and END. @@ -535,15 +556,6 @@ treesit-font-lock-fontify-region (defun treesit-font-lock-enable () "Enable tree-sitter font-locking for the current buffer." 
- (let ((default (car treesit-font-lock-defaults)) - (attributes (cdr treesit-font-lock-defaults))) - (ignore attributes) - (setq-local treesit-font-lock-settings - (font-lock-eval-keywords - (font-lock-choose-keywords - default - (font-lock-value-in-major-mode - font-lock-maximum-decoration))))) (setq-local font-lock-fontify-region-function #'treesit-font-lock-fontify-region) ;; If we don't set `font-lock-defaults' to some non-nil value, commit 77d5a0cf9fc4a6dc44f0c6ee5e3295e0eea08273 Merge: e98b4715bb df2f6fb7fc Author: Yuan Fu Date: Mon Aug 29 11:41:10 2022 -0700 Merge remote-tracking branch 'origin/master' into feature/tree-sitter commit e98b4715bb986524bde9356b62429af9786ae716 Author: Yuan Fu Date: Fri Aug 19 00:33:27 2022 -0700 ; * doc/lispref/parsing.texi: Minor touch-up. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 8df14b12b7..917779f78a 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -388,7 +388,7 @@ Using Parser with signal data being the buffer size. Once a parser is created, Emacs automatically adds it to the -buffer-local parser list. Every time a change is made to the buffer, +internal parser list. Every time a change is made to the buffer, Emacs updates parsers in this list so they can update their syntax tree incrementally. @@ -412,8 +412,9 @@ Using Parser which a parser should operate in. @xref{Multiple Languages}. Because a parser parses lazily, when we narrow the buffer, the parser -doesn't act immediately; as long as we don't query for a node while -the buffer is narrowed, narrowing does not affect the parser. +is not affected immediately; as long as we don't query for a node +while the buffer is narrowed, the parser is oblivious of the +narrowing. 
@cindex tree-sitter parse string @defun treesit-parse-string string language commit 11379ef2369c09c719fecc66cf3b1287d9ad1f3a Author: Yuan Fu Date: Fri Aug 19 00:25:10 2022 -0700 Remove treesit manual entries for deleted functions * doc/lispref/parsing.texi: Remove documentation for treesit-get-parser-create and treesit-get-parser. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 27755e0caa..8df14b12b7 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -348,34 +348,14 @@ Using Parser @cindex Creating tree-sitter parsers -To create a parser, we provide a buffer to parse and the language to -use (@pxref{Language Definitions}). Emacs provides several creation -functions for different use cases. - -@defun treesit-get-parser-create language -This function is the most convenient one. It gives you a parser that -recognizes @var{language} for the current buffer. The function -checks if there already exists a parser suiting the need, and only -creates a new one when it can't find one. - -@example -@group -;; Create a parser for C programming language. -(treesit-get-parser-create 'c) - @c @result{} # -@end group -@end example -@end defun - -@defun treesit-get-parser language -This function is like @code{treesit-get-parser-create}, but it -always creates a new parser. -@end defun - -@defun treesit-parser-create buffer language -This function is the most primitive, requiring both the buffer to -associate to, and the language to use. If @var{buffer} is nil, the -current buffer is used. +@defun treesit-parser-create language &optional buffer no-reuse +To create a parser, we provide a @var{buffer} to keep track of and the +@var{language} to use (@pxref{Language Definitions}). If @var{buffer} +is nil, the current buffer is used. + +By default, this function reuses a parser if one already exists for +@var{language} in @var{buffer}, if @var{no-reuse} is non-nil, this +function always creates a new parser. 
@end defun Given a parser, we can query information about it: commit f543a1ca0ef25fa86eee747423aef28bd98e731b Author: Yuan Fu Date: Thu Aug 18 01:44:39 2022 -0700 Add treesit-parser-delete * src/treesit.c (ts_record_change): Replace CHECK_TS_PARSER with ts_check_parser. (make_ts_parser): Initialize deleted field. (Ftreesit_parser_delete): Replace CHECK_TS_PARSER with ts_check_parser. Set deleted field. (Ftreesit_parser_buffer, Ftreesit_parser_language): Replace CHECK_TS_PARSER with ts_check_parser. (ts_check_parser): New function. (Ftreesit_parser_root_node, Ftreesit_parser_set_included_ranges) (Ftreesit_parser_included_ranges): Replace CHECK_TS_PARSER with ts_check_parser. (Qtreesit_parser_deleted): New symbol. (Qtreesit_parser_deleted): New error. * src/treesit.h (Lisp_TS_Parser): New field 'deleted'. diff --git a/src/treesit.c b/src/treesit.c index 1917179d09..48de9436d2 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -318,6 +318,14 @@ DEFUN ("treesit-language-available-p", /*** Parsing functions */ +static void +ts_check_parser (Lisp_Object obj) +{ + CHECK_TS_PARSER (obj); + if (XTS_PARSER (obj)->deleted) + xsignal1 (Qtreesit_parser_deleted, obj); +} + /* An auxiliary function that saves a few lines of code. Assumes TREE is not NULL. 
*/ static inline void @@ -349,7 +357,7 @@ ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, { CHECK_CONS (parser_list); Lisp_Object lisp_parser = XCAR (parser_list); - CHECK_TS_PARSER (lisp_parser); + ts_check_parser (lisp_parser); TSTree *tree = XTS_PARSER (lisp_parser)->tree; if (tree != NULL) { @@ -595,6 +603,7 @@ make_ts_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer)); lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer)); lisp_parser->timestamp = 0; + lisp_parser->deleted = false; eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } @@ -748,11 +757,14 @@ DEFUN ("treesit-parser-delete", doc: /* Delete PARSER from its buffer. */) (Lisp_Object parser) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); + Lisp_Object buffer = XTS_PARSER (parser)->buffer; struct buffer *buf = XBUFFER (buffer); BVAR (buf, ts_parser_list) = Fdelete (parser, BVAR (buf, ts_parser_list)); + + XTS_PARSER (parser)->deleted = true; return Qnil; } @@ -789,7 +801,7 @@ DEFUN ("treesit-parser-buffer", doc: /* Return the buffer of PARSER. */) (Lisp_Object parser) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); Lisp_Object buf; XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer)); return buf; @@ -802,7 +814,7 @@ DEFUN ("treesit-parser-language", This symbol is the one used to create the parser. */) (Lisp_Object parser) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); return XTS_PARSER (parser)->language_symbol; } @@ -814,7 +826,7 @@ DEFUN ("treesit-parser-root-node", doc: /* Return the root node of PARSER. */) (Lisp_Object parser) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); ts_ensure_parsed (parser); TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree); return make_ts_node (parser, root_node); @@ -862,7 +874,7 @@ DEFUN ("treesit-parser-set-included-ranges", is nil, set PARSER to parse the whole buffer. 
*/) (Lisp_Object parser, Lisp_Object ranges) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); CHECK_CONS (ranges); ts_check_range_argument (ranges); @@ -927,7 +939,7 @@ DEFUN ("treesit-parser-included-ranges", nil. */) (Lisp_Object parser) { - CHECK_TS_PARSER (parser); + ts_check_parser (parser); uint32_t len; const TSRange *ranges = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); @@ -1813,6 +1825,7 @@ syms_of_treesit (void) "treesit-node-outdated"); DEFSYM (Quser_emacs_directory, "user-emacs-directory"); + DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); define_error (Qtreesit_query_error, "Query pattern is malformed", @@ -1831,6 +1844,9 @@ syms_of_treesit (void) define_error (Qtreesit_node_outdated, "This node is outdated, please retrieve a new one", Qtreesit_error); + define_error (Qtreesit_parser_deleted, + "This parser is deleted and cannot be used", + Qtreesit_error); DEFVAR_LISP ("treesit-load-name-override-list", Vtreesit_load_name_override_list, diff --git a/src/treesit.h b/src/treesit.h index cb00fee111..0c043f7d25 100644 --- a/src/treesit.h +++ b/src/treesit.h @@ -62,6 +62,9 @@ #define EMACS_TREESIT_H inherits this timestamp. This way we can make sure the node is not outdated when we access its information. */ ptrdiff_t timestamp; + /* If this field is true, parser functions raises + treesit-parser-deleted signal. */ + bool deleted; }; /* A wrapper around a tree-sitter node. */ commit ac4cafd66e2c3a07a6721201a33e8a03e8880bac Author: Yuan Fu Date: Sun Jun 19 19:06:56 2022 -0700 * src/treesit.c (make_ts_parser): Initialize timestamp field. 
diff --git a/src/treesit.c b/src/treesit.c index 64593898dc..1917179d09 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -594,6 +594,7 @@ make_ts_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->need_reparse = true; lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer)); lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer)); + lisp_parser->timestamp = 0; eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } commit c8eeaa4ae4aea05ccf66f415be20842d8447bad7 Author: Yuan Fu Date: Sat Jun 18 16:57:50 2022 -0700 ; * lisp/treesit.el (treesit-traverse-forward): Fix docstring. diff --git a/lisp/treesit.el b/lisp/treesit.el index 3cfafd0d15..cf5001eebc 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -309,15 +309,14 @@ treesit-traverse-forward o o 2 7 +-+-+ +--+--+ | | | | | 10 11 13 14 15 - -DEPTH can be a positive integer, 0, nil, or 'up. A positive +DEPTH can be a positive integer, 0, nil, or \\='up. A positive integer or 0 means go DEPTH deep counting from NODE. A nil means -no limit. And a symbol 'up means go upwards only: only traverse +no limit. And a symbol \\='up means go upwards only: only traverse sibling and parent, never go down to children. -The difference between 0 and 'up is subtle: in the above example, +The difference between 0 and \\='up is subtle: in the above example, if given 0 as DEPTH, node 1 3 4 5 6 8 9 12 16 are visited; if -given 'up as DEPTH, only node 1 3 4 8 16 are visited." +given \\='up as DEPTH, only node 1 3 4 8 16 are visited." ;; First try NODE's subtree, but only under these conditions: if ;; DEPTH is a number, it has to be greater than 0, if it's a symbol, ;; it cannot be 'up. commit 7544e9ab5d20a773db15ae3538f1c4a3a9167760 Author: Yuan Fu Date: Fri Jun 17 17:04:00 2022 -0700 Fix double-free in treesit.c * src/treesit.c (Ftreesit_query_capture): Remove free at the end. 
diff --git a/src/treesit.c b/src/treesit.c index be0955805c..64593898dc 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1706,16 +1706,10 @@ DEFUN ("treesit-query-capture", /* Initialize query objects, and execute query. */ struct Lisp_TS_Query *lisp_query; - /* If the lisp query is temporary, we need to free it after use. */ - bool lisp_query_temp_p; if (TS_COMPILED_QUERY_P (query)) - { - lisp_query_temp_p = false; lisp_query = XTS_COMPILED_QUERY (query); - } else { - lisp_query_temp_p = true; uint32_t error_offset; TSQueryError error_type; lisp_query = make_ts_query (query, lang, @@ -1727,6 +1721,8 @@ DEFUN ("treesit-query-capture", (ts_query_error_to_string (error_type)), make_fixnum (error_offset + 1)); } + /* We don't need need to free TS_QUERY and CURSOR, they are stored + in a lisp object, which is tracked by gc. */ } TSQuery *ts_query = lisp_query->query; TSQueryCursor *cursor = lisp_query->cursor; @@ -1785,11 +1781,6 @@ DEFUN ("treesit-query-capture", result = prev_result; } } - if (lisp_query_temp_p) - { - ts_query_delete (ts_query); - ts_query_cursor_delete (cursor); - } return Fnreverse (result); } commit 246dbb540a32fd5e68ae0665527717943ebb69b1 Author: Yuan Fu Date: Thu Jun 16 17:55:07 2022 -0700 Change treesit-parser-list from variable to function Effectively making the list internal. Now Emacs user cannot shoot themselves in the foot by removing a parser from the list, make chaanges to buffer and add that parser back to the list. * doc/lispref/parsing.texi (Language Definitions, Using Parser) (Retrieving Node, Multiple Languages): Change variable to function. * lisp/treesit.el (treesit-language-at, treesit-node-on) (treesit-buffer-root-node, treesit-indent, treesit-check-indent) (treesit-search-forward, treesit-search-beginning) (treesit-end-of-defun, treesit-inspect-mode): Change variable to function. * src/buffer.c (bset_ts_parser_list, reset_buffer, init_buffer_once): Add ts_parser_list. * src/buffer.h (struct buffer): Add ts_parser_list. 
* src/treesit.c (ts_record_change, Ftreesit_parser_create): Use the buffer field instead of the old buffer local variable. (Ftreesit_parser_delete, Ftreesit_parser_list): New functions. (syms_of_treesit): Remove treesit-parser-list. * test/src/treesit-tests.el (treesit-basic-parsing): Use the new function. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index b077f55743..27755e0caa 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -184,7 +184,7 @@ Language Definitions spans point, and its immediate parent. This minor mode doesn't create parsers on its own. It simply uses the -first parser in @var{treesit-parser-list} (@pxref{Using Parser}). +first parser in @code{(treesit-parser-list)} (@pxref{Using Parser}). @end deffn @heading Reading the grammar definition @@ -407,15 +407,19 @@ Using Parser size exceeds that, Emacs signals @var{treesit-buffer-too-large} with signal data being the buffer size. -@vindex treesit-parser-list Once a parser is created, Emacs automatically adds it to the -buffer-local variable @var{treesit-parser-list}. Every time a -change is made to the buffer, Emacs updates parsers in this list so -they can update their syntax tree incrementally. Therefore, one must -not remove parsers from this list and put the parser back in: if any -change is made when that parser is absent, the parser will be -permanently out-of-sync with the buffer content, and shouldn't be used -anymore. +buffer-local parser list. Every time a change is made to the buffer, +Emacs updates parsers in this list so they can update their syntax +tree incrementally. + +@defun treesit-parser-list &optional buffer +This function returns the parser list of @var{buffer}. And +@var{buffer} defaults to the current buffer. +@end defun + +@defun treesit-parser-delete parser +This function deletes @var{parser}. 
+@end defun @cindex tree-sitter narrowing @anchor{tree-sitter narrowing} Normally, a parser ``sees'' the whole @@ -477,10 +481,10 @@ Retrieving Node greater than @var{point}. When @var{parser-or-lang} is nil, this function uses the first parser -in @var{treesit-parser-list} in the current buffer. If +in @code{(treesit-parser-list)} in the current buffer. If @var{parser-or-lang} is a parser object, it use that parser; if @var{parser-or-lang} is a language, it finds the first parser using -that language in @var{treesit-parser-list} and use that. +that language in @code{(treesit-parser-list)} and use that. If @var{named} is non-nil, this function looks for a named node instead (@pxref{tree-sitter named node, named node}). @@ -507,10 +511,10 @@ Retrieving Node @code{treesit-node-at} instead. When @var{parser-or-lang} is nil, this function uses the first parser -in @var{treesit-parser-list} in the current buffer. If +in @code{(treesit-parser-list)} in the current buffer. If @var{parser-or-lang} is a parser object, it use that parser; if @var{parser-or-lang} is a language, it finds the first parser using -that language in @var{treesit-parser-list} and use that. +that language in @code{(treesit-parser-list)} and use that. If @var{named} is non-nil, this function looks for a named node instead (@pxref{tree-sitter named node, named node}). @@ -523,7 +527,7 @@ Retrieving Node @defun treesit-buffer-root-node &optional language This function finds the first parser that uses @var{language} in -@var{treesit-parser-list} in the current buffer, and returns the +@code{(treesit-parser-list)} in the current buffer, and returns the root node of that buffer. If it cannot find an appropriate parser, it returns nil. @end defun @@ -1267,7 +1271,7 @@ Multiple Languages the ranges of @var{parser-or-lang} to @var{ranges}. Conveniently, @var{parser-or-lang} could be either a parser or a language. 
If it is a language, this function looks for the first parser in -@var{treesit-parser-list} for that language in the current buffer, +@code{(treesit-parser-list)} for that language in the current buffer, and set range for it. @end defun @@ -1301,7 +1305,7 @@ Multiple Languages @defun treesit-language-at point This function tries to figure out which language is responsible for the text at @var{point}. It goes over each parser in -@var{treesit-parser-list} and see if that parser's range covers +@code{(treesit-parser-list)} and see if that parser's range covers @var{point}. @end defun diff --git a/lisp/treesit.el b/lisp/treesit.el index 5b65e00e07..3cfafd0d15 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -75,7 +75,7 @@ treesit-parse-string (defun treesit-language-at (point) "Return the language used at POINT." - (cl-loop for parser in treesit-parser-list + (cl-loop for parser in (treesit-parser-list) if (treesit-node-on point point parser) return (treesit-parser-language parser))) @@ -122,7 +122,7 @@ treesit-node-at non-nil, only look for named node. If PARSER-OR-LANG is nil, use the first parser in -`treesit-parser-list'; if PARSER-OR-LANG is a parser, use +(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use that parser; if PARSER-OR-LANG is a language, find a parser using that language in the current buffer, and use that." (let ((node (if (treesit-parser-p parser-or-lang) @@ -150,7 +150,7 @@ treesit-node-on node. If PARSER-OR-LANG is nil, use the first parser in -`treesit-parser-list'; if PARSER-OR-LANG is a parser, use +(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use that parser; if PARSER-OR-LANG is a language, find a parser using that language in the current buffer, and use that." (let ((root (if (treesit-parser-p parser-or-lang) @@ -160,13 +160,13 @@ treesit-node-on (defun treesit-buffer-root-node (&optional language) "Return the root node of the current buffer. 
-Use the first parser in `treesit-parser-list', if LANGUAGE is +Use the first parser in (`treesit-parser-list'), if LANGUAGE is non-nil, use the first parser for LANGUAGE." (if-let ((parser (or (if language (or (treesit-parser-create language) (error "Cannot find a parser for %s" language)) - (or (car treesit-parser-list) + (or (car (treesit-parser-list)) (error "Buffer has no parser")))))) (treesit-parser-root-node parser))) @@ -770,7 +770,7 @@ treesit-indent (skip-chars-forward " \t") (point))) (smallest-node - (cl-loop for parser in treesit-parser-list + (cl-loop for parser in (treesit-parser-list) for node = (treesit-node-at bol parser) if node return node)) (node (treesit-parent-while @@ -856,7 +856,7 @@ treesit-search-forward QUERY has to capture the node to match. LANG specifies the language in which we search for nodes. If LANG is nil, use the -first parser in `treesit-parser-list'. +first parser in (`treesit-parser-list'). Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. @@ -875,7 +875,7 @@ treesit-search-forward (cl-loop for idx from 1 to (abs arg) for parser = (if lang (treesit-parser-create lang) - (car treesit-parser-list)) + (car (treesit-parser-list))) for node = (if-let ((starting-point (point)) (node (treesit-node-at (point) parser t))) @@ -914,7 +914,7 @@ treesit-search-beginning QUERY has to capture the node to match. LANG specifies the language in which we search for nodes. If LANG is nil, use the -first parser in `treesit-parser-list'. +first parser in (`treesit-parser-list'). Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. @@ -937,7 +937,7 @@ treesit-search-end QUERY has to capture the node to match. LANG specifies the language in which we search for nodes. If LANG is nil, use the -first parser in `treesit-parser-list'. +first parser in (`treesit-parser-list'). 
Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. @@ -993,7 +993,7 @@ treesit-inspect-node-at-point If called interactively, show in echo area, otherwise set `treesit--inspect-name' (which will appear in the mode-line if `treesit-inspect-mode' is enabled). Uses the first parser -in `treesit-parser-list'." +in (`treesit-parser-list')." (interactive "p") ;; NODE-LIST contains all the node that starts at point. (let* ((node-list @@ -1053,7 +1053,7 @@ treesit-inspect-mode its immediate parent. This minor mode doesn't create parsers on its own. It simply -uses the first parser in `treesit-parser-list'." +uses the first parser in (`treesit-parser-list')." :lighter nil (if treesit-inspect-mode (progn diff --git a/src/buffer.c b/src/buffer.c index a0761f5b59..97e9e22738 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -231,6 +231,13 @@ bset_extra_line_spacing (struct buffer *b, Lisp_Object val) { b->extra_line_spacing_ = val; } +#ifdef HAVE_TREE_SITTER +static void +bset_ts_parser_list (struct buffer *b, Lisp_Object val) +{ + b->ts_parser_list_ = val; +} +#endif static void bset_file_format (struct buffer *b, Lisp_Object val) { @@ -1004,6 +1011,9 @@ reset_buffer (register struct buffer *b) (b, BVAR (&buffer_defaults, enable_multibyte_characters)); bset_cursor_type (b, BVAR (&buffer_defaults, cursor_type)); bset_extra_line_spacing (b, BVAR (&buffer_defaults, extra_line_spacing)); +#ifdef HAVE_TREE_SITTER + bset_ts_parser_list (b, Qnil); +#endif b->display_error_modiff = 0; } @@ -5273,6 +5283,9 @@ init_buffer_once (void) XSETFASTINT (BVAR (&buffer_local_flags, tab_line_format), idx); ++idx; XSETFASTINT (BVAR (&buffer_local_flags, cursor_type), idx); ++idx; XSETFASTINT (BVAR (&buffer_local_flags, extra_line_spacing), idx); ++idx; +#ifdef HAVE_TREE_SITTER + XSETFASTINT (BVAR (&buffer_local_flags, ts_parser_list), idx); ++idx; +#endif XSETFASTINT (BVAR (&buffer_local_flags, cursor_in_non_selected_windows), idx); ++idx; /* 
buffer_local_flags contains no pointers, so it's safe to treat it @@ -5343,6 +5356,9 @@ init_buffer_once (void) bset_bidi_paragraph_separate_re (&buffer_defaults, Qnil); bset_cursor_type (&buffer_defaults, Qt); bset_extra_line_spacing (&buffer_defaults, Qnil); +#ifdef HAVE_TREE_SITTER + bset_ts_parser_list (&buffer_defaults, Qnil); +#endif bset_cursor_in_non_selected_windows (&buffer_defaults, Qt); bset_enable_multibyte_characters (&buffer_defaults, Qt); diff --git a/src/buffer.h b/src/buffer.h index 135eaf72d3..bc07a63b53 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -561,6 +561,10 @@ #define BVAR(buf, field) ((buf)->field ## _) in the display of this buffer. */ Lisp_Object extra_line_spacing_; +#ifdef HAVE_TREE_SITTER + /* A list of tree-sitter parsers for this buffer. */ + Lisp_Object ts_parser_list_; +#endif /* Cursor type to display in non-selected windows. t means to use hollow box cursor. See `cursor-type' for other values. */ diff --git a/src/treesit.c b/src/treesit.c index fcb333b8ec..be0955805c 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -342,8 +342,8 @@ ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) { - for (Lisp_Object parser_list = - Fsymbol_value (Qtreesit_parser_list); + for (Lisp_Object parser_list + = BVAR (current_buffer, ts_parser_list); !NILP (parser_list); parser_list = XCDR (parser_list)) { @@ -704,23 +704,24 @@ DEFUN ("treesit-parser-create", ts_initialize (); CHECK_SYMBOL (language); - struct buffer *old_buffer = current_buffer; - if (!NILP (buffer)) + struct buffer *buf; + if (NILP (buffer)) + buf = current_buffer; + else { CHECK_BUFFER (buffer); - set_buffer_internal (XBUFFER (buffer)); + buf = XBUFFER (buffer); } - ts_check_buffer_size (current_buffer); + ts_check_buffer_size (buf); /* See if we can reuse a parser. 
*/ - for (Lisp_Object tail = Fsymbol_value (Qtreesit_parser_list); + for (Lisp_Object tail = BVAR (buf, ts_parser_list); NILP (no_reuse) && !NILP (tail); tail = XCDR (tail)) { struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail)); if (EQ (parser->language_symbol, language)) { - set_buffer_internal (old_buffer); return XCAR (tail); } } @@ -734,13 +735,53 @@ DEFUN ("treesit-parser-create", Lisp_Object lisp_parser = make_ts_parser (Fcurrent_buffer (), parser, NULL, language); - Fset (Qtreesit_parser_list, - Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list))); + BVAR (buf, ts_parser_list) + = Fcons (lisp_parser, BVAR (buf, ts_parser_list)); - set_buffer_internal (old_buffer); return lisp_parser; } +DEFUN ("treesit-parser-delete", + Ftreesit_parser_delete, Streesit_parser_delete, + 1, 1, 0, + doc: /* Delete PARSER from its buffer. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + Lisp_Object buffer = XTS_PARSER (parser)->buffer; + struct buffer *buf = XBUFFER (buffer); + BVAR (buf, ts_parser_list) + = Fdelete (parser, BVAR (buf, ts_parser_list)); + return Qnil; +} + +DEFUN ("treesit-parser-list", + Ftreesit_parser_list, Streesit_parser_list, + 0, 1, 0, + doc: /* Return BUFFER's parser list. +BUFFER defaults to the current buffer. */) + (Lisp_Object buffer) +{ + struct buffer *buf; + if (NILP (buffer)) + buf = current_buffer; + else + { + CHECK_BUFFER (buffer); + buf = XBUFFER (buffer); + } + /* Return a fresh list so messing with that list doesn't affect our + internal data. 
*/ + Lisp_Object return_list = Qnil; + for (Lisp_Object tail = BVAR (buf, ts_parser_list); + !NILP (tail); + tail = XCDR (tail)) + { + return_list = Fcons (XCAR (tail), return_list); + } + return Freverse (return_list); +} + DEFUN ("treesit-parser-buffer", Ftreesit_parser_buffer, Streesit_parser_buffer, 1, 1, 0, @@ -1799,17 +1840,6 @@ syms_of_treesit (void) "This node is outdated, please retrieve a new one", Qtreesit_error); - DEFSYM (Qtreesit_parser_list, "treesit-parser-list"); - DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list, - doc: /* A list of tree-sitter parsers. - -If you removed a parser from this list, do not put it back in. Emacs -keeps the parser in this list updated with any change in the buffer. -If removed and put back in, there is no guarantee that the parser is in -sync with the buffer's content. */); - Vtreesit_parser_list = Qnil; - Fmake_variable_buffer_local (Qtreesit_parser_list); - DEFVAR_LISP ("treesit-load-name-override-list", Vtreesit_load_name_override_list, doc: @@ -1848,6 +1878,8 @@ syms_of_treesit (void) defsubr (&Streesit_node_parser); defsubr (&Streesit_parser_create); + defsubr (&Streesit_parser_delete); + defsubr (&Streesit_parser_list); defsubr (&Streesit_parser_buffer); defsubr (&Streesit_parser_language); diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 416329d94d..ebfe650dc0 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -27,7 +27,7 @@ treesit-basic-parsing (with-temp-buffer (let ((parser (treesit-parser-create 'json))) (should - (eq parser (car treesit-parser-list))) + (eq parser (car (treesit-parser-list)))) (should (equal (treesit-node-string (treesit-parser-root-node parser)) commit 33f7e10a29dad475f7872d6af87ecefaccdb55fc Author: Yuan Fu Date: Thu Jun 16 11:25:08 2022 -0700 Add treesit test for previous change * test/src/treesit-tests.el (treesit-cross-boundary): New test. 
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 32971fbacb..416329d94d 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -223,6 +223,89 @@ treesit-narrow (treesit-parser-root-node parser)) "(document (array (number)))"))))) +(ert-deftest treesit-cross-boundary () + "Tests for cross-boundary edits. +Cross-boundary means crossing visible_beg and visible_end. We +don't test if parser parses correctly, instead we just check +edits like this don't produce assertion errors. (I inserted a +bunch of assertions that checks e.g. visible_beg <= +visible_end.)" + (with-temp-buffer + (let (parser root-node pattern doc-node object-node pair-node) + (progn + (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx") + (narrow-to-region (+ (point-min) 3) (- (point-max) 3)) + (setq parser (treesit-parser-create 'json)) + ;; Now visible_beg/end = visible boundary. + (setq root-node (treesit-parser-root-node parser))) + ;; Now parser knows the content of the visible region. + (widen) + ;; Now visible_beg/end don't change, but visible region expanded. + (delete-region 1 7) + ;; (1) This change is across visible_beg. I expect + ;; ts_record_change to receive (start=1, old_end=7, new_end=1). + (treesit-parser-root-node parser) + ;; Above form forces a parse which calls + ;; `ts_ensure_position_synced'. Now visible_beg/end matches the + ;; visible region (whole buffer). We want to test that this + ;; doesn't cause assertion error. + + (should (equal "{\"name\": \"Bob\"},2,3]xxx" (buffer-string))) + (narrow-to-region 1 16) + (should (equal "{\"name\": \"Bob\"}" (buffer-string))) + (treesit-parser-root-node parser) + ;; Call `ts_ensure_position_synced' again to update visible_beg/end. + (widen) + (goto-char 14) + (insert "by") + ;; (2) This change is inside [visible_beg, visible_end]. + (should (equal "{\"name\": \"Bobby\"},2,3]xxx" (buffer-string))) + (delete-region 14 23) + ;; This delete is across visible_end. 
+ (should (equal "{\"name\": \"Bobxxx" (buffer-string))) + (treesit-parser-root-node parser) + ;; visible_beg/end synced. + + (narrow-to-region 3 7) + (should (equal "name" (buffer-string))) + (treesit-parser-root-node parser) + ;; visible_beg/end synced. + (widen) + (goto-char (point-min)) + (insert "zzz") + (should (equal "zzz{\"name\": \"Bobxxx" (buffer-string))) + ;; (3) Test inserting before visible_beg. + (treesit-parser-root-node parser) + ;; visible_beg/end synced. + + (narrow-to-region 4 11) + (should (equal "{\"name\"" (buffer-string))) + (treesit-parser-root-node parser) + ;; visible_beg/end synced. + (widen) + (goto-char (point-max)) + (insert "yyy") + ;; (4) This change is after visible_end. + (treesit-parser-root-node parser) + ;; Sync up visible_beg/end. + (should (equal "zzz{\"name\": \"Bobxxxyyy" (buffer-string))) + + (narrow-to-region 1 17) + (should (equal "zzz{\"name\": \"Bob" (buffer-string))) + (treesit-parser-root-node parser) + ;; Sync up visible_beg/end. + (widen) + (delete-region 13 (point-max)) + (treesit-parser-root-node parser) + ;; Sync up visible_beg/end. + (should (equal "zzz{\"name\": " (buffer-string))) + ;; Ideally we want to also test the case where we delete and + ;; insert simultaneously, but the only such use is in + ;; `casify_region', all others either only inserts or only + ;; deletes. I'll leave it to someone to try to write a test + ;; that calls that. + ))) + (ert-deftest treesit-range () "Tests if range works." (with-temp-buffer commit dd65d1c396da2e024468196c4d5bcb72198f524a Author: Yuan Fu Date: Thu Jun 16 01:11:09 2022 -0700 Consolidate treesit parser create functions Merge treesit-parser-create, treesit-get-parser, treesit-get-parser-create into one: treesit-parser-create. * src/treesit.c (Ftreesit_parser_language): make BUFFER parameter optional, add new parameter NO-REUSE. Optionally reuse parser. * test/src/treesit-tests.el: Change all parser creation to use treesit-parser-create. 
Remove tests for the removed functions. * lisp/treesit.el (treesit-get-parser, treesit-get-parser-create): Remove. * lisp/treesit.el (treesit-set-ranges, treesit-get-ranges) (treesit-buffer-root-node, treesit-query-string) (treesit-font-lock-fontify-region, treesit-search-forward) (treesit-query-validate): Change to use treesit-parser-create. diff --git a/lisp/treesit.el b/lisp/treesit.el index 100ca23316..5b65e00e07 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -65,27 +65,13 @@ treesit-should-enable-p ;;; Parser API supplement -(defun treesit-get-parser (language) - "Find the first parser using LANGUAGE in `treesit-parser-list'." - (catch 'found - (dolist (parser treesit-parser-list) - (when (eq language (treesit-parser-language parser)) - (throw 'found parser))))) - -(defun treesit-get-parser-create (language) - "Find the first parser using LANGUAGE in `treesit-parser-list'. -If none exists, create one and return it." - (or (treesit-get-parser language) - (treesit-parser-create - (current-buffer) language))) - (defun treesit-parse-string (string language) "Parse STRING using a parser for LANGUAGE. Return the root node of the syntax tree." (with-temp-buffer (insert string) (treesit-parser-root-node - (treesit-parser-create (current-buffer) language)))) + (treesit-parser-create language)))) (defun treesit-language-at (point) "Return the language used at POINT." @@ -97,7 +83,7 @@ treesit-set-ranges "Set the ranges of PARSER-OR-LANG to RANGES." (treesit-parser-set-included-ranges (cond ((symbolp parser-or-lang) - (or (treesit-get-parser parser-or-lang) + (or (treesit-parser-create parser-or-lang) (error "Cannot find a parser for %s" parser-or-lang))) ((treesit-parser-p parser-or-lang) parser-or-lang) @@ -109,7 +95,7 @@ treesit-get-ranges "Get the ranges of PARSER-OR-LANG." 
(treesit-parser-included-ranges (cond ((symbolp parser-or-lang) - (or (treesit-get-parser parser-or-lang) + (or (treesit-parser-create parser-or-lang) (error "Cannot find a parser for %s" parser-or-lang))) ((treesit-parser-p parser-or-lang) parser-or-lang) @@ -178,7 +164,7 @@ treesit-buffer-root-node non-nil, use the first parser for LANGUAGE." (if-let ((parser (or (if language - (or (treesit-get-parser language) + (or (treesit-parser-create language) (error "Cannot find a parser for %s" language)) (or (car treesit-parser-list) (error "Buffer has no parser")))))) @@ -410,7 +396,7 @@ treesit-query-string See `treesit-query-capture' for QUERY." (with-temp-buffer (insert string) - (let ((parser (treesit-parser-create (current-buffer) language))) + (let ((parser (treesit-parser-create language))) (treesit-query-capture (treesit-parser-root-node parser) query)))) @@ -520,7 +506,7 @@ treesit-font-lock-fontify-region (dolist (setting treesit-font-lock-settings) (when-let* ((language (nth 0 setting)) (match-pattern (nth 1 setting)) - (parser (treesit-get-parser-create language))) + (parser (treesit-parser-create language))) (when-let ((node (treesit-node-on start end parser))) (let ((captures (treesit-query-capture node match-pattern @@ -888,7 +874,7 @@ treesit-search-forward the tree." 
(cl-loop for idx from 1 to (abs arg) for parser = (if lang - (treesit-get-parser-create lang) + (treesit-parser-create lang) (car treesit-parser-list)) for node = (if-let ((starting-point (point)) @@ -1088,7 +1074,7 @@ treesit-query-validate (cl-assert (or (consp query) (stringp query))) (let ((buf (get-buffer-create "*tree-sitter check query*"))) (with-temp-buffer - (treesit-get-parser-create language) + (treesit-parser-create language) (condition-case err (progn (treesit-query-in language query) (message "QUERY is valid")) diff --git a/src/treesit.c b/src/treesit.c index 5a53b09675..fcb333b8ec 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -692,23 +692,38 @@ DEFUN ("treesit-node-parser", DEFUN ("treesit-parser-create", Ftreesit_parser_create, Streesit_parser_create, - 2, 2, 0, + 1, 3, 0, doc: /* Create and return a parser in BUFFER for LANGUAGE. -The parser is automatically added to BUFFER's -`treesit-parser-list'. LANGUAGE should be the symbol of a -function provided by a tree-sitter language dynamic module, e.g., -'treesit-json. If BUFFER is nil, use the current buffer. */) - (Lisp_Object buffer, Lisp_Object language) +The parser is automatically added to BUFFER's `treesit-parser-list'. +LANGUAGE is a language symbol. If BUFFER is nil, use the current +buffer. If BUFFER already has a parser for LANGUAGE, return that +parser. If NO-REUSE is non-nil, always create a new parser. */) + (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse) { - if (NILP (buffer)) - buffer = Fcurrent_buffer (); + ts_initialize (); - CHECK_BUFFER (buffer); CHECK_SYMBOL (language); - ts_check_buffer_size (XBUFFER (buffer)); + struct buffer *old_buffer = current_buffer; + if (!NILP (buffer)) + { + CHECK_BUFFER (buffer); + set_buffer_internal (XBUFFER (buffer)); + } + ts_check_buffer_size (current_buffer); - ts_initialize (); + /* See if we can reuse a parser. 
*/ + for (Lisp_Object tail = Fsymbol_value (Qtreesit_parser_list); + NILP (no_reuse) && !NILP (tail); + tail = XCDR (tail)) + { + struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail)); + if (EQ (parser->language_symbol, language)) + { + set_buffer_internal (old_buffer); + return XCAR (tail); + } + } TSParser *parser = ts_parser_new (); TSLanguage *lang = ts_load_language (language, true); diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index c6d5f25472..32971fbacb 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -25,8 +25,7 @@ (ert-deftest treesit-basic-parsing () "Test basic parsing routines." (with-temp-buffer - (let ((parser (treesit-parser-create - (current-buffer) 'json))) + (let ((parser (treesit-parser-create 'json))) (should (eq parser (car treesit-parser-list))) (should @@ -55,8 +54,7 @@ treesit-node-api (let (parser root-node doc-node object-node pair-node) (progn (insert "[1,2,{\"name\": \"Bob\"},3]") - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser))) ;; `treesit-node-type'. @@ -129,8 +127,7 @@ treesit-query-api (let (parser root-node pattern doc-node object-node pair-node) (progn (insert "[1,2,{\"name\": \"Bob\"},3]") - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser))) @@ -181,8 +178,7 @@ treesit-narrow (progn (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx") (narrow-to-region (+ (point-min) 3) (- (point-max) 3)) - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser))) ;; This test is from the basic test. 
@@ -233,8 +229,7 @@ treesit-range (let (parser root-node pattern doc-node object-node pair-node) (progn (insert "[[1],oooxxx[1,2,3],xxx[1,2]]") - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser))) (should-error @@ -258,9 +253,9 @@ treesit-multi-lang (let (html css js html-range css-range js-range) (progn (insert "") - (setq html (treesit-get-parser-create 'html)) - (setq css (treesit-get-parser-create 'css)) - (setq js (treesit-get-parser-create 'javascript))) + (setq html (treesit-parser-create 'html)) + (setq css (treesit-parser-create 'css)) + (setq js (treesit-parser-create 'javascript))) ;; JavaScript. (setq js-range (treesit-query-range @@ -287,13 +282,6 @@ treesit-multi-lang (ert-deftest treesit-parser-supplemental () "Supplemental node functions." - ;; `treesit-get-parser'. - (with-temp-buffer - (should (equal (treesit-get-parser 'json) nil))) - ;; `treesit-get-parser-create'. - (with-temp-buffer - (should (not (equal (treesit-get-parser-create 'json) - nil)))) ;; `treesit-parse-string'. (should (equal (treesit-node-string (treesit-parse-string @@ -304,14 +292,10 @@ treesit-parser-supplemental (let (parser root-node doc-node object-node pair-node) (progn (insert "[1,2,{\"name\": \"Bob\"},3]") - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser)) (setq doc-node (treesit-node-child root-node 0))) - ;; `treesit-get-parser'. - (should (not (equal (treesit-get-parser 'json) - nil))) ;; `treesit-language-at'. 
(should (equal (treesit-language-at (point)) 'json)) @@ -326,8 +310,7 @@ treesit-node-supplemental (let (parser root-node doc-node array-node) (progn (insert "[1,2,{\"name\": \"Bob\"},3]") - (setq parser (treesit-parser-create - (current-buffer) 'json)) + (setq parser (treesit-parser-create 'json)) (setq root-node (treesit-parser-root-node parser)) (setq doc-node (treesit-node-child root-node 0))) commit 7cee82a91d287e42e6596960cbee17157cde4b29 Author: Yuan Fu Date: Wed Jun 15 21:53:15 2022 -0700 Fix treesit function ts_record_change and friends In ts_record_change, the way we calculate tree-sitter change was wrong: ptrdiff_t affected_start = max (visible_beg, start_byte) - visible_beg; ptrdiff_t affected_old_end = min (visible_end, affected_start + bytes_del); ptrdiff_t affected_new_end = affected_start + bytes_ins; I changed it to below (also renamed variables) ptrdiff_t start_offset = min (visible_end, max (visible_beg, start_byte)) - visible_beg; ptrdiff_t old_end_offset = min (visible_end, max (visible_beg, old_end_byte)) - visible_beg; ptrdiff_t new_end_offset = min (visible_end, max (visible_beg, new_end_byte)) - visible_beg; Also previously only visible_end is changed (in a wrong way) XTS_PARSER (lisp_parser)->visible_end = affected_new_end; Now we have a whole new bunch of code that makes the right change. * src/treesit.c (ts_tree_edit_1): Add assertion. (ts_record_change): See above. (ts_ensure_position_synced): Add assertion. (ts_ensure_parsed): Only free if non-NULL. (make_ts_parser): Add assertion. (Ftreesit_parser_set_included_ranges): Ensure parsed before setting ranges. (Ftreesit_parser_included_ranges): Add assertion. 
diff --git a/src/treesit.c b/src/treesit.c index 585683aa1b..5a53b09675 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -324,6 +324,9 @@ DEFUN ("treesit-language-available-p", ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) { + eassert (start_byte >= 0); + eassert (start_byte <= old_end_byte); + eassert (start_byte <= new_end_byte); TSPoint dummy_point = {0, 0}; TSInputEdit edit = {(uint32_t) start_byte, (uint32_t) old_end_byte, @@ -356,24 +359,56 @@ ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, insert, and think of them as moving unchanged text back and forth. After all, the whole point of updating the tree is to update the position of unchanged text. */ - ptrdiff_t bytes_del = old_end_byte - start_byte; - ptrdiff_t bytes_ins = new_end_byte - start_byte; - ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; - - ptrdiff_t affected_start = - max (visible_beg, start_byte) - visible_beg; - ptrdiff_t affected_old_end = - min (visible_end, affected_start + bytes_del); - ptrdiff_t affected_new_end = - affected_start + bytes_ins; - - ts_tree_edit_1 (tree, affected_start, affected_old_end, - affected_new_end); - XTS_PARSER (lisp_parser)->visible_end = affected_new_end; + eassert (visible_beg >= 0); + eassert (visible_beg <= visible_end); + + /* AFFECTED_START/OLD_END/NEW_END are (0-based) offsets from + VISIBLE_BEG. min(visi_end, max(visi_beg, value)) clips + value into [visi_beg, visi_end], and subtracting visi_beg + gives the offset from visi_beg. 
*/ + ptrdiff_t start_offset = + min (visible_end, + max (visible_beg, start_byte)) - visible_beg; + ptrdiff_t old_end_offset = + min (visible_end, + max (visible_beg, old_end_byte)) - visible_beg; + ptrdiff_t new_end_offset = + min (visible_end, + max (visible_beg, new_end_byte)) - visible_beg; + eassert (start_offset <= old_end_offset); + eassert (start_offset <= new_end_offset); + + ts_tree_edit_1 (tree, start_offset, old_end_offset, + new_end_offset); XTS_PARSER (lisp_parser)->need_reparse = true; XTS_PARSER (lisp_parser)->timestamp++; + + /* VISIBLE_BEG/END records tree-sitter's range of view in + the buffer. Ee need to adjust them when tree-sitter's + view changes. */ + ptrdiff_t visi_beg_delta; + if (old_end_byte > new_end_byte) + { + /* Move backward. */ + visi_beg_delta = min (visible_beg, new_end_byte) + - min (visible_beg, old_end_byte); + } + else + { + /* Move forward. */ + visi_beg_delta = old_end_byte < visible_beg + ? new_end_byte - old_end_byte : 0; + } + XTS_PARSER (lisp_parser)->visible_beg + = visible_beg + visi_beg_delta; + XTS_PARSER (lisp_parser)->visible_end + = visible_end + visi_beg_delta + + (new_end_offset - old_end_offset); + eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0); + eassert (XTS_PARSER (lisp_parser)->visible_beg + <= XTS_PARSER (lisp_parser)->visible_end); } } } @@ -389,6 +424,9 @@ ts_ensure_position_synced (Lisp_Object parser) struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; + eassert (0 <= visible_beg); + eassert (visible_beg <= visible_end); + /* Before we parse or set ranges, catch up with the narrowing situation. We change visible_beg and visible_end to match BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the @@ -403,6 +441,7 @@ ts_ensure_position_synced (Lisp_Object parser) /* Tree-sitter sees: insert at the beginning. 
*/ ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); visible_beg = BUF_BEGV_BYTE (buffer); + eassert (visible_beg <= visible_end); } /* 2. Make sure visible_end = BUF_ZV_BYTE. */ if (visible_end < BUF_ZV_BYTE (buffer)) @@ -412,6 +451,7 @@ ts_ensure_position_synced (Lisp_Object parser) visible_end - visible_beg, BUF_ZV_BYTE (buffer) - visible_beg); visible_end = BUF_ZV_BYTE (buffer); + eassert (visible_beg <= visible_end); } else if (visible_end > BUF_ZV_BYTE (buffer)) { @@ -420,6 +460,7 @@ ts_ensure_position_synced (Lisp_Object parser) visible_end - visible_beg, BUF_ZV_BYTE (buffer) - visible_beg); visible_end = BUF_ZV_BYTE (buffer); + eassert (visible_beg <= visible_end); } /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ if (visible_beg < BUF_BEGV_BYTE (buffer)) @@ -427,6 +468,7 @@ ts_ensure_position_synced (Lisp_Object parser) /* Tree-sitter sees: delete at the beginning. */ ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); visible_beg = BUF_BEGV_BYTE (buffer); + eassert (visible_beg <= visible_end); } eassert (0 <= visible_beg); eassert (visible_beg <= visible_end); @@ -477,7 +519,8 @@ ts_ensure_parsed (Lisp_Object parser) xsignal1 (Qtreesit_parse_error, buf); } - ts_tree_delete (tree); + if (tree != NULL) + ts_tree_delete (tree); XTS_PARSER (parser)->tree = new_tree; XTS_PARSER (parser)->need_reparse = false; } @@ -551,6 +594,7 @@ make_ts_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->need_reparse = true; lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer)); lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer)); + eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } @@ -673,10 +717,7 @@ DEFUN ("treesit-parser-create", ts_parser_set_language (parser, lang); Lisp_Object lisp_parser - = make_ts_parser (buffer, parser, NULL, language); - - struct buffer *old_buffer = current_buffer; - set_buffer_internal (XBUFFER (buffer)); + = make_ts_parser 
(Fcurrent_buffer (), parser, NULL, language); Fset (Qtreesit_parser_list, Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list))); @@ -835,6 +876,11 @@ DEFUN ("treesit-parser-included-ranges", (XTS_PARSER (parser)->parser, &len); if (len == 0) return Qnil; + + /* Our return value depends on the buffer state (BUF_BEGV_BYTE, + etc), so we need to sync up. */ + ts_ensure_position_synced (parser); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); Lisp_Object list = Qnil; @@ -843,6 +889,9 @@ DEFUN ("treesit-parser-included-ranges", TSRange range = ranges[idx]; uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); + eassert (BUF_BEGV_BYTE (buffer) <= beg_byte); + eassert (beg_byte <= end_byte); + eassert (end_byte <= BUF_ZV_BYTE (buffer)); Lisp_Object lisp_range = Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) , commit bd1b27b7c7bbd969cf76409499bb84a83600c42a Author: Yuan Fu Date: Wed Jun 15 12:17:10 2022 -0700 ; Minor optimization in treesit range function * src/treesit.c (Ftreesit_parser_set_included_ranges): Lift assignment out of the loop. diff --git a/src/treesit.c b/src/treesit.c index 88d5ea9122..585683aa1b 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -786,12 +786,11 @@ DEFUN ("treesit-parser-set-included-ranges", /* Set ranges for PARSER. */ ptrdiff_t len = list_length (ranges); TSRange *ts_ranges = malloc (sizeof(TSRange) * len); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges)) { Lisp_Object range = XCAR (ranges); - struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); - EMACS_INT beg_byte = buf_charpos_to_bytepos (buffer, XFIXNUM (XCAR (range))); EMACS_INT end_byte = buf_charpos_to_bytepos commit a4d7bcccba5ffaafb769d8f517c159d64b0887bc Author: Yuan Fu Date: Wed Jun 15 12:15:24 2022 -0700 ; * src/treesit.c: Add comment to explain design decisions. 
diff --git a/src/treesit.c b/src/treesit.c index 8d0f2e517a..88d5ea9122 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -89,6 +89,14 @@ Copyright (C) 2021-2022 Free Software Foundation, Inc. - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and node type. + We don't parse at every keystroke. Instead we only record the + changes at each keystroke, and only parse when requested. It is + possible that lazy parsing is worse: instead of dispersed little + pauses, now you have less frequent but larger pauses. I doubt + there will be any perceived difference, as the lazy parsing is + going to be pretty frequent anyway. Also this (lazy parsing) is + what the mailing list guys wanted. + Because it is pretty slow (comparing to other tree-sitter operations) for tree-sitter to parse the query and produce a query object, it is very wasteful to reparse the query every time commit 0332b8e2c5f2909b108ae4b63641b38a42c27fd3 Author: Yuan Fu Date: Wed Jun 15 12:14:54 2022 -0700 ; * src/treesit.c (ts_check_buffer_size): Improve error message. diff --git a/src/treesit.c b/src/treesit.c index 9e510a921e..8d0f2e517a 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -434,7 +434,7 @@ ts_check_buffer_size (struct buffer *buffer) (BUF_Z (buffer) - BUF_BEG (buffer)); if (buffer_size > UINT32_MAX) xsignal2 (Qtreesit_buffer_too_large, - build_pure_c_string ("Buffer size too large, size:"), + build_pure_c_string ("Buffer size larger than 4GB, size:"), make_fixnum (buffer_size)); } commit d6b00f7ed96f91d64df9dc3523c9dd36a5ecec57 Author: Yuan Fu Date: Wed Jun 15 12:14:26 2022 -0700 ; * src/treesit.c (ts_read_buffer): Clarify comments. diff --git a/src/treesit.c b/src/treesit.c index 92692f550f..9e510a921e 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -501,7 +501,7 @@ ts_read_buffer (void *parser, uint32_t byte_index, /* This function could run from a user command, so it is better to do nothing instead of raising an error. 
(It was a pain in the a** to decrypt mega-if-conditions in Emacs source, so I wrote the two - branches separately.) */ + branches separately, you are welcome.) */ if (!BUFFER_LIVE_P (buffer)) { beg = NULL; commit d729e3e3fcad4270f6da358b12ab8e4a52fdc204 Author: Yuan Fu Date: Wed Jun 15 11:33:35 2022 -0700 * src/treesit.c (ts_check_range_argument): Check for point-min/max. diff --git a/src/treesit.c b/src/treesit.c index dc64aef425..92692f550f 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -719,7 +719,11 @@ DEFUN ("treesit-parser-root-node", static void ts_check_range_argument (Lisp_Object ranges) { - EMACS_INT last_point = 1; + struct buffer *buffer = current_buffer; + ptrdiff_t point_min = BUF_BEGV (buffer); + ptrdiff_t point_max = BUF_ZV (buffer); + EMACS_INT last_point = point_min; + for (Lisp_Object tail = ranges; !NILP (tail); tail = XCDR (tail)) { @@ -730,11 +734,10 @@ ts_check_range_argument (Lisp_Object ranges) CHECK_FIXNUM (XCDR (range)); EMACS_INT beg = XFIXNUM (XCAR (range)); EMACS_INT end = XFIXNUM (XCDR (range)); - /* TODO: Maybe we should check for point-min/max, too? */ - if (!(last_point <= beg && beg <= end)) + if (!(last_point <= beg && beg <= end && end <= point_max)) xsignal2 (Qtreesit_range_invalid, build_pure_c_string - ("RANGE is either overlapping or out-of-order"), + ("RANGE is either overlapping or out-of-order or out-of-range"), ranges); last_point = end; } commit b162faba0bc04b8584af6f536bef8d0525076a28 Author: Yuan Fu Date: Tue Jun 14 21:04:52 2022 -0700 Fix compile warnings and errors in treesit.c * src/treesit.c (ts_initialize): Fix. (Ftreesit_parser_set_included_ranges): Fix. (Ftreesit_query_compile): Fix. 
diff --git a/src/treesit.c b/src/treesit.c index df8c992bb5..dc64aef425 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -123,7 +123,7 @@ ts_calloc_wrapper (size_t n, size_t size) } static void -ts_initialize () +ts_initialize (void) { if (!ts_initialized) { @@ -766,7 +766,7 @@ DEFUN ("treesit-parser-set-included-ranges", /* If RANGES is nil, make parser to parse the whole document. To do that we give tree-sitter a 0 length, the range is a dummy. */ - TSRange ts_range = {0, 0, 0, 0}; + TSRange ts_range = {{0, 0}, {0, 0}, 0, 0}; success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, &ts_range , 0); } @@ -1527,7 +1527,7 @@ DEFUN ("treesit-query-compile", query. */) (Lisp_Object language, Lisp_Object query) { - if (!Ftreesit_query_p (query)) + if (NILP (Ftreesit_query_p (query))) wrong_type_argument (Qtreesit_query_p, query); CHECK_SYMBOL (language); if (TS_COMPILED_QUERY_P (query)) commit 98bfb240818bae14cd87a1ffeb8fae7cb7846e05 Merge: 184d212042 787c4ad8b0 Author: Yuan Fu Date: Tue Jun 14 15:59:46 2022 -0700 Merge remote-tracking branch 'savannah/master' into feature/tree-sitter commit 184d212042ffa5a4f02c92085d9b6e8346d66e99 Merge: a7288594f4 316bdc334c Author: Yuan Fu Date: Tue Jun 14 15:52:01 2022 -0700 Merge branch 'feature/tree-sitter-depth-control' into feature/tree-sitter commit 316bdc334ca4f3b7101ac6879e84041646852488 Author: Yuan Fu Date: Tue Jun 14 15:49:44 2022 -0700 Add manual for treesit-traverse-forward and friends * doc/lispref/parsing.texi (Retrieving Node): Add manual entry for treesit-traverse-depth-first, treesit-traverse-breadth-first, treesit-traverse-forward. * lisp/treesit.el (treesit-traverse-forward): Fix docstring. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 72be91877b..cadddf0c00 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -630,6 +630,73 @@ Retrieving Node farthest parent that still satisfies @var{pred}. 
@end defun +@cindex trees-sitter tree traversal +@defun treesit-traverse-depth-first node pred &optional step depth +Traverse the subtree of @var{node} depth-first. Traverse starting from +@var{node} (i.e., @var{node} is passed to @var{pred}). For each node +traversed, we call @var{pred} with the node, and we stop and return +the node if @var{pred} returns non-nil. If no node satisfies +@var{pred}, return nil. + +If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. +(The quantity of @var{step} doesn't matter.) + +@var{depth} can be a positive integer or 0, meaning go @var{depth} +levels deep, counting from @var{node}, or nil, meaning there is no +limit. For example, a value 0 means only traverse @var{node} itself, +a value 1 means traverse @var{node} and its immediate children. +@end defun + +@defun treesit-traverse-breadth-first node pred &optional step +Traverse the subtree of @var{node} breadth-first. Traverse starting +from @var{node} (i.e., @var{node} is passed to @var{pred}). For each +node traversed, call @var{pred} with the node, stop and return the +node if @var{pred} returns non-nil. If no node satisfies @var{pred}, +return nil. + +If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. +(The quantity of @var{step} doesn't matter.) +@end defun + +@defun treesit-traverse-forward node pred &optional step depth +Traverses the whole tree forward from NODE depth-first. Traverse +starting from @var{node} (i.e., @var{node} is passed to @var{pred}). +For each node traversed, call @var{pred} with the node, stop and +return the node if @var{pred} returns non-nil. If no node satisfies +@var{pred}, return nil. + +If @var{step} >= 0 or nil, go forward, if @var{step} < 0, go backward. +(The quantity of @var{step} doesn't matter.) 
+ +Traversing forward means that for a tree like the below where +@var{node} is marked 1, traverse as numbered: + +@example +@group + 16 + | + 3--------4-----------8 + | | | + o--o-+--1 5--+--6 9---+-----12 + | | | | | | + o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15 +@end group +@end example + +@var{depth} can be a positive integer, 0, nil, or @code{'up}. A +positive integer or 0 means go @var{depth} deep counting from +@var{node}. A nil means no limit. And a symbol @code{'up} means go +upwards only: only traverse to sibling and parent, never go down to +children. + +The difference between 0 and @code{'up} is subtle: in the above +example, if given 0 as @var{depth}, node 1 3 4 5 6 8 9 12 16 are +visited; if given @code{'up} as @var{depth}, only node 1 3 4 8 16 are +visited. +@end defun + @node Accessing Node @section Accessing Node Information diff --git a/lisp/treesit.el b/lisp/treesit.el index d6d092ee6a..4e35a46650 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -311,8 +311,8 @@ treesit-traverse-forward if STEP < 0, go backward. If no node satisfies PRED, return nil. -Traversing forward depth-first means, for a tree like the below -where NODE is marked 1, traverse as numbered: +Traversing forward depth-first means that for a tree like the +below where NODE is marked 1, traverse as numbered: 16 | @@ -326,11 +326,12 @@ treesit-traverse-forward DEPTH can be a positive integer, 0, nil, or 'up. A positive integer or 0 means go DEPTH deep counting from NODE. A nil means -no limit. And a symbol 'up means upward only: only traverse -sibling and parent, never go down. The difference between 0 and -'up is subtle: in the above example, if given 0 as DEPTH, node 1 -3 4 5 6 8 9 12 16 are visited; if given t as DEPTH, only node 1 3 -4 8 16 are visited." +no limit. And a symbol 'up means go upwards only: only traverse +sibling and parent, never go down to children. 
+ +The difference between 0 and 'up is subtle: in the above example, +if given 0 as DEPTH, node 1 3 4 5 6 8 9 12 16 are visited; if +given 'up as DEPTH, only node 1 3 4 8 16 are visited." ;; First try NODE's subtree, but only under these conditions: if ;; DEPTH is a number, it has to be greater than 0, if it's a symbol, ;; it cannot be 'up. commit a7288594f493a20664f1934a9fc760537caadcc3 Author: Yuan Fu Date: Tue Jun 14 14:30:39 2022 -0700 Change treesit-check-query and mention it in documentation * doc/lispref/parsing.texi (Pattern Matching): Mention it. * lisp/treesit.el (treesit-check-query): Rename to treesit-query-validate. * src/treesit.c (Ftreesit_query_capture, Ftreesit_query_compile): Mention it. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 1c4a7805a3..36c03364e3 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -816,7 +816,7 @@ Pattern Matching @vindex treesit-query-error This function raise a @var{treesit-query-error} if @var{query} is malformed. The signal data contains a description of the specific -error. +error. You can use @code{treesit-query-validate} to debug the query. @end defun @defun treesit-query-in source query &optional beg end @@ -1111,6 +1111,10 @@ Pattern Matching @defun treesit-query-compile language query This function compiles @var{query} for @var{language} into a compiled query object and returns it. + +This function raise a @var{treesit-query-error} if @var{query} is +malformed. The signal data contains a description of the specific +error. You can use @code{treesit-query-validate} to debug the query. 
@end defun @defun treesit-expand-query query diff --git a/lisp/treesit.el b/lisp/treesit.el index 09f750f9d5..ad90d9f9f0 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1038,10 +1038,11 @@ treesit-inspect-mode (remove '(:eval treesit--inspect-name) mode-line-misc-info)))) -(defun treesit-check-query (query language) +(defun treesit-query-validate (language query) "Check if QUERY is valid for LANGUAGE. If QUERY is invalid, display the query in a popup buffer, jumps to the offending pattern and highlight the pattern." + (cl-assert (or (consp query) (stringp query))) (let ((buf (get-buffer-create "*tree-sitter check query*"))) (with-temp-buffer (treesit-get-parser-create language) diff --git a/src/treesit.c b/src/treesit.c index 5b344a2ea1..df8c992bb5 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1522,8 +1522,9 @@ DEFUN ("treesit-query-compile", Querying a compiled query is much faster than an uncompiled one. LANGUAGE is the language this query is for. -Signals treesit-query-error if QUERY is malformed or something -else goes wrong. */) +Signals treesit-query-error if QUERY is malformed or something else +goes wrong. You can use `treesit-query-validate' to debug the +query. */) (Lisp_Object language, Lisp_Object query) { if (!Ftreesit_query_p (query)) @@ -1564,8 +1565,9 @@ DEFUN ("treesit-query-capture", BEG and END, if both non-nil, specifies the range in which the query is executed. -Raise an treesit-query-error if QUERY is malformed, or something -else goes wrong. */) +Signals treesit-query-error if QUERY is malformed or something else +goes wrong. You can use `treesit-query-validate' to debug the +query. */) (Lisp_Object node, Lisp_Object query, Lisp_Object beg, Lisp_Object end) { commit 296900184d2959f8b85dbf2d92c1b627b25cd3f6 Author: Yuan Fu Date: Tue Jun 14 14:17:17 2022 -0700 Add treesit-query-compile to manual * doc/lispref/parsing.texi: Add treesit-query-compile. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index c6909da002..1c4a7805a3 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -804,10 +804,10 @@ Pattern Matching Now we can introduce the query functions. @defun treesit-query-capture node query &optional beg end -This function matches patterns in @var{query} in @var{node}. -Argument @var{query} can be a either string or a s-expression. For -now, we focus on the string syntax; s-expression syntax is described -at the end of the section. +This function matches patterns in @var{query} in @var{node}. Argument +@var{query} can be either a string, a s-expression, or a compiled +query object. For now, we focus on the string syntax; s-expression +syntax and compiled query are described at the end of the section. The function returns all captured nodes in a list of @code{(@var{capture_name} . @var{node})}. If @var{beg} and @var{end} @@ -1101,10 +1101,26 @@ Pattern Matching @end group @end example +@heading Compiling queries + +If a query will be used repeatedly, especially in tight loops, it is +important to compile that query, because a compiled query is much +faster than an uncompiled one. A compiled query can be used anywhere +a query is accepted. + +@defun treesit-query-compile language query +This function compiles @var{query} for @var{language} into a compiled +query object and returns it. +@end defun + @defun treesit-expand-query query This function expands the s-expression @var{query} into a string -query. It is usually a good idea to expand the s-expression patterns -into strings for font-lock queries since they are called repeatedly. +query. +@end defun + +@defun treesit-expand-pattern pattern +This function expands the s-expression @var{pattern} into a string +pattern. @end defun Finally, tree-sitter project's documentation about commit 016e4ca7a74b47979d6180672b13d02e2269baed Author: Yuan Fu Date: Tue Jun 14 14:16:11 2022 -0700 ; * doc/lispref/parsing.texi: Minor fix-up. 
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 72be91877b..c6909da002 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -93,7 +93,7 @@ Language Definitions @var{library-base-name} is the base filename for the dynamic library (conventionally @code{libtree-sitter-@var{language}}), and @var{function-name} is the function provided by the library -(conventionally @code{tree_sitter_@var{language}). For example, +(conventionally @code{tree_sitter_@var{language}}). For example, @example (cool-lang "libtree-sitter-coool" "tree_sitter_cooool") @@ -763,7 +763,7 @@ Pattern Matching @cindex Tree-sitter query syntax @cindex Tree-sitter query pattern -A @dfn{query} consists of multiple @dfn{patterns}, each pattern is an +A @dfn{query} consists of multiple @dfn{patterns}. Each pattern is an s-expression that matches a certain node in the syntax node. A pattern has the following shape: @@ -1107,8 +1107,8 @@ Pattern Matching into strings for font-lock queries since they are called repeatedly. @end defun -Tree-sitter project's documentation about pattern-matching can be -found at +Finally, tree-sitter project's documentation about +pattern-matching can be found at @uref{https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries}. @node Multiple Languages commit 57b52504744d16b7b1ddf9225bb7c4d2b92510e3 Author: Yuan Fu Date: Tue Jun 14 13:32:14 2022 -0700 Add test for treesit-query-compile * test/src/treesit-tests.el (treesit-query-api): Rename pattern to query, and add treesit-query-compile into the mix. diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 1b20b86bc9..c6d5f25472 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -134,37 +134,45 @@ treesit-query-api (setq root-node (treesit-parser-root-node parser))) - (dolist (pattern + ;; Test `treesit-query-capture' on string, sexp and compiled + ;; queries. + (dolist (query1 + ;; String query. 
'("(string) @string (pair key: (_) @keyword) ((_) @bob (#match \"^B.b$\" @bob)) (number) @number ((number) @n3 (#equal \"3\" @n3)) " + ;; Sexp query. ((string) @string (pair key: (_) @keyword) ((_) @bob (:match "^B.b$" @bob)) (number) @number ((number) @n3 (:equal "3" @n3))))) - (should - (equal - '((number . "1") (number . "2") - (keyword . "\"name\"") - (string . "\"name\"") - (string . "\"Bob\"") - (bob . "Bob") - (number . "3") - (n3 . "3")) - (mapcar (lambda (entry) - (cons (car entry) - (treesit-node-text - (cdr entry)))) - (treesit-query-capture root-node pattern)))) - (should - (equal - "(type field: (_) @capture .) ? * + \"return\"" - (treesit-expand-query - '((type field: (_) @capture :anchor) - :? :* :+ "return")))))))) + ;; Test `treesit-query-compile'. + (dolist (query (list query1 + (treesit-query-compile 'json query1))) + (should + (equal + '((number . "1") (number . "2") + (keyword . "\"name\"") + (string . "\"name\"") + (string . "\"Bob\"") + (bob . "Bob") + (number . "3") + (n3 . "3")) + (mapcar (lambda (entry) + (cons (car entry) + (treesit-node-text + (cdr entry)))) + (treesit-query-capture root-node query)))))) + ;; Test `treesit-expand-query'. + (should + (equal + "(type field: (_) @capture .) ? * + \"return\"" + (treesit-expand-query + '((type field: (_) @capture :anchor) + :? :* :+ "return"))))))) (ert-deftest treesit-narrow () "Tests if narrowing works." commit e171ef933feefd67d7f1b3b3693ce730111660e9 Author: Yuan Fu Date: Tue Jun 14 11:36:22 2022 -0700 Support compiled queries in treesit-query-capture Last commit added this new type, this commit adds functionalities. treesit.el only has documentation changes. * lisp/treesit.el (treesit-query-in, treesit-font-lock-settings, treesit-defun-query): Update docstring. * src/treesit.c (make_ts_query): New function. (Ftreesit_query_compile): New function. (Ftreesit_query_capture): Remove code that creates a query object and instead either use make_ts_query or use the give compiled query. 
Free the query object conditonally. (syms_of_treesit): New symbol. diff --git a/lisp/treesit.el b/lisp/treesit.el index 78dfcae7e5..09f750f9d5 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -366,9 +366,12 @@ treesit-query-in language; if a parser, use the root node of that parser; if a node, use that node. -QUERY is either a string query or a sexp query. See Info node -`(elisp)Pattern Matching' for how to write a query pattern in either -string or s-expression form. +QUERY is either a string query, a sexp query, or a compiled +query. See Info node `(elisp)Pattern Matching' for how to write +a query in either string or s-expression form. When using +repeatedly, a compiled query is much faster than a string or sexp +one, so it is recommend to compile your queries if it will be +used over and over. BEG and END, if _both_ non-nil, specifies the range in which the query is executed. @@ -442,8 +445,12 @@ treesit-font-lock-settings LANGUAGE is the language symbol. See Info node `(elisp)Language Definitions'. -QUERY is either a string query or a sexp query. -See Info node `(elisp)Pattern Matching' for writing queries. +QUERY is either a string query, a sexp query, or a compiled +query. See Info node `(elisp)Pattern Matching' for how to write +a query in either string or s-expression form. When using +repeatedly, a compiled query is much faster than a string or sexp +one, so it is recommend to compile your queries if it will be +used over and over. Capture names in QUERY should be face names like `font-lock-keyword-face'. The captured node will be fontified @@ -923,7 +930,9 @@ treesit-search-end (defvar-local treesit-defun-query nil "A tree-sitter query that matches function/class definitions. Capture names don't matter. This variable is used by navigation -functions like `treesit-beginning-of-defun'.") +functions like `treesit-beginning-of-defun'. 
+ +It is recommended to use compiled query for this variable.") (defun treesit-beginning-of-defun (&optional arg) "Move backward to the beginning of a defun. diff --git a/src/treesit.c b/src/treesit.c index 3c8edc9213..5b344a2ea1 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -88,6 +88,28 @@ Copyright (C) 2021-2022 Free Software Foundation, Inc. parser of buffer changes. - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and node type. + + Because it is pretty slow (comparing to other tree-sitter + operations) for tree-sitter to parse the query and produce a query + object, it is very wasteful to reparse the query every time + treesit-query-capture is called, and it completely kills the + performance of querying in a loop for a moderate amount of times + (hundreds of queries takes seconds rather than milliseconds to + complete). Therefore we want some caching. We can either use a + search.c style transparent caching, or simply expose a new type, + compiled-ts-query and let the user to manually compile AOT. I + believe AOT compiling gives users more control, makes the + performance stable and easy to understand (compiled -> fast, + uncompiled -> slow), and avoids some edge cases transparent cache + could have (see below). So I implemented the AOT compilation. + + Problems a transparent cache could have: Suppose we store cache + entries in a fixed-length linked-list, and compare with EQ. 1) + One-off query could kick out useful cache. 2) if the user messed + up and the query doesn't EQ to the cache anymore, the performance + mysteriously drops. 3) what if a user uses so many stuff that the + default cache size (20) is not enough and we end up thrashing? + These are all imagined scenarios but they are not impossible :-) */ /*** Initialization */ @@ -536,6 +558,31 @@ make_ts_node (Lisp_Object parser, TSNode node) return make_lisp_ptr (lisp_node, Lisp_Vectorlike); } +/* Make a compiled query struct. Return NULL if error occurs. 
QUERY + has to be either a cons or a string. */ +static struct Lisp_TS_Query * +make_ts_query (Lisp_Object query, const TSLanguage *language, + uint32_t *error_offset, TSQueryError *error_type) +{ + if (CONSP (query)) + query = Ftreesit_expand_query (query); + char *source = SSDATA (query); + + TSQuery *ts_query = ts_query_new (language, source, strlen (source), + error_offset, error_type); + TSQueryCursor *ts_cursor = ts_query_cursor_new (); + + if (ts_query == NULL) + return NULL; + + struct Lisp_TS_Query *lisp_query + = ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_TS_Query, + PVEC_TS_COMPILED_QUERY); + lisp_query->query = ts_query; + lisp_query->cursor = ts_cursor; + return lisp_query; +} + DEFUN ("treesit-parser-p", Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0, doc: /* Return t if OBJECT is a tree-sitter parser. */) @@ -1467,6 +1514,39 @@ ts_query_error_to_string (TSQueryError error) return pass; } +DEFUN ("treesit-query-compile", + Ftreesit_query_compile, + Streesit_query_compile, 2, 2, 0, + doc: /* Compile QUERY to a compiled query. + +Querying a compiled query is much faster than an uncompiled one. +LANGUAGE is the language this query is for. + +Signals treesit-query-error if QUERY is malformed or something +else goes wrong. 
*/) + (Lisp_Object language, Lisp_Object query) +{ + if (!Ftreesit_query_p (query)) + wrong_type_argument (Qtreesit_query_p, query); + CHECK_SYMBOL (language); + if (TS_COMPILED_QUERY_P (query)) + return query; + + TSLanguage *ts_lang = ts_load_language (language, true); + uint32_t error_offset; + TSQueryError error_type; + + struct Lisp_TS_Query *lisp_query + = make_ts_query (query, ts_lang, &error_offset, &error_type); + + if (lisp_query == NULL) + xsignal2 (Qtreesit_query_error, + build_string (ts_query_error_to_string (error_type)), + make_fixnum (error_offset + 1)); + + return make_lisp_ptr (lisp_query, Lisp_Vectorlike); +} + DEFUN ("treesit-query-capture", Ftreesit_query_capture, Streesit_query_capture, 2, 4, 0, @@ -1475,9 +1555,11 @@ DEFUN ("treesit-query-capture", Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name assigned to the node in PATTERN. NODE is the captured node. -QUERY is either a string query or a sexp query. See Info node -`(elisp)Pattern Matching' for how to write a query in either string or -s-expression form. +QUERY is either a string query, a sexp query, or a compiled query. +See Info node `(elisp)Pattern Matching' for how to write a query in +either string or s-expression form. When using repeatedly, a compiled +query is much faster than a string or sexp one, so it is recommend to +compile your queries if it will be used over and over. BEG and END, if both non-nil, specifies the range in which the query is executed. @@ -1493,10 +1575,9 @@ DEFUN ("treesit-query-capture", if (!NILP (end)) CHECK_INTEGER (end); - if (CONSP (query)) - query = Ftreesit_expand_query (query); - else - CHECK_STRING (query); + if (!(TS_COMPILED_QUERY_P (query) + || CONSP (query) || STRINGP (query))) + wrong_type_argument (Qtreesit_query_p, query); /* Extract C values from Lisp objects. 
*/ TSNode ts_node = XTS_NODE (node)->node; @@ -1505,25 +1586,34 @@ DEFUN ("treesit-query-capture", XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; const TSLanguage *lang = ts_parser_language (XTS_PARSER (lisp_parser)->parser); - char *source = SSDATA (query); /* Initialize query objects, and execute query. */ - uint32_t error_offset; - TSQueryError error_type; - /* TODO: We could cache the query object, so that repeatedly - querying with the same query can reuse the query object. It also - saves us from expanding the sexp query into a string. I don't - know how much time that could save though. */ - TSQuery *ts_query = ts_query_new (lang, source, strlen (source), - &error_offset, &error_type); - TSQueryCursor *cursor = ts_query_cursor_new (); - - if (ts_query == NULL) + struct Lisp_TS_Query *lisp_query; + /* If the lisp query is temporary, we need to free it after use. */ + bool lisp_query_temp_p; + if (TS_COMPILED_QUERY_P (query)) { - xsignal2 (Qtreesit_query_error, - build_string (ts_query_error_to_string (error_type)), - make_fixnum (error_offset + 1)); + lisp_query_temp_p = false; + lisp_query = XTS_COMPILED_QUERY (query); } + else + { + lisp_query_temp_p = true; + uint32_t error_offset; + TSQueryError error_type; + lisp_query = make_ts_query (query, lang, + &error_offset, &error_type); + if (lisp_query == NULL) + { + xsignal2 (Qtreesit_query_error, + build_string + (ts_query_error_to_string (error_type)), + make_fixnum (error_offset + 1)); + } + } + TSQuery *ts_query = lisp_query->query; + TSQueryCursor *cursor = lisp_query->cursor; + if (!NILP (beg) && !NILP (end)) { EMACS_INT beg_byte = XFIXNUM (beg); @@ -1578,8 +1668,11 @@ DEFUN ("treesit-query-capture", result = prev_result; } } - ts_query_delete (ts_query); - ts_query_cursor_delete (cursor); + if (lisp_query_temp_p) + { + ts_query_delete (ts_query); + ts_query_cursor_delete (cursor); + } return Fnreverse (result); } @@ -1592,6 +1685,7 @@ syms_of_treesit (void) DEFSYM (Qtreesit_parser_p, 
"treesit-parser-p"); DEFSYM (Qtreesit_node_p, "treesit-node-p"); DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p"); + DEFSYM (Qtreesit_query_p, "treesit-query-p"); DEFSYM (Qnamed, "named"); DEFSYM (Qmissing, "missing"); DEFSYM (Qextra, "extra"); @@ -1705,5 +1799,6 @@ syms_of_treesit (void) defsubr (&Streesit_expand_pattern); defsubr (&Streesit_expand_query); + defsubr (&Streesit_query_compile); defsubr (&Streesit_query_capture); } commit a8428b917da3f81feb1aad052a81a3ddfdebec28 Author: Yuan Fu Date: Mon Jun 13 23:07:19 2022 -0700 * src/treesit.c (Ftreesit_query_p): New function. diff --git a/src/treesit.c b/src/treesit.c index 19f8343765..3c8edc9213 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -569,6 +569,18 @@ DEFUN ("treesit-compiled-query-p", return Qnil; } +DEFUN ("treesit-query-p", + Ftreesit_query_p, Streesit_query_p, 1, 1, 0, + doc: /* Return t if OBJECT is a generic tree-sitter query. */) + (Lisp_Object object) +{ + if (TS_COMPILED_QUERY_P (object) + || CONSP (object) || STRINGP (object)) + return Qt; + else + return Qnil; +} + DEFUN ("treesit-node-parser", Ftreesit_node_parser, Streesit_node_parser, 1, 1, 0, @@ -1661,6 +1673,7 @@ syms_of_treesit (void) defsubr (&Streesit_parser_p); defsubr (&Streesit_node_p); defsubr (&Streesit_compiled_query_p); + defsubr (&Streesit_query_p); defsubr (&Streesit_node_parser); commit 8f3b872e30cc1055b1c5d35acfcf1ef7d483b01e Author: Yuan Fu Date: Mon Jun 13 23:01:04 2022 -0700 Add new type treesit-compiled-query No intergration/interaction with the new type, just adding it. * lisp/emacs-lisp/cl-preloaded.el (cl--typeof-types): Add new type. * src/alloc.c (cleanup_vector): Add gc for the new type. * src/data.c (Ftype_of): Add switch case for the new type. (syms_of_data): Add symbols for the new type. * src/lisp.h (DEFINE_GDB_SYMBOL_BEGIN): Add new type. * src/treesit.c (Ftreesit_compiled_query_p): New function. (syms_of_treesit): Add symbol for the new type. 
* src/treesit.h (struct Lisp_TS_Query): New struct. (TS_COMPILED_QUERY_P, XTS_COMPILED_QUERY, CHECK_TS_COMPILED_QUERY): New macros. * src/print.c (print_vectorlike): Add printing for the new type. diff --git a/lisp/emacs-lisp/cl-preloaded.el b/lisp/emacs-lisp/cl-preloaded.el index 46f5ab35ff..812d0af86b 100644 --- a/lisp/emacs-lisp/cl-preloaded.el +++ b/lisp/emacs-lisp/cl-preloaded.el @@ -80,6 +80,7 @@ cl--typeof-types (user-ptr atom) (tree-sitter-parser atom) (tree-sitter-node atom) + (tree-sitter-compiled-query atom) ;; Plus, really hand made: (null symbol list sequence atom)) "Alist of supertypes. diff --git a/src/alloc.c b/src/alloc.c index 40a3e235ea..3c622d05ff 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -3174,6 +3174,13 @@ cleanup_vector (struct Lisp_Vector *vector) ts_tree_delete(lisp_parser->tree); ts_parser_delete(lisp_parser->parser); } + else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_COMPILED_QUERY)) + { + struct Lisp_TS_Query *lisp_query + = PSEUDOVEC_STRUCT (vector, Lisp_TS_Query); + ts_query_delete (lisp_query->query); + ts_query_cursor_delete (lisp_query->cursor); + } #endif #ifdef HAVE_MODULES else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION)) diff --git a/src/data.c b/src/data.c index a28bf41414..8dbb2902a7 100644 --- a/src/data.c +++ b/src/data.c @@ -265,6 +265,8 @@ DEFUN ("type-of", Ftype_of, Stype_of, 1, 1, 0, return Qtreesit_parser; case PVEC_TS_NODE: return Qtreesit_node; + case PVEC_TS_COMPILED_QUERY: + return Qtreesit_compiled_query; case PVEC_SQLITE: return Qsqlite; /* "Impossible" cases. 
*/ @@ -4264,6 +4266,7 @@ #define PUT_ERROR(sym, tail, msg) \ DEFSYM (Qxwidget_view, "xwidget-view"); DEFSYM (Qtreesit_parser, "treesit-parser"); DEFSYM (Qtreesit_node, "treesit-node"); + DEFSYM (Qtreesit_compiled_query, "treesit-compiled-query"); DEFSYM (Qdefun, "defun"); diff --git a/src/lisp.h b/src/lisp.h index eb1f1ec2c2..8832e76b44 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1060,6 +1060,7 @@ DEFINE_GDB_SYMBOL_END (PSEUDOVECTOR_FLAG) PVEC_NATIVE_COMP_UNIT, PVEC_TS_PARSER, PVEC_TS_NODE, + PVEC_TS_COMPILED_QUERY, PVEC_SQLITE, /* These should be last, for internal_equal and sxhash_obj. */ diff --git a/src/print.c b/src/print.c index d8b8513f31..81b524d79f 100644 --- a/src/print.c +++ b/src/print.c @@ -1982,6 +1982,9 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag, printcharfun, escapeflag); printchar ('>', printcharfun); break; + case PVEC_TS_COMPILED_QUERY: + print_c_string ("#", printcharfun); + break; #endif case PVEC_SQLITE: diff --git a/src/treesit.c b/src/treesit.c index 91114b06f1..19f8343765 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -558,6 +558,17 @@ DEFUN ("treesit-node-p", return Qnil; } +DEFUN ("treesit-compiled-query-p", + Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0, + doc: /* Return t if OBJECT is a compiled tree-sitter query. 
*/) + (Lisp_Object object) +{ + if (TS_COMPILED_QUERY_P (object)) + return Qt; + else + return Qnil; +} + DEFUN ("treesit-node-parser", Ftreesit_node_parser, Streesit_node_parser, 1, 1, 0, @@ -1568,6 +1579,7 @@ syms_of_treesit (void) { DEFSYM (Qtreesit_parser_p, "treesit-parser-p"); DEFSYM (Qtreesit_node_p, "treesit-node-p"); + DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p"); DEFSYM (Qnamed, "named"); DEFSYM (Qmissing, "missing"); DEFSYM (Qextra, "extra"); @@ -1648,6 +1660,7 @@ syms_of_treesit (void) defsubr (&Streesit_parser_p); defsubr (&Streesit_node_p); + defsubr (&Streesit_compiled_query_p); defsubr (&Streesit_node_parser); diff --git a/src/treesit.h b/src/treesit.h index 639c4eedc5..cb00fee111 100644 --- a/src/treesit.h +++ b/src/treesit.h @@ -81,6 +81,17 @@ #define EMACS_TREESIT_H ptrdiff_t timestamp; }; +/* A compiled tree-sitter query. */ +struct Lisp_TS_Query +{ + union vectorlike_header header; + /* Pointer to the query object. */ + TSQuery *query; + /* Pointer to a cursor. If we are storing the query object, we + might as well store a cursor, too. 
*/ + TSQueryCursor *cursor; +}; + INLINE bool TS_PARSERP (Lisp_Object x) { @@ -107,6 +118,19 @@ XTS_NODE (Lisp_Object a) return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node); } +INLINE bool +TS_COMPILED_QUERY_P (Lisp_Object x) +{ + return PSEUDOVECTORP (x, PVEC_TS_COMPILED_QUERY); +} + +INLINE struct Lisp_TS_Query * +XTS_COMPILED_QUERY (Lisp_Object a) +{ + eassert (TS_COMPILED_QUERY_P (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Query); +} + INLINE void CHECK_TS_PARSER (Lisp_Object parser) { @@ -119,6 +143,13 @@ CHECK_TS_NODE (Lisp_Object node) CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node); } +INLINE void +CHECK_TS_COMPILED_QUERY (Lisp_Object query) +{ + CHECK_TYPE (TS_COMPILED_QUERY_P (query), + Qtreesit_compiled_query_p, query); +} + void ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, ptrdiff_t new_end_byte); commit 8aa04aac65ad286a06f05af374060d6c77d76189 Author: Yuan Fu Date: Mon Jun 13 14:00:08 2022 -0700 ; * lisp/treesit.el (treesit-defun-query): Improve docstring. diff --git a/lisp/treesit.el b/lisp/treesit.el index 76101509e0..d6d092ee6a 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -964,7 +964,9 @@ treesit-search-end (defvar-local treesit-defun-query nil "A tree-sitter query that matches function/class definitions. Capture names don't matter. This variable is used by navigation -functions like `treesit-beginning-of-defun'.") +functions like `treesit-beginning-of-defun'. + +See `treesit-query-in' for what a query should look like.") (defun treesit-beginning-of-defun (&optional arg) "Move backward to the beginning of a defun. commit b3de8850e0b19c831ebcb0760248c9ef00fbbb98 Author: Yuan Fu Date: Mon Jun 13 13:53:11 2022 -0700 Use the up-only parameter in treesit navigation functions * lisp/treesit.el(treesit-inspect-node-at-point, treesit-end-of-defun): Set up-only to t. 
diff --git a/lisp/treesit.el b/lisp/treesit.el index 7de7545f4e..76101509e0 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -974,7 +974,7 @@ treesit-beginning-of-defun according to `treesit-defun-query'." (unless treesit-defun-query (error "Variable `treesit-defun-query' is unset")) - (treesit-search-beginning treesit-defun-query (- (or arg 1)))) + (treesit-search-beginning treesit-defun-query (- (or arg 1)) nil t)) (defun treesit-end-of-defun (&optional arg) "Move forward to the end of a defun. @@ -984,7 +984,7 @@ treesit-end-of-defun `treesit-defun-query'." (unless treesit-defun-query (error "Variable `treesit-defun-query' is unset")) - (treesit-search-end treesit-defun-query (or arg 1))) + (treesit-search-end treesit-defun-query (or arg 1) nil t)) ;;; Debugging commit c62473c31ab7dc70fc9c940f93b9217a7d16e7fc Author: Yuan Fu Date: Mon Jun 13 13:48:24 2022 -0700 Add depth control for treesit traverse functions * lisp/treesit.el (treesit-traverse-depth-first, treesit-traverse-forward): Add depth parameter. (treesit-search-forward, treesit-search-beginning, treesit-search-end): Add up-only parameter. diff --git a/lisp/treesit.el b/lisp/treesit.el index 78dfcae7e5..7de7545f4e 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -229,21 +229,27 @@ treesit-parent-while (defalias 'treesit-traverse-parent #'treesit-parent-until) -(defun treesit-traverse-depth-first (node pred &optional step) +(defun treesit-traverse-depth-first (node pred &optional step depth) "Traverse the subtree of NODE depth-first. Traverse starting from NODE (i.e., NODE is passed to PRED). For each node traversed, call PRED with the node, stop and return the node if PRED returns non-nil. If STEP >= 0 or nil, go forward, if STEP < 0, go backward. If no node satisfies PRED, return -nil." 
- (if (funcall pred node) - node - (cl-loop for child in (if (or (null step) (>= step 0)) - (treesit-node-children node) - (nreverse (treesit-node-children node))) - if (treesit-traverse-depth-first child pred step) - return child))) +nil. + +DEPTH can be a positive integer or 0, meaning go DEPTH deep +counting from NODE; or nil, meaning there is no limit." + (if (and (numberp depth) (<= depth 0)) + nil + (if (funcall pred node) + node + (cl-loop for child in (if (or (null step) (>= step 0)) + (treesit-node-children node) + (nreverse (treesit-node-children node))) + if (treesit-traverse-depth-first + child pred step (if (numberp depth) (1- depth) depth)) + return child)))) (defun treesit--traverse-breadth-first-1 (pred step queue tail) "The work horse for `treesit-traverse-breadth-first'. @@ -296,7 +302,7 @@ treesit-next-sibling-or-up (when (treesit-node-parent node) (list 'parent (treesit-node-parent node))))) -(defun treesit-traverse-forward (node pred &optional step) +(defun treesit-traverse-forward (node pred &optional step depth) "Traverse the whole tree forward from NODE depth-first. Traverse starting from NODE (i.e., NODE is passed to PRED). For @@ -316,23 +322,36 @@ treesit-traverse-forward | | | | | | o o 2 7 +-+-+ +--+--+ | | | | | - 10 11 13 14 15" - ;; First try NODE's subtree. - (or (treesit-traverse-depth-first node pred step) + 10 11 13 14 15 + +DEPTH can be a positive integer, 0, nil, or 'up. A positive +integer or 0 means go DEPTH deep counting from NODE. A nil means +no limit. And a symbol 'up means upward only: only traverse +sibling and parent, never go down. The difference between 0 and +'up is subtle: in the above example, if given 0 as DEPTH, node 1 +3 4 5 6 8 9 12 16 are visited; if given t as DEPTH, only node 1 3 +4 8 16 are visited." + ;; First try NODE's subtree, but only under these conditions: if + ;; DEPTH is a number, it has to be greater than 0, if it's a symbol, + ;; it cannot be 'up. 
+ (or (and (if (numberp depth) (> depth 0) (not (eq depth 'up))) + (treesit-traverse-depth-first node pred step depth)) ;; If no match, try the next node: next sibling, or parent if no ;; next sibling exists. (catch 'match (let ((next (list nil node))) - ;; If NEXT is parent, call PRED on it and keep going. + ;; If NEXT is parent, call PRED on it and keep going. We + ;; can always go to parent, regardless the value of DEPTH. (while (and (setq next (treesit-next-sibling-or-up (cadr next) step)) (eq (car next) 'parent)) + (when (numberp depth) (cl-incf depth)) (when (funcall pred (cadr next)) (throw 'match (cadr next)))) (when next ;; If NEXT is non-nil, it must be ('sibling node). (treesit-traverse-forward - (cadr next) pred step)))))) + (cadr next) pred step depth)))))) (defun treesit-node-children (node &optional named) "Return a list of NODE's children. @@ -834,8 +853,9 @@ treesit-check-indent ;;; Search -(defun treesit-search-forward (pos-fn arg query &optional lang) - "Search forward for nodes that matches QUERY. +;; TODO: It might be more performant if we implement this in C. +(defun treesit-search-forward (pos-fn arg query &optional lang up-only) + "Search forward for nodes that matches QUERY from current point. This is a more primitive function, you might want to use `treesit-search-beginning' or `treesit-search-end' instead. @@ -851,7 +871,13 @@ treesit-search-forward or any function that takes a node and returns a position. If search succeeds, stop at the position returned by POS-FN and -return the matched node. Return nil if search failed." +return the matched node. Return nil if search failed. + +We search by traversing the parse tree, visiting every node +that's after (or before) the smallest node at point (retrieved by +`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or +parent in the tree, never go down into children when traversing +the tree." 
(cl-loop for idx from 1 to (abs arg) for parser = (if lang (treesit-get-parser-create lang) @@ -878,7 +904,8 @@ treesit-search-forward (< (funcall pos-fn node) starting-point))) return t))) - arg)) + ;; The AND form converts non-nil/nil into t/nil. + arg (and up-only t))) for pos = (funcall pos-fn node) ;; If we can find a match, jump to it. if pos do (goto-char pos) @@ -886,7 +913,7 @@ treesit-search-forward ;; Return t to indicate that search is successful. finally return node)) -(defun treesit-search-beginning (query arg &optional lang) +(defun treesit-search-beginning (query arg &optional lang up-only) "Search forward for nodes that matches QUERY. Stops at the beginning of matched node. @@ -899,10 +926,17 @@ treesit-search-beginning negative ARG means go backward. If search succeeds, return the matched node. Return nil if -search failed." - (treesit-search-forward #'treesit-node-start arg query lang)) +search failed. + +We search by traversing the parse tree, visiting every node +that's after (or before) the smallest node at point (retrieved by +`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or +parent in the tree, never go down into children when traversing +the tree." + (treesit-search-forward #'treesit-node-start arg query lang + up-only)) -(defun treesit-search-end (query arg &optional lang) +(defun treesit-search-end (query arg &optional lang up-only) "Search forward for nodes that matches QUERY. Stops at the end of matched node. @@ -915,8 +949,15 @@ treesit-search-end negative ARG means go backward. If search succeeds, return the matched node. Return nil if -search failed." - (treesit-search-forward #'treesit-node-end arg query lang)) +search failed. + +We search by traversing the parse tree, visiting every node +that's after (or before) the smallest node at point (retrieved by +`treesit-node-at'). If UP-ONLY is non-nil, only go to sibling or +parent in the tree, never go down into children when traversing +the tree." 
+ (treesit-search-forward #'treesit-node-end arg query lang + up-only)) ;;; Navigation commit a73f2b9990465820d80c58ed25208b72731d410d Author: Yuan Fu Date: Mon Jun 13 13:22:17 2022 -0700 Fix treesit-search-forward Move the check for movement (if (> arg 0) ;; Make sure we moved forward. (> (funcall pos-fn node) starting-point) ;; Make sure we moved backward. (< (funcall pos-fn node) starting-point)) into cl-loop: if (treesit-node-eq cap-node node) becomes if (and (treesit-node-eq cap-node node) (if (> arg 0) ;; Make sure we moved forward. (> (funcall pos-fn node) starting-point) ;; Make sure we moved backward. (< (funcall pos-fn node) starting-point))) * lisp/treesit.el (treesit-search-forward): Move the check. diff --git a/lisp/treesit.el b/lisp/treesit.el index 98fcf84355..78dfcae7e5 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -864,17 +864,20 @@ treesit-search-forward (lambda (node) (and (not (eq (funcall pos-fn node) starting-point)) - (if (> arg 0) - ;; Make sure we move forward. - (> (funcall pos-fn node) starting-point) - ;; Make sure we move backward. - (< (funcall pos-fn node) starting-point)) - (cl-loop for cap-node in - (mapcar - #'cdr - (treesit-query-capture node query)) - if (treesit-node-eq cap-node node) - return t))) + (cl-loop + for cap-node in + (mapcar + #'cdr + (treesit-query-capture node query)) + if (and (treesit-node-eq cap-node node) + (if (> arg 0) + ;; Make sure we moved forward. + (> (funcall pos-fn node) + starting-point) + ;; Make sure we moved backward. + (< (funcall pos-fn node) + starting-point))) + return t))) arg)) for pos = (funcall pos-fn node) ;; If we can find a match, jump to it. commit c5b172ec586fa5ed0ae0644e1760fcf7ef27dd02 Author: Yuan Fu Date: Sat Jun 11 20:42:26 2022 -0700 * configure.ac (HAVE_TREE_SITTER): Not set TREE_SITTER_LIBS. 
diff --git a/configure.ac b/configure.ac index bf97dd017c..5a82d47db3 100644 --- a/configure.ac +++ b/configure.ac @@ -3115,7 +3115,6 @@ AC_DEFUN [HAVE_TREE_SITTER=yes], [HAVE_TREE_SITTER=no]) if test "${HAVE_TREE_SITTER}" = yes; then AC_DEFINE(HAVE_TREE_SITTER, 1, [Define if using tree-sitter.]) - TREE_SITTER_LIBS=-ltree-sitter TREE_SITTER_OBJ="treesit.o" fi fi commit 1dd8ddee12e03224f9bcfd029c9e4192d2c47a24 Author: Yuan Fu Date: Sat Jun 11 20:24:38 2022 -0700 Rename treesit-traverse-forward-depth-first * lisp/treesit.el (treesit-traverse-forward): Rename to 'treesit-traverse-forward'. (treesit-traverse-forward, treesit-search-forward): Use the new name. diff --git a/lisp/treesit.el b/lisp/treesit.el index 761c7147a0..98fcf84355 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -296,7 +296,7 @@ treesit-next-sibling-or-up (when (treesit-node-parent node) (list 'parent (treesit-node-parent node))))) -(defun treesit-traverse-forward-depth-first (node pred &optional step) +(defun treesit-traverse-forward (node pred &optional step) "Traverse the whole tree forward from NODE depth-first. Traverse starting from NODE (i.e., NODE is passed to PRED). For @@ -331,7 +331,7 @@ treesit-traverse-forward-depth-first (throw 'match (cadr next)))) (when next ;; If NEXT is non-nil, it must be ('sibling node). - (treesit-traverse-forward-depth-first + (treesit-traverse-forward (cadr next) pred step)))))) (defun treesit-node-children (node &optional named) @@ -859,7 +859,7 @@ treesit-search-forward for node = (if-let ((starting-point (point)) (node (treesit-node-at (point) parser t))) - (treesit-traverse-forward-depth-first + (treesit-traverse-forward node (lambda (node) (and (not (eq (funcall pos-fn node) commit 35e2786c930d5c2125f2af8d1c224a480181dce9 Author: Theodor Thornhill Date: Thu May 19 09:35:13 2022 -0700 Fix typo and argument in treesit-beginning-of-defun, etc * lisp/treesit.el (treesit-beginning-of-defun, treesit-end-of-defun): Fix typo, add shield for argument. 
diff --git a/lisp/treesit.el b/lisp/treesit.el index 3313168d66..761c7147a0 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -927,20 +927,20 @@ treesit-beginning-of-defun With ARG, do it that many times. Negative ARG means move forward to the ARGth following beginning of defun. Defun is defined -according to `treesit-defun-pattern'." +according to `treesit-defun-query'." (unless treesit-defun-query (error "Variable `treesit-defun-query' is unset")) - (treesit-search-beginning treesit-defun-query (- arg))) + (treesit-search-beginning treesit-defun-query (- (or arg 1)))) (defun treesit-end-of-defun (&optional arg) "Move forward to the end of a defun. With ARG, do it that many times. Negative ARG means move back to ARGth preceding end of defun. Defun is defined according to -`treesit-defun-pattern'." +`treesit-defun-query'." (unless treesit-defun-query (error "Variable `treesit-defun-query' is unset")) - (treesit-search-end treesit-defun-query arg)) + (treesit-search-end treesit-defun-query (or arg 1))) ;;; Debugging commit 74f8572f6cfa846f2a86d5f775b6fc2bd47ad269 Author: Yuan Fu Date: Sat May 14 08:57:23 2022 -0700 ; * lisp/treesit.el (treesit-node-at): Fix typo. diff --git a/lisp/treesit.el b/lisp/treesit.el index 1cfdab95ca..3313168d66 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -142,7 +142,7 @@ treesit-node-at (let ((node (if (treesit-parser-p parser-or-lang) (treesit-parser-root-node parser-or-lang) (treesit-buffer-root-node parser-or-lang)))) - ;; TODO: We might want a `treesit-node-decendant-for-pos' in C. + ;; TODO: We might want a `treesit-node-descendant-for-pos' in C. (while (cond ((and node (< (treesit-node-end node) point)) (setq node (treesit-node-next-sibling node)) t) commit b2b57eda041e4c30fb169031b6f5dadeab4c7d98 Author: Yuan Fu Date: Fri May 13 16:39:29 2022 -0700 Extract out treesit-search-forward * lisp/treesit.el (treesit-search-forward, treesit-search-beginning, treesit-search-end): New functions. 
(treesit-traverse-defun): Remove function. (treesit-beginning-of-defun, treesit-end-of-defun): Replace 'treesit-traverse-defun' with 'treesit-search-forward' and fiends. * test/src/treesit-tests.el: Add reminder for tests. diff --git a/lisp/treesit.el b/lisp/treesit.el index 60f375e9d9..1cfdab95ca 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -832,48 +832,95 @@ treesit-check-indent (indent-region (point-min) (point-max)) (diff-buffers source-buf (current-buffer))))) -;;; Navigation +;;; Search -(defvar-local treesit-defun-query nil - "A tree-sitter query that matches function/class definitions. -Capture names don't matter. This variable is used by navigation -functions like `treesit-beginning-of-defun'.") +(defun treesit-search-forward (pos-fn arg query &optional lang) + "Search forward for nodes that matches QUERY. -(defun treesit-traverse-defun (pos-fn arg) - "Move forward/backward to the beginning/end of a defun. +This is a more primitive function, you might want to use +`treesit-search-beginning' or `treesit-search-end' instead. -Defun is defined according to `treesit-defun-pattern'. Move -forward/backward ARG time, positive ARG means go forward, +QUERY has to capture the node to match. LANG specifies the +language in which we search for nodes. If LANG is nil, use the +first parser in `treesit-parser-list'. + +Move forward/backward ARG times, positive ARG means go forward, negative ARG means go backward. -POS-FN can be either `treesit-node-start' or `treesit-node-end'." - (unless treesit-defun-query - (error "Variable `treesit-defun-query' is unset")) +POS-FN can be either `treesit-node-start' or `treesit-node-end', +or any function that takes a node and returns a position. + +If search succeeds, stop at the position returned by POS-FN and +return the matched node. Return nil if search failed." 
(cl-loop for idx from 1 to (abs arg) - for positions = - (remove - nil - (mapcar (lambda (parser) - (if-let ((starting-point (point)) - (node (treesit-node-at - (point) parser t))) - (funcall - pos-fn - (treesit-traverse-forward-depth-first - node - (lambda (node) - (and (not (eq (funcall pos-fn node) - starting-point)) - (treesit-query-capture - node treesit-defun-query))) - arg)))) - treesit-parser-list)) - ;; If we can find a defun start, jump to it. - if positions do (goto-char (apply #'max positions)) + for parser = (if lang + (treesit-get-parser-create lang) + (car treesit-parser-list)) + for node = + (if-let ((starting-point (point)) + (node (treesit-node-at (point) parser t))) + (treesit-traverse-forward-depth-first + node + (lambda (node) + (and (not (eq (funcall pos-fn node) + starting-point)) + (if (> arg 0) + ;; Make sure we move forward. + (> (funcall pos-fn node) starting-point) + ;; Make sure we move backward. + (< (funcall pos-fn node) starting-point)) + (cl-loop for cap-node in + (mapcar + #'cdr + (treesit-query-capture node query)) + if (treesit-node-eq cap-node node) + return t))) + arg)) + for pos = (funcall pos-fn node) + ;; If we can find a match, jump to it. + if pos do (goto-char pos) else return nil - if (eq (point) (point-min)) return nil ;; Return t to indicate that search is successful. - finally return t)) + finally return node)) + +(defun treesit-search-beginning (query arg &optional lang) + "Search forward for nodes that matches QUERY. + +Stops at the beginning of matched node. + +QUERY has to capture the node to match. LANG specifies the +language in which we search for nodes. If LANG is nil, use the +first parser in `treesit-parser-list'. + +Move forward/backward ARG times, positive ARG means go forward, +negative ARG means go backward. + +If search succeeds, return the matched node. Return nil if +search failed." 
+ (treesit-search-forward #'treesit-node-start arg query lang)) + +(defun treesit-search-end (query arg &optional lang) + "Search forward for nodes that matches QUERY. + +Stops at the end of matched node. + +QUERY has to capture the node to match. LANG specifies the +language in which we search for nodes. If LANG is nil, use the +first parser in `treesit-parser-list'. + +Move forward/backward ARG times, positive ARG means go forward, +negative ARG means go backward. + +If search succeeds, return the matched node. Return nil if +search failed." + (treesit-search-forward #'treesit-node-end arg query lang)) + +;;; Navigation + +(defvar-local treesit-defun-query nil + "A tree-sitter query that matches function/class definitions. +Capture names don't matter. This variable is used by navigation +functions like `treesit-beginning-of-defun'.") (defun treesit-beginning-of-defun (&optional arg) "Move backward to the beginning of a defun. @@ -881,7 +928,9 @@ treesit-beginning-of-defun With ARG, do it that many times. Negative ARG means move forward to the ARGth following beginning of defun. Defun is defined according to `treesit-defun-pattern'." - (treesit-traverse-defun #'treesit-node-start (- arg))) + (unless treesit-defun-query + (error "Variable `treesit-defun-query' is unset")) + (treesit-search-beginning treesit-defun-query (- arg))) (defun treesit-end-of-defun (&optional arg) "Move forward to the end of a defun. @@ -889,7 +938,9 @@ treesit-end-of-defun With ARG, do it that many times. Negative ARG means move back to ARGth preceding end of defun. Defun is defined according to `treesit-defun-pattern'." 
- (treesit-traverse-defun #'treesit-node-end arg)) + (unless treesit-defun-query + (error "Variable `treesit-defun-query' is unset")) + (treesit-search-end treesit-defun-query arg)) ;;; Debugging diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 65b871693d..1b20b86bc9 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -369,7 +369,9 @@ treesit-node-supplemental ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list -;; - treesit-traverse-defun +;; - treesit-search-forward +;; - treesit-search-beginning +;; - treesit-search-end ;; - treesit-beginning-of-defun ;; - treesit-end-of-defun commit 750090fd076e6923c5cee6f67b170416d48694da Author: Yuan Fu Date: Fri May 13 16:34:26 2022 -0700 * lisp/treesit.el (treesit-node-at): Add check for nil node. diff --git a/lisp/treesit.el b/lisp/treesit.el index 345aaf2e9b..60f375e9d9 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -143,7 +143,7 @@ treesit-node-at (treesit-parser-root-node parser-or-lang) (treesit-buffer-root-node parser-or-lang)))) ;; TODO: We might want a `treesit-node-decendant-for-pos' in C. - (while (cond ((< (treesit-node-end node) point) + (while (cond ((and node (< (treesit-node-end node) point)) (setq node (treesit-node-next-sibling node)) t) ((treesit-node-child node 0 named) commit d8c9b9c0fb704ed2b20563b79248b837727853c2 Author: Yuan Fu Date: Fri May 13 14:00:56 2022 -0700 Add defun navigation * lisp/treesit.el (treesit-defun-query): New variable. (treesit-traverse-defun, treesit-beginning-of-defun, treesit-end-of-defun): New functions. * test/src/treesit-tests.el: Add reminders for tests. 
diff --git a/lisp/treesit.el b/lisp/treesit.el index 0fe3a8ed24..345aaf2e9b 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -832,6 +832,65 @@ treesit-check-indent (indent-region (point-min) (point-max)) (diff-buffers source-buf (current-buffer))))) +;;; Navigation + +(defvar-local treesit-defun-query nil + "A tree-sitter query that matches function/class definitions. +Capture names don't matter. This variable is used by navigation +functions like `treesit-beginning-of-defun'.") + +(defun treesit-traverse-defun (pos-fn arg) + "Move forward/backward to the beginning/end of a defun. + +Defun is defined according to `treesit-defun-pattern'. Move +forward/backward ARG time, positive ARG means go forward, +negative ARG means go backward. + +POS-FN can be either `treesit-node-start' or `treesit-node-end'." + (unless treesit-defun-query + (error "Variable `treesit-defun-query' is unset")) + (cl-loop for idx from 1 to (abs arg) + for positions = + (remove + nil + (mapcar (lambda (parser) + (if-let ((starting-point (point)) + (node (treesit-node-at + (point) parser t))) + (funcall + pos-fn + (treesit-traverse-forward-depth-first + node + (lambda (node) + (and (not (eq (funcall pos-fn node) + starting-point)) + (treesit-query-capture + node treesit-defun-query))) + arg)))) + treesit-parser-list)) + ;; If we can find a defun start, jump to it. + if positions do (goto-char (apply #'max positions)) + else return nil + if (eq (point) (point-min)) return nil + ;; Return t to indicate that search is successful. + finally return t)) + +(defun treesit-beginning-of-defun (&optional arg) + "Move backward to the beginning of a defun. + +With ARG, do it that many times. Negative ARG means move forward +to the ARGth following beginning of defun. Defun is defined +according to `treesit-defun-pattern'." + (treesit-traverse-defun #'treesit-node-start (- arg))) + +(defun treesit-end-of-defun (&optional arg) + "Move forward to the end of a defun. + +With ARG, do it that many times. 
Negative ARG means move back to +ARGth preceding end of defun. Defun is defined according to +`treesit-defun-pattern'." + (treesit-traverse-defun #'treesit-node-end arg)) + ;;; Debugging (defvar-local treesit--inspect-name nil diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 429e12088f..65b871693d 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -369,6 +369,9 @@ treesit-node-supplemental ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list +;; - treesit-traverse-defun +;; - treesit-beginning-of-defun +;; - treesit-end-of-defun (provide 'treesit-tests) ;;; treesit-tests.el ends here commit d94c7076dfcb35037e77fc12e48d07d65c2005cf Author: Yuan Fu Date: Fri May 13 13:47:41 2022 -0700 New node traversal functions * lisp/treesit.el (treesit-traverse-parent): New alias. (treesit-traverse-depth-first, treesit--traverse-breadth-first-1, treesit-traverse-breadth-first, treesit-next-sibling-or-up, treesit-traverse-forward-depth-first): New functions. * test/src/treesit-tests.el (treesit-node-supplemental): Add reminders for tests. diff --git a/lisp/treesit.el b/lisp/treesit.el index dbbe0e409a..0fe3a8ed24 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -227,6 +227,113 @@ treesit-parent-while node (treesit-node-parent node))) last)) +(defalias 'treesit-traverse-parent #'treesit-parent-until) + +(defun treesit-traverse-depth-first (node pred &optional step) + "Traverse the subtree of NODE depth-first. + +Traverse starting from NODE (i.e., NODE is passed to PRED). For +each node traversed, call PRED with the node, stop and return the +node if PRED returns non-nil. If STEP >= 0 or nil, go forward, +if STEP < 0, go backward. If no node satisfies PRED, return +nil." 
+ (if (funcall pred node) + node + (cl-loop for child in (if (or (null step) (>= step 0)) + (treesit-node-children node) + (nreverse (treesit-node-children node))) + if (treesit-traverse-depth-first child pred step) + return child))) + +(defun treesit--traverse-breadth-first-1 (pred step queue tail) + "The work horse for `treesit-traverse-breadth-first'. +PRED and STEP are the same as in +`treesit-traverse-breadth-first'. This function simply runes BFS +on QUEUE: pops an element from QUEUE, append children to QUEUE, +process the element, and next iteration. TAIL is the pointer to +the last cons in QUEUE, used for appending elements." + (cl-loop while queue + if (funcall pred (car queue)) return (car queue) + else do + (let ((children (if (or (null step) (>= step 0)) + (treesit-node-children (car queue)) + (nreverse (treesit-node-children (car queue)))))) + ;; Append children to the end. + (setcdr tail children) + (setq tail (last tail)) + ;; Pop the head off. + (setq queue (cdr queue))) + finally return nil)) + +(defun treesit-traverse-breadth-first (node pred &optional step) + "Traverse the subtree of NODE breadth-first. + +Traverse starting from NODE (i.e., NODE is passed to PRED). For +each node traversed, call PRED with the node, stop and return the +node if PRED returns non-nil. If STEP >= 0 or nil, go forward, +if STEP < 0, go backward. If no node satisfies PRED, return +nil." + ;; Traverse with a queue. + (let* ((queue (list node)) + (tail (last queue))) + (treesit--traverse-breadth-first-1 pred step queue tail))) + +(defun treesit-next-sibling-or-up (node step) + "Return the next sibling of NODE. + +If there is no next sibling of NODE but NODE has a parent, return +the parent. If there is no parent, return nil. If STEP >= 0 or +nil, return the next sibling, if STEP < 0, return the previous +one. + +Return either ('sibling node) or ('parent node)." + ;; First deplete siblings. 
+ (if-let ((sibling (if (or (null step) (>= step 0)) + (treesit-node-next-sibling node) + (treesit-node-prev-sibling node)))) + (list 'sibling sibling) + ;; When siblings depleted, go up one level. + (when (treesit-node-parent node) + (list 'parent (treesit-node-parent node))))) + +(defun treesit-traverse-forward-depth-first (node pred &optional step) + "Traverse the whole tree forward from NODE depth-first. + +Traverse starting from NODE (i.e., NODE is passed to PRED). For +each node traversed, call PRED with the node, stop and return the +node if PRED returns non-nil. If STEP >= 0 or nil, go forward, +if STEP < 0, go backward. If no node satisfies PRED, return +nil. + +Traversing forward depth-first means, for a tree like the below +where NODE is marked 1, traverse as numbered: + + 16 + | + 3--------4-----------8 + | | | + o--o-+--1 5--+--6 9---+-----12 + | | | | | | + o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15" + ;; First try NODE's subtree. + (or (treesit-traverse-depth-first node pred step) + ;; If no match, try the next node: next sibling, or parent if no + ;; next sibling exists. + (catch 'match + (let ((next (list nil node))) + ;; If NEXT is parent, call PRED on it and keep going. + (while (and (setq next (treesit-next-sibling-or-up + (cadr next) step)) + (eq (car next) 'parent)) + (when (funcall pred (cadr next)) + (throw 'match (cadr next)))) + (when next + ;; If NEXT is non-nil, it must be ('sibling node). + (treesit-traverse-forward-depth-first + (cadr next) pred step)))))) + (defun treesit-node-children (node &optional named) "Return a list of NODE's children. If NAMED is non-nil, collect named child only." 
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index c995542a2a..429e12088f 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -360,6 +360,10 @@ treesit-node-supplemental ;; `treesit-parent-while' ;; `treesit-node-children' ;; `treesit-node-field-name' + ;; `treesit-next-sibling-or-up' + ;; `treesit-traverse-depth-first' + ;; `treesit-traverse-breadth-first' + ;; `treesit-traverse-forward-depth-first' )) ;; TODO commit 78df03329d1e942d5617c0f09f264792e24a063d Author: Yuan Fu Date: Fri May 13 13:38:21 2022 -0700 Redefine treesit-node-at The old 'treesit-node-at' becomes 'treesit-node-on'. The new 'treesit-node-at' has slightly different semantics. Now 'treesit-node-on' gets the smallest node covering a range and 'treesit-node-at' gets the smallest node after a position. The reason of change can be found in the docstring of 'treesit-node-on' (the BEWARE part): its result can be sometimes surprising/unexpected. * doc/lispref/parsing.texi (Retrieving Node): Update manual. * lisp/treesit.el (treesit-node-at): Change to new definition. (treesit-node-on): Inherits the old definition of 'treesit-node-at'. Parameter END is now mandatory. (treesit-language-at, treesit-node-field-name): Use the new '-on' function. (treesit-font-lock-fontify-region, treesit-simple-indent-presets, treesit-indent): Use the new '-at' function. * test/src/treesit-tests.el (treesit-node-supplemental): Update tests. diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index bbe70ff9b1..72be91877b 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -471,11 +471,10 @@ Retrieving Node @heading Retrieving node from syntax tree -@defun treesit-node-at beg &optional end parser-or-lang named -This function returns the @emph{smallest} node that covers the span -from @var{beg} to @var{end}. In other words, the start of the node -@code{<=} @var{beg}, and the end of the node @code{>=} @var{end}. 
If -@var{end} is omitted, it defaults to the value of @var{beg}. +@defun treesit-node-at beg end &optional parser-or-lang named +This function returns the @emph{smallest} node that starts at or after +the @var{point}. In other words, the start of the node is equal or +greater than @var{point}. When @var{parser-or-lang} is nil, this function uses the first parser in @var{treesit-parser-list} in the current buffer. If @@ -489,12 +488,34 @@ Retrieving Node @example @group ;; Find the node at point in a C parser's syntax tree. -(treesit-node-at (point) (point) 'c) +(treesit-node-on (point) 'c) @c @result{} # @end group @end example @end defun +@defun treesit-node-on beg end &optional parser-or-lang named +This function returns the @emph{smallest} node that covers the span +from @var{beg} to @var{end}. In other words, the start of the node is +less or equal to @var{beg}, and the end of the node is greater or +equal to @var{end}. + +@emph{Beware}, Calling this function on an empty line that is not +inside any top-level construct (function definition, etc) most +probably will give you the root node, because the root node is the +smallest node that covers that empty line. You probably want to use +@code{treesit-node-at} instead. + +When @var{parser-or-lang} is nil, this function uses the first parser +in @var{treesit-parser-list} in the current buffer. If +@var{parser-or-lang} is a parser object, it use that parser; if +@var{parser-or-lang} is a language, it finds the first parser using +that language in @var{treesit-parser-list} and use that. + +If @var{named} is non-nil, this function looks for a named node +instead (@pxref{tree-sitter named node, named node}). +@end defun + @defun treesit-parser-root-node parser This function returns the root node of the syntax tree generated by @var{parser}. 
diff --git a/lisp/treesit.el b/lisp/treesit.el index eaaa1316af..dbbe0e409a 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -90,7 +90,7 @@ treesit-parse-string (defun treesit-language-at (point) "Return the language used at POINT." (cl-loop for parser in treesit-parser-list - if (treesit-node-at point nil parser) + if (treesit-node-on point point parser) return (treesit-parser-language parser))) (defun treesit-set-ranges (parser-or-lang ranges) @@ -128,11 +128,40 @@ treesit-node-language (treesit-parser-language (treesit-node-parser node))) -(defun treesit-node-at (beg &optional end parser-or-lang named) +(defun treesit-node-at (point &optional parser-or-lang named) + "Return the smallest node that starts at or after POINT. + +\"Starts at or after POINT\" means the start of the node is +greater or larger than POINT. Return nil if none find. If NAMED +non-nil, only look for named node. + +If PARSER-OR-LANG is nil, use the first parser in +`treesit-parser-list'; if PARSER-OR-LANG is a parser, use +that parser; if PARSER-OR-LANG is a language, find a parser using +that language in the current buffer, and use that." + (let ((node (if (treesit-parser-p parser-or-lang) + (treesit-parser-root-node parser-or-lang) + (treesit-buffer-root-node parser-or-lang)))) + ;; TODO: We might want a `treesit-node-decendant-for-pos' in C. + (while (cond ((< (treesit-node-end node) point) + (setq node (treesit-node-next-sibling node)) + t) + ((treesit-node-child node 0 named) + (setq node (treesit-node-child node 0 named)) + t))) + node)) + +(defun treesit-node-on (beg end &optional parser-or-lang named) "Return the smallest node covering BEG to END. -If omitted, END defaults to BEG. Return nil if none find. If -NAMED non-nil, only look for named node. NAMED defaults to nil. +BEWARE! 
Calling this function on an empty line that is not +inside any top-level construct (function definition, etc) most +probably will give you the root node, because the root node is +the smallest node that covers that empty line. You probably want +to use `treesit-node-at' instead. + +Return nil if none find. If NAMED non-nil, only look for named +node. If PARSER-OR-LANG is nil, use the first parser in `treesit-parser-list'; if PARSER-OR-LANG is a parser, use @@ -358,7 +387,7 @@ treesit-font-lock-fontify-region (when-let* ((language (nth 0 setting)) (match-pattern (nth 1 setting)) (parser (treesit-get-parser-create language))) - (when-let ((node (treesit-node-at start end parser))) + (when-let ((node (treesit-node-on start end parser))) (let ((captures (treesit-query-capture node match-pattern ;; Specifying the range is important. More @@ -500,7 +529,7 @@ treesit-simple-indent-presets (forward-line -1) (skip-chars-forward " \t") (treesit-node-start - (treesit-node-at (point) nil nil t)))))) + (treesit-node-at (point) nil t)))))) "A list of presets. These presets that can be used as MATHER and ANCHOR in `treesit-simple-indent-rules'. @@ -622,8 +651,7 @@ treesit-indent (point))) (smallest-node (cl-loop for parser in treesit-parser-list - for node = (treesit-node-at - bol nil parser) + for node = (treesit-node-at bol parser) if node return node)) (node (treesit-parent-while smallest-node @@ -639,7 +667,7 @@ treesit-indent (parent (cond ((and node parser) (treesit-node-parent node)) (parser - (treesit-node-at bol nil parser)) + (treesit-node-at bol parser)) (t nil))) (`(,anchor . ,offset) (funcall treesit-indent-function node parent bol))) diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index eb6e85c3fd..c995542a2a 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -331,7 +331,11 @@ treesit-node-supplemental 'json)) ;; `treesit-node-at'. 
(should (equal (treesit-node-string - (treesit-node-at 1 2 'json)) + (treesit-node-at 1 'json)) + "(\"[\")")) + ;; `treesit-node-on' + (should (equal (treesit-node-string + (treesit-node-on 1 2 'json)) "(\"[\")")) ;; `treesit-buffer-root-node'. (should (treesit-node-eq commit eebe5a1d6114ed54eb3cdd5576f43da76590b8fa Author: Yuan Fu Date: Mon May 9 12:49:55 2022 -0700 Fix compilation warnings * src/treesit.c: Add static keywords, remove unused variables, add const qualifier. diff --git a/src/treesit.c b/src/treesit.c index beeb2b7855..91114b06f1 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -100,7 +100,7 @@ ts_calloc_wrapper (size_t n, size_t size) return xzalloc (n * size); } -void +static void ts_initialize () { if (!ts_initialized) @@ -114,7 +114,7 @@ ts_initialize () /* Translates a symbol treesit- to a C name treesit_. */ -void +static void ts_symbol_to_c_name (char *symbol_name) { for (int idx=0; idx < strlen (symbol_name); idx++) @@ -124,7 +124,7 @@ ts_symbol_to_c_name (char *symbol_name) } } -bool +static bool ts_find_override_name (Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol) { @@ -149,7 +149,7 @@ ts_symbol_to_c_name (char *symbol_name) thsi function pushes "lib_base_name.so" and "lib_base_name.dylib" into *path_candidates. Obiviously path_candidates should be a Lisp list of Lisp strings. */ -void +static void ts_load_language_push_for_each_suffix (Lisp_Object lib_base_name, Lisp_Object *path_candidates) { @@ -168,7 +168,7 @@ ts_symbol_to_c_name (char *symbol_name) If SIGNAL is true, signal an error when failed to load LANGUAGE; if false, return NULL when failed. */ -TSLanguage * +static TSLanguage * ts_load_language (Lisp_Object language_symbol, bool signal) { Lisp_Object symbol_name = Fsymbol_name (language_symbol); @@ -242,7 +242,7 @@ ts_load_language (Lisp_Object language_symbol, bool signal) /* Load TSLanguage. 
*/ dynlib_error (); - TSLanguage *(*langfn) (); + TSLanguage *(*langfn) (void); langfn = dynlib_sym (handle, c_name); error = dynlib_error (); if (error != NULL) @@ -348,10 +348,9 @@ ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, } } -void +static void ts_ensure_position_synced (Lisp_Object parser) { - TSParser *ts_parser = XTS_PARSER (parser)->parser; TSTree *tree = XTS_PARSER (parser)->tree; if (tree == NULL) @@ -406,7 +405,7 @@ ts_ensure_position_synced (Lisp_Object parser) XTS_PARSER (parser)->visible_end = visible_end; } -void +static void ts_check_buffer_size (struct buffer *buffer) { ptrdiff_t buffer_size = @@ -419,7 +418,7 @@ ts_check_buffer_size (struct buffer *buffer) /* Parse the buffer. We don't parse until we have to. When we have to, we call this function to parse and update the tree. */ -void +static void ts_ensure_parsed (Lisp_Object parser) { if (!XTS_PARSER (parser)->need_reparse) @@ -456,7 +455,7 @@ ts_ensure_parsed (Lisp_Object parser) /* This is the read function provided to tree-sitter to read from a buffer. It reads one character at a time and automatically skips the gap. */ -const char* +static const char* ts_read_buffer (void *parser, uint32_t byte_index, TSPoint position, uint32_t *bytes_read) { @@ -647,7 +646,7 @@ DEFUN ("treesit-parser-root-node", /* Checks that the RANGES argument of treesit-parser-set-included-ranges is valid. */ -void +static void ts_check_range_argument (Lisp_Object ranges) { EMACS_INT last_point = 1; @@ -706,7 +705,6 @@ DEFUN ("treesit-parser-set-included-ranges", /* Set ranges for PARSER. 
*/ ptrdiff_t len = list_length (ranges); TSRange *ts_ranges = malloc (sizeof(TSRange) * len); - struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges)) { @@ -1246,7 +1244,7 @@ DEFUN ("treesit-expand-query", query, build_pure_c_string (" ")); } -char* +static const char* ts_query_error_to_string (TSQueryError error) { switch (error) @@ -1285,7 +1283,7 @@ ts_query_error_to_string (TSQueryError error) /* Collect predicates for this match and return them in a list. Each predicate is a list of strings and symbols. */ -Lisp_Object +static Lisp_Object ts_predicates_for_pattern (TSQuery *query, uint32_t pattern_index) { @@ -1327,7 +1325,7 @@ ts_query_error_to_string (TSQueryError error) /* Translate a capture NAME (symbol) to the text of the captured node. Signals treesit-query-error if such node is not captured. */ -Lisp_Object +static Lisp_Object ts_predicate_capture_name_to_text (Lisp_Object name, struct capture_range captures) { @@ -1360,7 +1358,7 @@ ts_query_error_to_string (TSQueryError error) false otherwise. A and B can be either string, or a capture name. The capture name evaluates to the text its captured node spans in the buffer. */ -bool +static bool ts_predicate_equal (Lisp_Object args, struct capture_range captures) { @@ -1383,7 +1381,7 @@ ts_query_error_to_string (TSQueryError error) /* Handles predicate (#match "regexp" @node). Return true if "regexp" matches the text spanned by @node; return false otherwise. Matching is case-sensitive. */ -bool +static bool ts_predicate_match (Lisp_Object args, struct capture_range captures) { @@ -1420,7 +1418,7 @@ ts_query_error_to_string (TSQueryError error) /* If all predicates in PREDICATES passes, return true; otherwise return false. 
*/ -bool +static bool ts_eval_predicates (struct capture_range captures, Lisp_Object predicates) { commit 0d4155826a523f28c616295a91e7859c1fc05426 Author: Yuan Fu Date: Mon May 9 12:34:01 2022 -0700 Remove call to nconc to improve performance * src/treesit.c (struct capture_range): New struct. (ts_predicate_capture_name_to_text, ts_predicate_equal, ts_predicate_match, ts_eval_predicates): Replace capture list with capture_range. (Ftreesit_query_capture): Remove call to nconc. diff --git a/src/treesit.c b/src/treesit.c index e127fc2d87..beeb2b7855 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1268,6 +1268,21 @@ ts_query_error_to_string (TSQueryError error) } } +/* This struct is used for passing captures to be check against + predicates. Captures we check for are the ones in START before + END. For example, if START and END are + + START END + v v + (1 . (2 . (3 . (4 . (5 . (6 . nil)))))) + + We only look at captures 1 2 3. */ +struct capture_range +{ + Lisp_Object start; + Lisp_Object end; +}; + /* Collect predicates for this match and return them in a list. Each predicate is a list of strings and symbols. */ Lisp_Object @@ -1313,10 +1328,12 @@ ts_query_error_to_string (TSQueryError error) /* Translate a capture NAME (symbol) to the text of the captured node. Signals treesit-query-error if such node is not captured. */ Lisp_Object -ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures) +ts_predicate_capture_name_to_text +(Lisp_Object name, struct capture_range captures) { Lisp_Object node = Qnil; - for (Lisp_Object tail = captures; !NILP (tail); tail = XCDR (tail)) + for (Lisp_Object tail = captures.start; + !EQ (tail, captures.end); tail = XCDR (tail)) { if (EQ (XCAR (XCAR (tail)), name)) { @@ -1344,14 +1361,14 @@ ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures) The capture name evaluates to the text its captured node spans in the buffer. 
*/ bool -ts_predicate_equal (Lisp_Object args, Lisp_Object captures) +ts_predicate_equal +(Lisp_Object args, struct capture_range captures) { if (XFIXNUM (Flength (args)) != 2) xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); Lisp_Object arg1 = XCAR (args); Lisp_Object arg2 = XCAR (XCDR (args)); - Lisp_Object tail = captures; Lisp_Object text1 = STRINGP (arg1) ? arg1 : ts_predicate_capture_name_to_text (arg1, captures); Lisp_Object text2 = STRINGP (arg2) ? arg2 : @@ -1367,14 +1384,14 @@ ts_predicate_equal (Lisp_Object args, Lisp_Object captures) matches the text spanned by @node; return false otherwise. Matching is case-sensitive. */ bool -ts_predicate_match (Lisp_Object args, Lisp_Object captures) +ts_predicate_match +(Lisp_Object args, struct capture_range captures) { if (XFIXNUM (Flength (args)) != 2) xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); Lisp_Object regexp = XCAR (args); Lisp_Object capture_name = XCAR (XCDR (args)); - Lisp_Object tail = captures; Lisp_Object text = ts_predicate_capture_name_to_text (capture_name, captures); @@ -1404,7 +1421,8 @@ ts_predicate_match (Lisp_Object args, Lisp_Object captures) /* If all predicates in PREDICATES passes, return true; otherwise return false. */ bool -ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates) +ts_eval_predicates +(struct capture_range captures, Lisp_Object predicates) { bool pass = true; /* Evaluate each predicates. */ @@ -1498,13 +1516,21 @@ DEFUN ("treesit-query-capture", TSQueryMatch match; /* Go over each match, collect captures and predicates. Include the - captures in the return list if all predicates in that match - passes. */ + captures in the RESULT list unconditionally as we get them, then + test for predicates. 
If predicates pass, then all good, if + predicates don't pass, revert the result back to the result + before this loop (PREV_RESULT). (Predicates control the entire + match.) This way we don't need to create a list of captures in + every for loop and nconc it to RESULT every time. That is indeed + the initial implementation in which Yoav found nconc being the + bottleneck (98.4% of the running time spent on nconc). */ Lisp_Object result = Qnil; + Lisp_Object prev_result = result; while (ts_query_cursor_next_match (cursor, &match)) { + /* Record the checkpoint that we may roll back to. */ + prev_result = result; /* Get captured nodes. */ - Lisp_Object captures_lisp = Qnil; const TSQueryCapture *captures = match.captures; for (int idx=0; idx < match.capture_count; idx++) { @@ -1517,21 +1543,23 @@ DEFUN ("treesit-query-capture", Lisp_Object cap = Fcons (intern_c_string_1 (capture_name, capture_name_len), captured_node); - captures_lisp = Fcons (cap, captures_lisp); + result = Fcons (cap, result); } /* Get predicates. */ Lisp_Object predicates = ts_predicates_for_pattern (ts_query, match.pattern_index); - captures_lisp = Fnreverse (captures_lisp); - if (ts_eval_predicates (captures_lisp, predicates)) + /* captures_lisp = Fnreverse (captures_lisp); */ + struct capture_range captures_range = { result, prev_result }; + if (!ts_eval_predicates (captures_range, predicates)) { - result = CALLN (Fnconc, result, captures_lisp); + /* Predicates didn't pass, roll back. */ + result = prev_result; } } ts_query_delete (ts_query); ts_query_cursor_delete (cursor); - return result; + return Fnreverse (result); } /*** Initialization */ commit 82d5e902af68695481b8809e511a7913ef9a75aa Merge: 84847cad82 293a97d61e Author: Yuan Fu Date: Sat May 7 01:57:39 2022 -0700 ; Merge from master. commit 84847cad82e3b667c82f411627cd58d236f55e84 Author: Yuan Fu Date: Sat Mar 12 22:10:06 2022 -0800 Add tree-sitter intergration * configure.ac (HAVE_TREE_SITTER, TREE_SITTER_OBJ): New variables. 
(DYNAMIC_LIB_SUFFIX): new variable, I copied code from MODULES_SUFFIX so the diff looks this way. * doc/lispref/elisp.texi (Top): Add tree-sitter manual. * doc/lispref/modes.texi (Font Lock Mode): mention tree-sitter. (Parser-based Font Lock): New section. (Auto-Indentation): Mention tree-sitter. (Parser-based Indentation): New section. * doc/lispref/parsing.texi (Parsing Program Source): New chapter. * lisp/emacs-lisp/cl-preloaded.el (cl--typeof-types): Add treesit-parser and treesit-node type. * lisp/treesit.el: New file. * src/Makefile.in (TREE_SITTER_LIBS, TREE_SITTER_FLAGS, TREE_SITTER_OBJ): New variables. * src/alloc.c: (cleanup_vector): Add cleanup code for treesit-parser and treesit-node. * src/casefiddle.c (casify_region): Notify tree-sitter parser of buffer change. * src/data.c (Ftype_of): Add treesit-parser and treesit-node type (Qtreesit_parser, Qtreesit_node): New symbol. * src/emacs.c (main): Add symbols in treesit.c. * src/eval.c (define_error): Move the function to here. * src/insdel.c (insert_1_both, insert_from_string_1, insert_from_gap, insert_from_buffer_1, replace_range, del_range_2): Notify tree-sitter parser of buffer change. * src/json.c (define_error): Move this function out. * src/lisp.h (DEFINE_GDB_SYMBOL_BEGIN): Add treesit-parser and treesit-node. * src/lread.c (Vdynamic_library_suffixes): New variable. * src/print.c (print_vectorlike): Add code for printing treesit-parser and treesit-node. * src/treesit.c: New file. * src/treesit.h: New file. * test/src/treesit-tests.el: New file. 
diff --git a/configure.ac b/configure.ac index a315eeb6bd..0c174c6a7b 100644 --- a/configure.ac +++ b/configure.ac @@ -457,6 +457,7 @@ AC_DEFUN OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support]) OPTION_DEFAULT_ON([native-image-api], [don't use native image APIs (GDI+ on Windows)]) OPTION_DEFAULT_IFAVAILABLE([json], [compile with native JSON support]) +OPTION_DEFAULT_IFAVAILABLE([tree-sitter], [compile with tree-sitter]) OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts]) OPTION_DEFAULT_ON([harfbuzz],[don't use HarfBuzz for text shaping]) @@ -3087,6 +3088,27 @@ AC_DEFUN AC_SUBST(JSON_CFLAGS) AC_SUBST(JSON_OBJ) +HAVE_TREE_SITTER=no +TREE_SITTER_OBJ= + +if test "${with_tree_sitter}" != "no"; then + dnl TODO: we should use tree-sitter >= 0.20.2, but right now all + dnl tree-sitter libraries distributed are versioned at 0.0, so for + dnl the easy of development we'll just leave the version + dnl requirement at 0.0 for now. + EMACS_CHECK_MODULES([TREE_SITTER], [tree-sitter >= 0.0], + [HAVE_TREE_SITTER=yes], [HAVE_TREE_SITTER=no]) + if test "${HAVE_TREE_SITTER}" = yes; then + AC_DEFINE(HAVE_TREE_SITTER, 1, [Define if using tree-sitter.]) + TREE_SITTER_LIBS=-ltree-sitter + TREE_SITTER_OBJ="treesit.o" + fi +fi + +AC_SUBST(TREE_SITTER_LIBS) +AC_SUBST(TREE_SITTER_CFLAGS) +AC_SUBST(TREE_SITTER_OBJ) + NOTIFY_OBJ= NOTIFY_SUMMARY=no @@ -3926,20 +3948,31 @@ AC_DEFUN fi AC_SUBST(LIBZ) +### Dynamic library support +case $opsys in + cygwin|mingw32) DYNAMIC_LIB_SUFFIX=".dll" ;; + darwin) DYNAMIC_LIB_SUFFIX=".dylib" ;; + *) DYNAMIC_LIB_SUFFIX=".so" ;; +esac +case "${opsys}" in + darwin) DYNAMIC_LIB_SECONDARY_SUFFIX='.so' ;; + *) DYNAMIC_LIB_SECONDARY_SUFFIX='' ;; +esac +AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SUFFIX, "$DYNAMIC_LIB_SUFFIX", + [System extension for dynamic libraries]) +AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SECONDARY_SUFFIX, "$DYNAMIC_LIB_SECONDARY_SUFFIX", + [Alternative system extension for dynamic libraries.]) + +AC_SUBST(DYNAMIC_LIB_SUFFIX) 
+AC_SUBST(DYNAMIC_LIB_SECONDARY_SUFFIX) + ### Dynamic modules support LIBMODULES= HAVE_MODULES=no MODULES_OBJ= NEED_DYNLIB=no -case $opsys in - cygwin|mingw32) MODULES_SUFFIX=".dll" ;; - darwin) MODULES_SUFFIX=".dylib" ;; - *) MODULES_SUFFIX=".so" ;; -esac -case "${opsys}" in - darwin) MODULES_SECONDARY_SUFFIX='.so' ;; - *) MODULES_SECONDARY_SUFFIX='' ;; -esac +MODULES_SUFFIX="${DYNAMIC_LIB_SUFFIX}" +MODULES_SECONDARY_SUFFIX="${DYNAMIC_LIB_SECONDARY_SUFFIX}" if test "${with_modules}" != "no"; then case $opsys in gnu|gnu-linux) @@ -3970,10 +4003,10 @@ AC_DEFUN NEED_DYNLIB=yes AC_DEFINE(HAVE_MODULES, 1, [Define to 1 if dynamic modules are enabled]) AC_DEFINE_UNQUOTED(MODULES_SUFFIX, "$MODULES_SUFFIX", - [System extension for dynamic libraries]) + [System extension for dynamic modules]) if test -n "${MODULES_SECONDARY_SUFFIX}"; then AC_DEFINE_UNQUOTED(MODULES_SECONDARY_SUFFIX, "$MODULES_SECONDARY_SUFFIX", - [Alternative system extension for dynamic libraries.]) + [Alternative system extension for dynamic modules.]) fi fi AC_SUBST(MODULES_OBJ) @@ -4333,6 +4366,12 @@ AC_DEFUN *) MISSING="$MISSING json" WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-json=ifavailable";; esac +case $with_tree_sitter,$HAVE_TREE_SITTER in + no,* | ifavailable,* | *,yes) ;; + *) MISSING="$MISSING tree-sitter" + WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-tree-sitter=ifavailable";; +esac + if test "X${MISSING}" != X; then # If we have a missing library, and we don't have pkg-config installed, # the missing pkg-config may be the reason. Give the user a hint. 
@@ -6263,7 +6302,7 @@ AC_DEFUN optsep= emacs_config_features= for opt in ACL BE_APP CAIRO DBUS FREETYPE GCONF GIF GLIB GMP GNUTLS GPM GSETTINGS \ - HARFBUZZ IMAGEMAGICK JPEG JSON LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \ + HARFBUZZ IMAGEMAGICK JPEG JSON TREE-SITTER LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \ M17N_FLT MODULES NATIVE_COMP NOTIFY NS OLDXMENU PDUMPER PGTK PNG RSVG SECCOMP \ SOUND SQLITE3 THREADS TIFF TOOLKIT_SCROLL_BARS \ UNEXEC WEBP X11 XAW3D XDBE XFT XIM XINPUT2 XPM XWIDGETS X_TOOLKIT \ @@ -6334,6 +6373,7 @@ AC_DEFUN Does Emacs use -lxft? ${HAVE_XFT} Does Emacs use -lsystemd? ${HAVE_LIBSYSTEMD} Does Emacs use -ljansson? ${HAVE_JSON} + Does Emacs use -ltree-sitter? ${HAVE_TREE_SITTER} Does Emacs use the GMP library? ${HAVE_GMP} Does Emacs directly use zlib? ${HAVE_ZLIB} Does Emacs have dynamic modules support? ${HAVE_MODULES} diff --git a/doc/lispref/elisp.texi b/doc/lispref/elisp.texi index 426bb6d017..7390352016 100644 --- a/doc/lispref/elisp.texi +++ b/doc/lispref/elisp.texi @@ -222,6 +222,7 @@ Top * Non-ASCII Characters:: Non-ASCII text in buffers and strings. * Searching and Matching:: Searching buffers for strings or regexps. * Syntax Tables:: The syntax table controls word and list parsing. +* Parsing Program Source:: Generate syntax tree for program sources. * Abbrevs:: How Abbrev mode works, and its data structures. * Threads:: Concurrency in Emacs Lisp. @@ -1357,6 +1358,16 @@ Top * Syntax Table Internals:: How syntax table information is stored. * Categories:: Another way of classifying character syntax. +Parsing Program Source + +* Language Definitions:: Loading tree-sitter language definitions. +* Using Parser:: Introduction to parsers. +* Retrieving Node:: Retrieving node from syntax tree. +* Accessing Node:: Accessing node information. +* Pattern Matching:: Pattern matching with query patterns. +* Multiple Languages:: Parse text written in multiple languages. +* Tree-sitter C API:: Compare the C API and the ELisp API. 
+ Syntax Descriptors * Syntax Class Table:: Table of syntax classes. @@ -1701,6 +1712,7 @@ Top @include searching.texi @include syntax.texi +@include parsing.texi @include abbrevs.texi @include threads.texi @include processes.texi diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index c29936d5ca..905b21c0d4 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -2826,11 +2826,13 @@ Font Lock Mode in which contexts. This section explains how to customize Font Lock for a particular major mode. - Font Lock mode finds text to highlight in two ways: through -syntactic parsing based on the syntax table, and through searching -(usually for regular expressions). Syntactic fontification happens -first; it finds comments and string constants and highlights them. -Search-based fontification happens second. + Font Lock mode finds text to highlight in three ways: through +syntactic parsing based on the syntax table, through searching +(usually for regular expressions), and through parsing based on a +full-blown parser. Syntactic fontification happens first; it finds +comments and string constants and highlights them. Search-based +fontification happens second. Parser-based fontification can be +optionally enabled and it will precede the other two fontifications. @menu * Font Lock Basics:: Overview of customizing Font Lock. @@ -2845,6 +2847,7 @@ Font Lock Mode * Syntactic Font Lock:: Fontification based on syntax tables. * Multiline Font Lock:: How to coerce Font Lock into properly highlighting multiline constructs. +* Parser-based Font Lock:: Use a parser for fontification. @end menu @node Font Lock Basics @@ -3735,6 +3738,89 @@ Region to Refontify reasonably fast. @end defvar +@node Parser-based Font Lock +@subsection Parser-based Font Lock + +@c This node is written when the only parser Emacs has is tree-sitter, +@c if in the future more parser are supported, feel free to reorganize +@c and rewrite this node to describe multiple parsers in parallel. 
+ +Besides simple syntactic font lock and search-based font lock, Emacs +also provides complete syntactic font lock with the help of a parser, +currently provided by the tree-sitter library (@pxref{Parsing Program +Source}). Because it is an optional feature, parser-based font lock +is less integrated with Emacs. Most variables introduced in previous +sections only apply to search-based font lock, except for +@var{font-lock-maximum-decoration}. + +@defun treesit-font-lock-enable +This function enables parser-based font lock in the current buffer. +@end defun + +Parser-based font lock and other font lock mechanisms are not mutually +exclusive. By default, if enabled, parser-based font lock runs first, +then the simple syntactic font lock (if enabled), then search-based +font lock. + +Although parser-based font lock doesn't share the same customization +variables with search-based font lock, parser-based font lock uses +similar customization schemes. Just like @var{font-lock-keywords} and +@var{font-lock-defaults}, parser-based font lock has +@var{treesit-font-lock-settings} and +@var{treesit-font-lock-defaults}. + +@defvar treesit-font-lock-settings +A list of @var{setting}s for tree-sitter font lock. + +Each @var{setting} should look like + +@example +(@var{language} @var{query}) +@end example + +Each @var{setting} controls one parser (often of a different language). +And @var{language} is the language symbol (@pxref{Language +Definitions}); @var{query} is either a string query or a sexp query +(@pxref{Pattern Matching}). + +Capture names in @var{query} should be face names like +@code{font-lock-keyword-face}. The captured node will be fontified +with that face. Capture names can also be function names, in which +case the function is called with (@var{start} @var{end} @var{node}), +where @var{start} and @var{end} are the start and end positions of the +node in the buffer, and @var{node} is the tree-sitter node object.
If a +capture name is both a face and a function, face takes priority. + +Generally, major modes should set @var{treesit-font-lock-defaults}, +and let Emacs automatically populate this variable. +@end defvar + +@defvar treesit-font-lock-defaults +This variable stores defaults for tree-sitter font lock. It is a list +of + +@example +(@var{default} @var{:keyword} @var{value}...) +@end example + +A @var{default} may be a symbol or a list of symbols (for different +levels of fontification). The symbol(s) can be a variable or a +function. If a symbol is both a variable and a function, it is used +as a function. Different levels of fontification can be controlled by +@var{font-lock-maximum-decoration}. + +The symbol(s) in @var{default} should contain or return a +@var{setting} as described in @var{treesit-font-lock-settings}. + +The remaining @var{keyword}s and @var{value}s are additional settings that +could be used to alter the fontification behavior. Currently there +aren't any. +@end defvar + +Multi-language major modes should provide range functions in +@var{treesit-range-functions}, and Emacs will set the ranges +accordingly before fontifying a region (@pxref{Multiple Languages}). + @node Auto-Indentation @section Automatic Indentation of code @@ -3791,10 +3877,12 @@ Auto-Indentation so if your language seems somewhat similar to one of those languages, you might try to use that engine. @c FIXME: documentation? Another one is SMIE which takes an approach in the spirit -of Lisp sexps and adapts it to non-Lisp languages. +of Lisp sexps and adapts it to non-Lisp languages. Yet another one is +to rely on a full-blown parser, for example, the tree-sitter library. @menu * SMIE:: A simple minded indentation engine. +* Parser-based Indentation:: Parser-based indentation engine. @end menu @node SMIE @@ -4454,6 +4542,176 @@ SMIE Customization @code{eval: (smie-config-local '(@var{rules}))}.
@end defun +@node Parser-based Indentation +@subsection Parser-based Indentation + +@c This node is written when the only parser Emacs has is tree-sitter, +@c if in the future more parser are supported, feel free to reorganize +@c and rewrite this node to describe multiple parsers in parallel. + +When built with the tree-sitter library (@pxref{Parsing Program +Source}), Emacs could parse program source and produce a syntax tree. +And this syntax tree can be used for indentation. For maximum +flexibility, we could write a custom indent function that queries the +syntax tree and indents accordingly for each language, but that would +be a lot of work. It is more convenient to use the simple indentation +engine described below: we only need to write some indentation rules +and the engine takes care of the rest. + +To enable the indentation engine, set the value of +@var{indent-line-function} to @code{treesit-indent}. + +@defvar treesit-indent-function +This variable stores the actual function called by +@code{treesit-indent}. By default, its value is +@code{treesit-simple-indent}. In the future we might add other +more complex indentation engines, if @code{treesit-simple-indent} +proves to be insufficient. +@end defvar + +@heading Writing indentation rules + +@defvar treesit-simple-indent-rules +This local variable stores indentation rules for every language. It is +a list of + +@example +(@var{language} . @var{rules}) +@end example + +where @var{language} is a language symbol, @var{rules} is a list of + +@example +(@var{matcher} @var{anchor} @var{offset}) +@end example + +The @var{matcher} determines whether this rule applies, @var{anchor} +and @var{offset} together determines which column to indent to. + +A @var{matcher} is a function that takes three arguments (@var{node} +@var{parent} @var{bol}). 
Argument @var{bol} is the point where we +are indenting: the position of the first non-whitespace character from +the beginning of the line; @var{node} is the largest (highest-in-tree) +node that starts at that point; @var{parent} is the parent of +@var{node}. + +If @var{matcher} returns non-nil, meaning the rule matches, Emacs then +uses @var{anchor} to find an anchor; it should be a function that +takes the same arguments (@var{node} @var{parent} @var{bol}) and +returns a point. + +Finally Emacs computes the column of that point returned by +@var{anchor} and adds @var{offset} to it, and indents to that column. + +For @var{matcher} and @var{anchor}, Emacs provides some convenient +presets to spare us from writing these functions ourselves. They are +stored in @var{treesit-simple-indent-presets}, see below. +@end defvar + +@defvar treesit-simple-indent-presets +This is a list of presets for @var{matcher}s and @var{anchor}s in +@var{treesit-simple-indent-rules}. Each of them represents a +function that takes @var{node}, @var{parent} and @var{bol} as +arguments. + +@example +(match @var{node-type} @var{parent-type} + @var{node-field} @var{node-index-min} @var{node-index-max}) +@end example + +This matcher checks if @var{node}'s type is @var{node-type}, +@var{parent}'s type is @var{parent-type}, @var{node}'s field name in +@var{parent} is @var{node-field}, and @var{node}'s index among its +siblings is between @var{node-index-min} and @var{node-index-max}. If +the value of a constraint is nil, this matcher doesn't check for that +constraint. For example, to match the first child where parent is +@code{argument_list}, use + +@example +(match nil "argument_list" nil 0 0) +@end example + +@example +no-node +@end example + +This matcher matches the case where @var{node} is nil, i.e., there is +no node that starts at @var{bol}. This is the case when @var{bol} is +at an empty line or inside a multi-line string, etc.
+ +@example +(parent-is @var{type}) +@end example + +This matcher matches if @var{parent}'s type is @var{type}. + +@example +(node-is @var{type}) +@end example + +This matcher matches if @var{node}'s type is @var{type}. + +@example +(query @var{query}) +@end example + +This matcher matches if querying @var{parent} with @var{query} +captures @var{node}. The capture name does not matter. + +@example +first-sibling +@end example + +This anchor returns the start of the first child of @var{parent}. + +@example +parent +@end example + +This anchor returns the start of @var{parent}. + +@example +parent-bol +@end example + +This anchor returns the beginning of non-space characters on the line +that @var{parent} is on. + +@example +prev-sibling +@end example + +This anchor returns the start of the previous sibling of @var{node}. + +@example +no-indent +@end example + +This anchor returns the start of @var{node}, i.e., do not indent. + +@example +prev-line +@end example + +This anchor returns the start of the first named node on the previous +line. This can be used for indenting an empty line. +@end defvar + +@heading Indentation utilities + +Here are some utility functions that can help with writing indentation +rules. + +@defun treesit-check-indent mode +This function checks the current buffer's indentation against major mode +@var{mode}. It indents the current line in @var{mode} and compares +the indentation with the current indentation. Then it pops up a diff +buffer showing the difference. Correct indentation (target) is in +green, current indentation is in red. +@end defun + +It is also helpful to use @code{treesit-inspect-mode} when writing +indentation rules.
@node Desktop Save Mode @section Desktop Save Mode diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi new file mode 100644 index 0000000000..bbe70ff9b1 --- /dev/null +++ b/doc/lispref/parsing.texi @@ -0,0 +1,1420 @@ +@c -*- mode: texinfo; coding: utf-8 -*- +@c This is part of the GNU Emacs Lisp Reference Manual. +@c Copyright (C) 2021 Free Software Foundation, Inc. +@c See the file elisp.texi for copying conditions. +@node Parsing Program Source +@chapter Parsing Program Source + +Emacs provides various ways to parse program source text and produce a +@dfn{syntax tree}. In a syntax tree, text is no longer a +one-dimensional stream but a structured tree of nodes, where each node +represents a piece of text. Thus a syntax tree can enable +interesting features like precise fontification, indentation, +navigation, structured editing, etc. + +Emacs has a simple facility for parsing balanced expressions +(@pxref{Parsing Expressions}). There is also the SMIE library for generic +navigation and indentation (@pxref{SMIE}). + +Emacs also provides integration with the tree-sitter library +(@uref{https://tree-sitter.github.io/tree-sitter}) if compiled with +it. The tree-sitter library implements an incremental parser and has +support for a wide range of programming languages. + +@defun treesit-available-p +This function returns non-nil if tree-sitter features are available +for this Emacs instance. +@end defun + +For using tree-sitter features in font-lock and indentation, +@pxref{Parser-based Font Lock}, @pxref{Parser-based Indentation}. + +To access the syntax tree of the text in a buffer, we need to first +load a language definition and create a parser with it. Next, we can +query the parser for specific nodes in the syntax tree. Then, we can +access various information about the node, and we can pattern-match a +node with a powerful syntax. Finally, we explain how to work with +source files that mix multiple languages.
The following sections +explain how to do each of the tasks in detail. + +@menu +* Language Definitions:: Loading tree-sitter language definitions. +* Using Parser:: Introduction to parsers. +* Retrieving Node:: Retrieving node from syntax tree. +* Accessing Node:: Accessing node information. +* Pattern Matching:: Pattern matching with query patterns. +* Multiple Languages:: Parse text written in multiple languages. +* Tree-sitter C API:: Compare the C API and the ELisp API. +@end menu + +@node Language Definitions +@section Tree-sitter Language Definitions + +@heading Loading a language definition + +Tree-sitter relies on language definitions to parse text in a given +language. In Emacs, a language definition is represented by a symbol. +For example, the C language definition is represented as @code{c}, and +@code{c} can be passed to tree-sitter functions as the @var{language} +argument. + +@vindex treesit-extra-load-path +@vindex treesit-load-language-error +@vindex treesit-load-suffixes +Tree-sitter language definitions are distributed as dynamic libraries. +In order to use a language definition in Emacs, you need to make sure +that the dynamic library is installed on the system. Emacs looks for +language definitions under load paths in +@var{treesit-extra-load-path}, @var{user-emacs-directory}/tree-sitter, +and system default locations for dynamic libraries, in that order. +Emacs tries each extension in @var{treesit-load-suffixes}. If Emacs +cannot find the library or has problems loading it, Emacs signals +@var{treesit-load-language-error}. The signal data is a list of +specific error messages. + +@defun treesit-language-available-p language +This function checks whether the dynamic library for @var{language} is +present on the system, and returns non-nil if it is.
+@end defun + +@vindex treesit-load-name-override-list +By convention, the dynamic library for @var{language} is +@code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the +system-specific extension for dynamic libraries. Also by convention, +the function provided by that library is named +@code{tree_sitter_@var{language}}. If a language definition doesn't +follow this convention, you should add an entry + +@example +(@var{language} @var{library-base-name} @var{function-name}) +@end example + +to @var{treesit-load-name-override-list}, where +@var{library-base-name} is the base filename for the dynamic library +(conventionally @code{libtree-sitter-@var{language}}), and +@var{function-name} is the function provided by the library +(conventionally @code{tree_sitter_@var{language}}). For example, + +@example +(cool-lang "libtree-sitter-coool" "tree_sitter_cooool") +@end example + +for a language too cool to abide by the rules. + +@heading Concrete syntax tree + +A syntax tree is what a language definition defines (more or less) and +what a parser generates. In a syntax tree, each node represents a +piece of text, and nodes are connected to each other by parent-child +relationships.
For example, if the source text is + +@example +1 + 2 +@end example + +@noindent +its syntax tree could be + +@example +@group + +--------------+ + | root "1 + 2" | + +--------------+ + | + +--------------------------------+ + | expression "1 + 2" | + +--------------------------------+ + | | | ++------------+ +--------------+ +------------+ +| number "1" | | operator "+" | | number "2" | ++------------+ +--------------+ +------------+ +@end group +@end example + +We can also represent it in s-expression: + +@example +(root (expression (number) (operator) (number))) +@end example + +@subheading Node types + +@cindex tree-sitter node type +@anchor{tree-sitter node type} +@cindex tree-sitter named node +@anchor{tree-sitter named node} +@cindex tree-sitter anonymous node +Names like @code{root}, @code{expression}, @code{number}, +@code{operator} are nodes' @dfn{type}. However, not all nodes in a +syntax tree have a type. Nodes that don't are @dfn{anonymous nodes}, +and nodes with a type are @dfn{named nodes}. Anonymous nodes are +tokens with fixed spellings, including punctuation characters like +bracket @samp{]}, and keywords like @code{return}. + +@subheading Field names + +@cindex tree-sitter node field name +@anchor{tree-sitter node field name} To make the syntax tree easier to +analyze, many language definitions assign @dfn{field names} to child +nodes. For example, a @code{function_definition} node could have a +@code{declarator} and a @code{body}: + +@example +@group +(function_definition + declarator: (declaration) + body: (compound_statement)) +@end group +@end example + +@deffn Command treesit-inspect-mode +This minor mode displays the node that @emph{starts} at point in +mode-line. The mode-line will display + +@example +@var{parent} @var{field-name}: (@var{child} (@var{grand-child} (...))) +@end example + +@var{child}, @var{grand-child}, and @var{grand-grand-child}, etc, are +nodes that have their beginning at point. 
And @var{parent} is the +parent of @var{child}. + +If there is no node that starts at point, i.e., point is in the middle +of a node, then the mode-line only displays the smallest node that +spans point, and its immediate parent. + +This minor mode doesn't create parsers on its own. It simply uses the +first parser in @var{treesit-parser-list} (@pxref{Using Parser}). +@end deffn + +@heading Reading the grammar definition + +Authors of language definitions define the @dfn{grammar} of a +language, and this grammar determines how a parser constructs a +concrete syntax tree out of the text. In order to use the syntax +tree effectively, we need to read the @dfn{grammar file}. + +The grammar file is usually @code{grammar.js} in a language +definition’s project repository. The link to a language definition’s +home page can be found on tree-sitter’s homepage +(@uref{https://tree-sitter.github.io/tree-sitter}). + +The grammar is written in JavaScript syntax. For example, the rule +matching a @code{function_definition} node looks like + +@example +@group +function_definition: $ => seq( + $.declaration_specifiers, + field('declarator', $.declaration), + field('body', $.compound_statement) +) +@end group +@end example + +The rule is represented by a function that takes a single argument +@var{$}, representing the whole grammar. The function itself is +constructed by other functions: the @code{seq} function puts together a +sequence of children; the @code{field} function annotates a child with +a field name. If we write the above definition in BNF syntax, it +would look like + +@example +@group +function_definition := + <declaration_specifiers> <declaration> <compound_statement> +@end group +@end example + +@noindent +and the node returned by the parser would look like + +@example +@group +(function_definition + (declaration_specifier) + declarator: (declaration) + body: (compound_statement)) +@end group +@end example + +Below is a list of functions that one will see in a grammar +definition.
Each function takes other rules as arguments and returns +a new rule. + +@itemize @bullet +@item +@code{seq(rule1, rule2, ...)} matches each rule one after another. + +@item +@code{choice(rule1, rule2, ...)} matches one of the rules in its +arguments. + +@item +@code{repeat(rule)} matches @var{rule} for @emph{zero or more} times. +This is like the @samp{*} operator in regular expressions. + +@item +@code{repeat1(rule)} matches @var{rule} for @emph{one or more} times. +This is like the @samp{+} operator in regular expressions. + +@item +@code{optional(rule)} matches @var{rule} for @emph{zero or one} time. +This is like the @samp{?} operator in regular expressions. + +@item +@code{field(name, rule)} assigns field name @var{name} to the child +node matched by @var{rule}. + +@item +@code{alias(rule, alias)} makes nodes matched by @var{rule} appear as +@var{alias} in the syntax tree generated by the parser. For example, + +@example +alias(preprocessor_call_exp, call_expression) +@end example + +makes any node matched by @code{preprocessor_call_exp} to appear as +@code{call_expression}. +@end itemize + +Below are grammar functions less interesting for a reader of a +language definition. + +@itemize +@item +@code{token(rule)} marks @var{rule} to produce a single leaf node. +That is, instead of generating a parent node with individual child +nodes under it, everything is combined into a single leaf node. + +@item +Normally, grammar rules ignore preceding whitespaces, +@code{token.immediate(rule)} changes @var{rule} to match only when +there is no preceding whitespaces. + +@item +@code{prec(n, rule)} gives @var{rule} a level @var{n} precedence. + +@item +@code{prec.left([n,] rule)} marks @var{rule} as left-associative, +optionally with level @var{n}. + +@item +@code{prec.right([n,] rule)} marks @var{rule} as right-associative, +optionally with level @var{n}. + +@item +@code{prec.dynamic(n, rule)} is like @code{prec}, but the precedence +is applied at runtime instead. 
+@end itemize + +The tree-sitter project talks about writing a grammar in more detail: +@uref{https://tree-sitter.github.io/tree-sitter/creating-parsers}. +Read especially the ``The Grammar DSL'' section. + +@node Using Parser +@section Using Tree-sitter Parser +@cindex Tree-sitter parser + +This section describes how to create and configure a tree-sitter +parser. In Emacs, each tree-sitter parser is associated with a +buffer. As we edit the buffer, the associated parser is automatically +kept up-to-date. + +@defvar treesit-disabled-modes +Before creating a parser, it is perhaps good to check whether we +should use tree-sitter at all. Sometimes a user doesn't want to use +tree-sitter features for a major mode. To turn off tree-sitter for a +mode, they add that mode to this variable. +@end defvar + +@defvar treesit-maximum-size +If users want to turn off tree-sitter for buffers larger than a +particular size (because tree-sitter consumes memory ~10 times the +buffer size for storing the syntax tree), they set this variable to +that size. +@end defvar + +@defun treesit-should-enable-p &optional mode +This function returns non-nil if @var{mode} (defaulting to the current +major mode) should activate tree-sitter features. The result depends +on the value of @var{treesit-disabled-modes} and +@var{treesit-maximum-size} described above. The result also +depends on, of course, the result of @code{treesit-available-p}. + +Writers of major modes or other packages are responsible for calling +this function and determining whether to activate tree-sitter features. +@end defun + + +@cindex Creating tree-sitter parsers +To create a parser, we provide a buffer to parse and the language to +use (@pxref{Language Definitions}). Emacs provides several creation +functions for different use cases. + +@defun treesit-get-parser-create language +This function is the most convenient one. It gives you a parser that +recognizes @var{language} for the current buffer.
The function +checks if there already exists a parser suiting the need, and only +creates a new one when it can't find one. + +@example +@group +;; Create a parser for the C programming language. +(treesit-get-parser-create 'c) + @c @result{} # +@end group +@end example +@end defun + +@defun treesit-get-parser language +This function is like @code{treesit-get-parser-create}, but it +always creates a new parser. +@end defun + +@defun treesit-parser-create buffer language +This function is the most primitive, requiring both the buffer to +associate with, and the language to use. If @var{buffer} is nil, the +current buffer is used. +@end defun + +Given a parser, we can query information about it: + +@defun treesit-parser-buffer parser +Returns the buffer associated with @var{parser}. +@end defun + +@defun treesit-parser-language parser +Returns the language that @var{parser} uses. +@end defun + +@defun treesit-parser-p object +Checks if @var{object} is a tree-sitter parser. Returns non-nil if it +is, and nil otherwise. +@end defun + +There is no need to explicitly parse a buffer, because parsing is done +automatically and lazily. A parser only parses when we query for a +node in its syntax tree. Therefore, when a parser is first created, +it doesn't parse the buffer; instead, it waits until we query for a +node for the first time. Similarly, when some change is made in the +buffer, a parser doesn't re-parse immediately and only records some +necessary information to later re-parse when necessary. + +@vindex treesit-buffer-too-large +When a parser does parse, it checks the size of the buffer. +Tree-sitter can only handle buffers no larger than about 4GB. If the +size exceeds that, Emacs signals @var{treesit-buffer-too-large} +with signal data being the buffer size. + +@vindex treesit-parser-list +Once a parser is created, Emacs automatically adds it to the +buffer-local variable @var{treesit-parser-list}.
Every time a +change is made to the buffer, Emacs updates parsers in this list so +they can update their syntax tree incrementally. Therefore, one must +not remove a parser from this list and put it back in later: if any +change is made when that parser is absent, the parser will be +permanently out-of-sync with the buffer content, and shouldn't be used +anymore. + +@cindex tree-sitter narrowing +@anchor{tree-sitter narrowing} Normally, a parser ``sees'' the whole +buffer, but when the buffer is narrowed (@pxref{Narrowing}), the +parser will only see the visible region. As far as the parser can +tell, the hidden region is deleted. And when the buffer is later +widened, the parser thinks text is inserted at the beginning and at +the end. Although parsers respect narrowing, narrowing shouldn't be +the means to handle a multi-language buffer; instead, set the ranges in +which a parser should operate. @xref{Multiple Languages}. + +Because a parser parses lazily, when we narrow the buffer, the parser +doesn't act immediately; as long as we don't query for a node while +the buffer is narrowed, narrowing does not affect the parser. + +@cindex tree-sitter parse string +@defun treesit-parse-string string language +Besides creating a parser for a buffer, we can also just parse a +string. Unlike a buffer, parsing a string is a one-time deal, and +there is no way to update the result. + +This function parses @var{string} with @var{language}, and returns the +root node of the generated syntax tree. +@end defun + +@node Retrieving Node +@section Retrieving Node + +@cindex tree-sitter find node +@cindex tree-sitter get node +There are two ways to retrieve a node: directly from the syntax tree, +or by traveling from other nodes. But before we continue, let's go +over some conventions of tree-sitter functions. + +We talk about a node being ``smaller'' or ``larger'', and ``lower'' or +``higher''.
A smaller and lower node is lower in the syntax tree and +therefore spans a smaller piece of text; a larger and higher node is +higher up in the syntax tree, containing many smaller nodes as its +children, and therefore spans a larger piece of text. + +When a function cannot find a node, it returns nil. And for the +convenience of function chaining, all the functions that take a node +as argument and return a node accept the node to be nil; in that +case, the function just returns nil. + +@vindex treesit-node-outdated +Nodes are not automatically updated when the associated buffer is +modified. In fact, there is no way to update a node once it is +retrieved. It is best to use a node and throw it away and not save +it. A node is @dfn{outdated} if the buffer has changed since the node +was retrieved. Using an outdated node signals a +@var{treesit-node-outdated} error. + +@heading Retrieving node from syntax tree + +@defun treesit-node-at beg &optional end parser-or-lang named +This function returns the @emph{smallest} node that covers the span +from @var{beg} to @var{end}. In other words, the start of the node +@code{<=} @var{beg}, and the end of the node @code{>=} @var{end}. If +@var{end} is omitted, it defaults to the value of @var{beg}. + +When @var{parser-or-lang} is nil, this function uses the first parser +in @var{treesit-parser-list} in the current buffer. If +@var{parser-or-lang} is a parser object, it uses that parser; if +@var{parser-or-lang} is a language, it finds the first parser using +that language in @var{treesit-parser-list} and uses that. + +If @var{named} is non-nil, this function looks for a named node +instead (@pxref{tree-sitter named node, named node}). + +@example +@group +;; Find the node at point in a C parser's syntax tree. +(treesit-node-at (point) (point) 'c) + @c @result{} # +@end group +@end example +@end defun + +@defun treesit-parser-root-node parser +This function returns the root node of the syntax tree generated by +@var{parser}.
+@end defun + +@defun treesit-buffer-root-node &optional language +This function finds the first parser that uses @var{language} in +@var{treesit-parser-list} in the current buffer, and returns the +root node of that buffer. If it cannot find an appropriate parser, it +returns nil. +@end defun + +Once we have a node, we can retrieve other nodes from it, or query for +information about this node. + +@heading Retrieving node from other nodes + +@subheading By kinship + +@defun treesit-node-parent node +This function returns the immediate parent of @var{node}. +@end defun + +@defun treesit-node-child node n &optional named +This function returns the @var{n}'th child of @var{node}. If +@var{named} is non-nil, then it only counts named nodes +(@pxref{tree-sitter named node, named node}). For example, in a node +that represents a string: @code{"text"}, there are three children +nodes: the opening quote @code{"}, the string content @code{text}, and +the enclosing quote @code{"}. Among these nodes, the first child is +the opening quote @code{"}, the first named child is the string +content @code{text}. +@end defun + +@defun treesit-node-children node &optional named +This function returns all of @var{node}'s children in a list. If +@var{named} is non-nil, then it only retrieves named nodes +(@pxref{tree-sitter named node, named node}). +@end defun + +@defun treesit-next-sibling node &optional named +This function finds the next sibling of @var{node}. If @var{named} is +non-nil, it finds the next named sibling (@pxref{tree-sitter named +node, named node}). +@end defun + +@defun treesit-prev-sibling node &optional named +This function finds the previous sibling of @var{node}. If +@var{named} is non-nil, it finds the previous named sibling +(@pxref{tree-sitter named node, named node}). 
+@end defun + +@subheading By field name + +To make the syntax tree easier to analyze, many language definitions +assign @dfn{field names} to child nodes (@pxref{tree-sitter node field +name, field name}). For example, a @code{function_definition} node +could have a @code{declarator} and a @code{body}. + +@defun treesit-child-by-field-name node field-name +This function finds the child of @var{node} that has @var{field-name} +as its field name. + +@example +@group +;; Get the child that has "body" as its field name. +(treesit-child-by-field-name node "body") + @c @result{} # +@end group +@end example +@end defun + +@subheading By position + +@defun treesit-first-child-for-pos node pos &optional named +This function finds the first child of @var{node} that extends beyond +@var{pos}. ``Extend beyond'' means the end of the child node +@code{>=} @var{pos}. This function only looks for immediate children of +@var{node}, and doesn't look in its grandchildren. If @var{named} is +non-nil, it only looks for named children (@pxref{tree-sitter named node, +named node}). +@end defun + +@defun treesit-node-descendant-for-range node beg end &optional named +This function finds the @emph{smallest} (grand)child of @var{node} +that spans the range from @var{beg} to @var{end}. It is similar to +@code{treesit-node-at}. If @var{named} is non-nil, it only looks +for named children (@pxref{tree-sitter named node, named node}). +@end defun + +@heading More convenient functions + +@defun treesit-filter-child node pred &optional named +This function finds children of @var{node} that satisfy @var{pred}. + +Function @var{pred} takes the child node as the argument and should +return non-nil to indicate keeping the child. If @var{named} is +non-nil, this function only searches for named nodes. +@end defun + +@defun treesit-parent-until node pred +This function repeatedly finds the parent of @var{node}, and returns +the parent if it satisfies @var{pred} (which takes the parent as the +argument).
If no parent satisfies @var{pred}, this function returns +nil. +@end defun + +@defun treesit-parent-while node pred +This function repeatedly finds the parent of @var{node}, and keeps +doing so as long as the parent satisfies @var{pred} (which takes the +parent as the single argument). I.e., this function returns the +farthest parent that still satisfies @var{pred}. +@end defun + +@node Accessing Node +@section Accessing Node Information + +Before going further, make sure you have read the basic conventions +about tree-sitter nodes in the previous section. + +@heading Basic information + +Every node is associated with a parser, and that parser is associated +with a buffer. The following functions let you retrieve them. + +@defun treesit-node-parser node +This function returns @var{node}'s associated parser. +@end defun + +@defun treesit-node-buffer node +This function returns @var{node}'s parser's associated buffer. +@end defun + +@defun treesit-node-language node +This function returns @var{node}'s parser's associated language. +@end defun + +Each node represents a piece of text in the buffer. The functions below +find relevant information about that text. + +@defun treesit-node-start node +Return the start position of @var{node}. +@end defun + +@defun treesit-node-end node +Return the end position of @var{node}. +@end defun + +@defun treesit-node-text node &optional object +Returns the buffer text that @var{node} represents. (If @var{node} is +retrieved from parsing a string, it will be the text from that +string.) +@end defun + +Here are some basic checks on tree-sitter nodes. + +@defun treesit-node-p object +Checks if @var{object} is a tree-sitter syntax node. +@end defun + +@defun treesit-node-eq node1 node2 +Checks if @var{node1} and @var{node2} are the same node in a syntax +tree. +@end defun + +@heading Property information + +In general, nodes in a concrete syntax tree fall into two categories: +@dfn{named nodes} and @dfn{anonymous nodes}.
Whether a node is named +or anonymous is determined by the language definition +(@pxref{tree-sitter named node, named node}). + +@cindex tree-sitter missing node +Apart from being named/anonymous, a node can have other properties. A +node can be ``missing'': missing nodes are inserted by the parser in +order to recover from certain kinds of syntax errors, i.e., something +should probably be there according to the grammar, but is not there. + +@cindex tree-sitter extra node +A node can be ``extra'': extra nodes represent things like comments, +which can appear anywhere in the text. + +@cindex tree-sitter node that has changes +A node ``has changes'' if the buffer has changed since the node was +retrieved. In this case, the node's start and end positions would be +off, and we had better throw it away and retrieve a new one. + +@cindex tree-sitter node that has error +A node ``has error'' if the text it spans contains a syntax error. It +can be that the node itself has an error, or one of its (grand)children has +an error. + +@defun treesit-node-check node property +This function checks if @var{node} has @var{property}. @var{property} +can be @code{'named}, @code{'missing}, @code{'extra}, +@code{'has-changes}, or @code{'has-error}. +@end defun + +Named nodes have ``types'' (@pxref{tree-sitter node type, node type}). +For example, a named node can be a @code{string_literal} node, where +@code{string_literal} is its type. + +@defun treesit-node-type node +Return @var{node}'s type as a string. +@end defun + +@heading Information as a child or parent + +@defun treesit-node-index node &optional named +This function returns the index of @var{node} as a child node of its +parent. If @var{named} is non-nil, it only counts named nodes +(@pxref{tree-sitter named node, named node}). +@end defun + +@defun treesit-node-field-name node +A child of a parent node could have a field name (@pxref{tree-sitter +node field name, field name}).
This function returns the field name +of @var{node} as a child of its parent. +@end defun + +@defun treesit-node-field-name-for-child node n +This is a more primitive function that returns the field name of the +@var{n}'th child of @var{node}. +@end defun + +@defun treesit-child-count node &optional named +This function finds the number of children of @var{node}. If +@var{named} is non-nil, it only counts named children (@pxref{tree-sitter +named node, named node}). +@end defun + +@node Pattern Matching +@section Pattern Matching Tree-sitter Nodes + +Tree-sitter lets us pattern match with a small declarative language. +Pattern matching consists of two steps: first tree-sitter matches a +@dfn{pattern} against nodes in the syntax tree, then it @dfn{captures} +specific nodes in that pattern and returns the captured nodes. + +We describe first how to write the most basic query pattern and how to +capture nodes in a pattern, then the pattern-match function, and finally +more advanced pattern syntax. + +@heading Basic query syntax + +@cindex Tree-sitter query syntax +@cindex Tree-sitter query pattern +A @dfn{query} consists of multiple @dfn{patterns}; each pattern is an +s-expression that matches a certain node in the syntax tree. A +pattern has the following shape: + +@example +(@var{type} @var{child}...) +@end example + +@noindent +For example, a pattern that matches a @code{binary_expression} node that +contains @code{number_literal} child nodes would look like + +@example +(binary_expression (number_literal)) +@end example + +To @dfn{capture} a node in the query pattern above, append +@code{@@capture-name} after the node pattern you want to capture. For +example, + +@example +(binary_expression (number_literal) @@number-in-exp) +@end example + +@noindent +captures @code{number_literal} nodes that are inside a +@code{binary_expression} node with capture name @code{number-in-exp}.
+ +We can capture the @code{binary_expression} node too, with capture +name @code{biexp}: + +@example +(binary_expression + (number_literal) @@number-in-exp) @@biexp +@end example + +@heading Query function + +Now we can introduce the query functions. + +@defun treesit-query-capture node query &optional beg end +This function matches patterns in @var{query} in @var{node}. +Argument @var{query} can be a either string or a s-expression. For +now, we focus on the string syntax; s-expression syntax is described +at the end of the section. + +The function returns all captured nodes in a list of +@code{(@var{capture_name} . @var{node})}. If @var{beg} and @var{end} +are both non-nil, it only pattern matches nodes in that range. + +@vindex treesit-query-error +This function raise a @var{treesit-query-error} if @var{query} is +malformed. The signal data contains a description of the specific +error. +@end defun + +@defun treesit-query-in source query &optional beg end +This function matches patterns in @var{query} in @var{source}, and +returns all captured nodes in a list of @code{(@var{capture_name} +. @var{node})}. If @var{beg} and @var{end} are both non-nil, it only +pattern match nodes in that range. + +Argument @var{source} designates a node, it can be a language symbol, +a parser, or simply a node. If a language symbol, @var{source} +represents the root node of the first parser for that language in the +current buffer; if a parser, @var{source} represents the root node of +that parser. + +This function also raises @var{treesit-query-error}. +@end defun + +For example, suppose @var{node}'s content is @code{1 + 2}, and +@var{query} is + +@example +@group +(setq query + "(binary_expression + (number_literal) @@number-in-exp) @@biexp") +@end group +@end example + +@noindent +Querying that query would return + +@example +@group +(treesit-query-capture node query) + @result{} ((biexp . @var{}) + (number-in-exp . @var{}) + (number-in-exp . 
@var{<node for "2">})) +@end group +@end example + +As we mentioned earlier, a @var{query} could contain multiple +patterns. For example, it could have two top-level patterns: + +@example +@group +(setq query + "(binary_expression) @@biexp + (number_literal) @@number @@biexp") +@end group +@end example + +@defun treesit-query-string string query language +This function parses @var{string} with @var{language}, pattern matches +its root node with @var{query}, and returns the result. +@end defun + +@heading More query syntax + +Besides node type and capture, tree-sitter's query syntax can express +anonymous nodes, field names, wildcards, quantification, grouping, +alternation, anchors, and predicates. + +@subheading Anonymous node + +An anonymous node is written verbatim, surrounded by quotes. A +pattern matching (and capturing) keyword @code{return} would be + +@example +"return" @@keyword +@end example + +@subheading Wild card + +In a query pattern, @samp{(_)} matches any named node, and @samp{_} +matches any named or anonymous node. For example, to capture any +named child of a @code{binary_expression} node, the pattern would be + +@example +(binary_expression (_) @@in_biexp) +@end example + +@subheading Field name + +We can capture child nodes that have specific field names: + +@example +@group +(function_definition + declarator: (_) @@func-declarator + body: (_) @@func-body) +@end group +@end example + +We can also capture a node that doesn't have a certain field, say, a +@code{function_definition} without a @code{body} field. + +@example +(function_definition !body) @@func-no-body +@end example + +@subheading Quantify node + +Tree-sitter recognizes quantification operators @samp{*}, @samp{+} and +@samp{?}. Their meanings are the same as in regular expressions: +@samp{*} matches the preceding pattern zero or more times, @samp{+} +matches one or more times, and @samp{?} matches zero or one time.
+ +For example, this pattern matches @code{type_declaration} nodes +that have @emph{zero or more} @code{long} keywords: + +@example +(type_declaration "long"* @@long-in-type) +@end example + +@noindent +And this pattern matches a type declaration that has zero or one +@code{long} keyword: + +@example +(type_declaration "long"?) @@type-decl +@end example + +@subheading Grouping + +Similar to groups in regular expressions, we can bundle patterns into a +group and apply quantification operators to it. For example, to +express a comma-separated list of identifiers, one could write + +@example +(identifier) ("," (identifier))* +@end example + +@subheading Alternation + +Again, similar to regular expressions, we can express ``match any one +from this group of patterns'' in the query pattern. The syntax is a +list of patterns enclosed in square brackets. For example, to capture +some keywords in C, the query pattern would be + +@example +@group +[ + "return" + "break" + "if" + "else" +] @@keyword +@end group +@end example + +@subheading Anchor + +The anchor operator @samp{.} can be used to enforce juxtaposition, +i.e., to enforce two things to be directly next to each other. The +two ``things'' can be two nodes, or a child and the beginning or end +of its parent. +For example, to capture the first child, the last child, or two +adjacent children: + +@example +@group +;; Anchor the child with the end of its parent. +(compound_expression (_) @@last-child .) + +;; Anchor the child with the beginning of its parent. +(compound_expression . (_) @@first-child) + +;; Anchor two adjacent children. +(compound_expression + (_) @@prev-child + . + (_) @@next-child) +@end group +@end example + +Note that the enforcement of juxtaposition ignores any anonymous +nodes. + +@subheading Predicate + +We can add predicate constraints to a pattern. For example, if we use +the following query pattern + +@example +@group +( + (array . (_) @@first (_) @@last .)
+ (#equal @@first @@last) +) +@end group +@end example + +Then tree-sitter only matches arrays where the first element equals +the last element. To attach a predicate to a pattern, we need to +group them together. A predicate always starts with a @samp{#}. +Currently there are two predicates, @code{#equal} and @code{#match}. + +@deffn Predicate equal arg1 arg2 +Matches if @var{arg1} equals @var{arg2}. Each argument can be either a +string or a capture name. Capture names represent the text that the +captured node spans in the buffer. +@end deffn + +@deffn Predicate match regexp capture-name +Matches if the text that @var{capture-name}'s node spans in the buffer +matches regular expression @var{regexp}. Matching is case-sensitive. +@end deffn + +Note that a predicate can only refer to capture names that appear in the +same pattern. Indeed, it makes little sense to refer to capture names +in other patterns anyway. + +@heading S-expression patterns + +Besides strings, Emacs provides an s-expression-based syntax for query +patterns. It largely resembles the string-based syntax. For example, +the following pattern + +@example +@group +(treesit-query-capture + node "(addition_expression + left: (_) @@left + \"+\" @@plus-sign + right: (_) @@right) @@addition + + [\"return\" \"break\"] @@keyword") +@end group +@end example + +@noindent +is equivalent to + +@example +@group +(treesit-query-capture + node '((addition_expression + left: (_) @@left + "+" @@plus-sign + right: (_) @@right) @@addition + + ["return" "break"] @@keyword)) +@end group +@end example + +Most pattern syntax can be written directly as strange but +nevertheless valid s-expressions. Only a few of them need +modification: + +@itemize +@item +Anchor @samp{.} is written as @code{:anchor}. +@item +@samp{?} is written as @samp{:?}. +@item +@samp{*} is written as @samp{:*}. +@item +@samp{+} is written as @samp{:+}. +@item +@code{#equal} is written as @code{:equal}.
In general, predicates +change their @samp{#} to @samp{:}. +@end itemize + +For example, + +@example +@group +"( + (compound_expression . (_) @@first (_)* @@rest) + (#match \"love\" @@first) + )" +@end group +@end example + +@noindent +is written in s-expression form as + +@example +@group +'(( + (compound_expression :anchor (_) @@first (_) :* @@rest) + (:match "love" @@first) + )) +@end group +@end example + +@defun treesit-expand-query query +This function expands the s-expression @var{query} into a string +query. It is usually a good idea to expand the s-expression patterns +into strings for font-lock queries since they are used repeatedly. +@end defun + +The tree-sitter project's documentation about pattern-matching can be +found at +@uref{https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries}. + +@node Multiple Languages +@section Parsing Text in Multiple Languages + +Sometimes, the source of a programming language could contain sources +of other languages; HTML + CSS + JavaScript is one example. In that +case, we need to assign individual parsers to text segments written in +different languages. Traditionally this is achieved by using +narrowing. While tree-sitter works with narrowing (@pxref{tree-sitter +narrowing, narrowing}), the recommended way is to set ranges in which +a parser will operate. + +@defun treesit-parser-set-included-ranges parser ranges +This function sets the ranges of @var{parser} to @var{ranges}. Then +@var{parser} will only read the text covered in each range. +@var{ranges} is a list of cons cells @code{(@var{beg} +. @var{end})}, one for each range. + +The ranges in @var{ranges} must come in order and must not overlap.
That +is, in pseudocode: + +@example +@group +(cl-loop for idx from 1 to (1- (length ranges)) + for prev = (nth (1- idx) ranges) + for next = (nth idx ranges) + should (<= (car prev) (cdr prev) + (car next) (cdr next))) +@end group +@end example + +@vindex treesit-range-invalid +If @var{ranges} violates this constraint, or something else goes +wrong, this function signals a @code{treesit-range-invalid} error. The +signal data contains a specific error message and the ranges we are +trying to set. + +This function can also be used for disabling ranges. If @var{ranges} +is @code{nil}, the parser is set to parse the whole buffer. + +Example: + +@example +@group +(treesit-parser-set-included-ranges + parser '((1 . 9) (16 . 24) (24 . 25))) +@end group +@end example +@end defun + +@defun treesit-parser-included-ranges parser +This function returns the ranges set for @var{parser}. The return +value is the same as the @var{ranges} argument of +@code{treesit-parser-set-included-ranges}: a list of cons cells +@code{(@var{beg} . @var{end})}. And if @var{parser} doesn't have any +ranges, the return value is @code{nil}. + +@example +@group +(treesit-parser-included-ranges parser) + @result{} ((1 . 9) (16 . 24) (24 . 25)) +@end group +@end example +@end defun + +@defun treesit-set-ranges parser-or-lang ranges +Like @code{treesit-parser-set-included-ranges}, this function sets +the ranges of @var{parser-or-lang} to @var{ranges}. Conveniently, +@var{parser-or-lang} could be either a parser or a language. If it is +a language, this function looks for the first parser in +@code{treesit-parser-list} for that language in the current buffer, +and sets the ranges for it. +@end defun + +@defun treesit-get-ranges parser-or-lang +This function returns the ranges of @var{parser-or-lang}, like +@code{treesit-parser-included-ranges}. And like +@code{treesit-set-ranges}, @var{parser-or-lang} can be a parser or +a language symbol.
+@end defun + +@defun treesit-query-range source pattern &optional beg end +This function matches @var{source} with @var{pattern} and returns the +ranges of captured nodes. The return value has the same shape as that +of the other range functions: a list of @code{(@var{beg} . @var{end})}. + +For convenience, @var{source} can be a language symbol, a parser, or a +node. If a language symbol, this function matches in the root node of +the first parser using that language; if a parser, this function +matches in the root node of that parser; if a node, this function +matches in that node. + +Parameter @var{pattern} is the query pattern used to capture nodes +(@pxref{Pattern Matching}). The capture names don't matter. Parameters +@var{beg} and @var{end}, if both non-nil, limit the range in which +this function queries. + +Like other query functions, this function raises a +@code{treesit-query-error} if @var{pattern} is malformed. +@end defun + +@defun treesit-language-at point +This function tries to figure out which language is responsible for +the text at @var{point}. It goes over each parser in +@code{treesit-parser-list} and sees if that parser's range covers +@var{point}. +@end defun + +@defvar treesit-range-functions +A list of range functions. Font-locking and indenting code uses +functions in this list to set correct ranges for a language parser +before using it. + +The signature of each function should be + +@example +(@var{start} @var{end} &rest @var{_}) +@end example + +@noindent +where @var{start} and @var{end} mark the region that is about to be +used. A range function only needs to (but is not limited to) update +ranges in that region. + +Each function in the list is called in order. +@end defvar + +@defun treesit-update-ranges &optional start end +This function is used by font-lock and indent to update ranges before +using any parser. Each range function in +@code{treesit-range-functions} is called in order. Arguments +@var{start} and @var{end} are passed to each range function.
+@end defun + +@heading An example + +Normally, in a set of languages that can be mixed together, there is a +major language and several embedded languages. The major language +parses the whole document, and skips the embedded languages. Then the +parser for the major language knows the ranges of the embedded +languages. So we first parse the whole document with the major +language's parser, set ranges for the embedded languages, then parse +the embedded languages. + +Suppose we want to parse a very simple document that mixes HTML, CSS +and JavaScript: + +@example +@group +<html> + <script>1 + 2</script> + <style>body @{ color: "blue"; @}</style> +</html> +@end group +@end example + +We first parse with HTML, then set ranges for CSS and JavaScript: + +@example +@group +;; Create parsers. +(setq html (treesit-get-parser-create 'html)) +(setq css (treesit-get-parser-create 'css)) +(setq js (treesit-get-parser-create 'javascript)) + +;; Set CSS ranges. +(setq css-range + (treesit-query-range + 'html + "(style_element (raw_text) @@capture)")) +(treesit-parser-set-included-ranges css css-range) + +;; Set JavaScript ranges. +(setq js-range + (treesit-query-range + 'html + "(script_element (raw_text) @@capture)")) +(treesit-parser-set-included-ranges js js-range) +@end group +@end example + +We use a query pattern @code{(style_element (raw_text) @@capture)} to +find CSS nodes in the HTML parse tree. @xref{Pattern Matching}, for +how to write query patterns. + +@node Tree-sitter C API +@section Tree-sitter C API Correspondence + +Emacs' tree-sitter integration doesn't expose every feature +tree-sitter's C API provides. Missing features include: + +@itemize +@item +Creating a tree cursor and navigating the syntax tree with it. +@item +Setting a timeout and a cancellation flag for a parser. +@item +Setting the logger for a parser. +@item +Printing a DOT graph of the syntax tree to a file. +@item +Copying and modifying a syntax tree. (Emacs doesn't expose a tree +object.) +@item +Using (row, column) coordinates as position.
+@item +Updating a node with changes. (In Emacs, retrieve a new node instead +of updating the existing one.) +@item +Querying statistics of a language definition. +@end itemize + +In addition, Emacs makes some changes to the C API to make the API more +convenient and idiomatic: + +@itemize +@item +Instead of using byte positions, the ELisp API uses character +positions. +@item +Null nodes are converted to @code{nil}. +@end itemize + +Below is the correspondence between all C API functions and their +ELisp counterparts. Sometimes one ELisp function corresponds to +multiple C functions, and many C functions don't have an ELisp +counterpart. + +@example +ts_parser_new treesit-parser-create +ts_parser_delete +ts_parser_set_language +ts_parser_language treesit-parser-language +ts_parser_set_included_ranges treesit-parser-set-included-ranges +ts_parser_included_ranges treesit-parser-included-ranges +ts_parser_parse +ts_parser_parse_string treesit-parse-string +ts_parser_parse_string_encoding +ts_parser_reset +ts_parser_set_timeout_micros +ts_parser_timeout_micros +ts_parser_set_cancellation_flag +ts_parser_cancellation_flag +ts_parser_set_logger +ts_parser_logger +ts_parser_print_dot_graphs +ts_tree_copy +ts_tree_delete +ts_tree_root_node +ts_tree_language +ts_tree_edit +ts_tree_get_changed_ranges +ts_tree_print_dot_graph +ts_node_type treesit-node-type +ts_node_symbol +ts_node_start_byte treesit-node-start +ts_node_start_point +ts_node_end_byte treesit-node-end +ts_node_end_point +ts_node_string treesit-node-string +ts_node_is_null +ts_node_is_named treesit-node-check +ts_node_is_missing treesit-node-check +ts_node_is_extra treesit-node-check +ts_node_has_changes treesit-node-check +ts_node_has_error treesit-node-check +ts_node_parent treesit-node-parent +ts_node_child treesit-node-child +ts_node_field_name_for_child treesit-node-field-name-for-child +ts_node_child_count treesit-node-child-count +ts_node_named_child treesit-node-child +ts_node_named_child_count
treesit-node-child-count +ts_node_child_by_field_name treesit-node-child-by-field-name +ts_node_child_by_field_id +ts_node_next_sibling treesit-node-next-sibling +ts_node_prev_sibling treesit-node-prev-sibling +ts_node_next_named_sibling treesit-node-next-sibling +ts_node_prev_named_sibling treesit-node-prev-sibling +ts_node_first_child_for_byte treesit-node-first-child-for-pos +ts_node_first_named_child_for_byte treesit-node-first-child-for-pos +ts_node_descendant_for_byte_range treesit-node-descendant-for-range +ts_node_descendant_for_point_range +ts_node_named_descendant_for_byte_range treesit-node-descendant-for-range +ts_node_named_descendant_for_point_range +ts_node_edit +ts_node_eq treesit-node-eq +ts_tree_cursor_new +ts_tree_cursor_delete +ts_tree_cursor_reset +ts_tree_cursor_current_node +ts_tree_cursor_current_field_name +ts_tree_cursor_current_field_id +ts_tree_cursor_goto_parent +ts_tree_cursor_goto_next_sibling +ts_tree_cursor_goto_first_child +ts_tree_cursor_goto_first_child_for_byte +ts_tree_cursor_goto_first_child_for_point +ts_tree_cursor_copy +ts_query_new +ts_query_delete +ts_query_pattern_count +ts_query_capture_count +ts_query_string_count +ts_query_start_byte_for_pattern +ts_query_predicates_for_pattern +ts_query_step_is_definite +ts_query_capture_name_for_id +ts_query_string_value_for_id +ts_query_disable_capture +ts_query_disable_pattern +ts_query_cursor_new +ts_query_cursor_delete +ts_query_cursor_exec treesit-query-capture +ts_query_cursor_did_exceed_match_limit +ts_query_cursor_match_limit +ts_query_cursor_set_match_limit +ts_query_cursor_set_byte_range +ts_query_cursor_set_point_range +ts_query_cursor_next_match +ts_query_cursor_remove_match +ts_query_cursor_next_capture +ts_language_symbol_count +ts_language_symbol_name +ts_language_symbol_for_name +ts_language_field_count +ts_language_field_name_for_id +ts_language_field_id_for_name +ts_language_symbol_type +ts_language_version +@end example diff --git a/lisp/emacs-lisp/cl-preloaded.el b/lisp/emacs-lisp/cl-preloaded.el index
6aa45526d8..b4be54bbd6 100644 --- a/lisp/emacs-lisp/cl-preloaded.el +++ b/lisp/emacs-lisp/cl-preloaded.el @@ -68,6 +68,8 @@ cl--typeof-types (font-spec atom) (font-entity atom) (font-object atom) (vector array sequence atom) (user-ptr atom) + (tree-sitter-parser atom) + (tree-sitter-node atom) ;; Plus, really hand made: (null symbol list sequence atom)) "Alist of supertypes. diff --git a/lisp/treesit.el b/lisp/treesit.el new file mode 100644 index 0000000000..eaaa1316af --- /dev/null +++ b/lisp/treesit.el @@ -0,0 +1,853 @@ +;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: +;; +;; Note to self: we don't create parsers automatically in any provided +;; functions. + +;;; Code: + +(eval-when-compile (require 'cl-lib)) +(require 'cl-seq) +(require 'font-lock) + +;;; Activating tree-sitter + +(defgroup treesit + nil + "Tree-sitter is an incremental parser." + :group 'tools) + +(defcustom treesit-disabled-modes nil + "A list of major-modes for which tree-sitter support is disabled." + :type '(list symbol)) + +(defcustom treesit-maximum-size (* 4 1024 1024) + "Maximum buffer size for enabling tree-sitter parsing." + :type 'integer) + +(defun treesit-available-p () + "Return non-nil if tree-sitter features are available." 
+ (fboundp 'treesit-parser-create)) + +(defun treesit-should-enable-p (&optional mode) + "Return non-nil if MODE should activate tree-sitter support. +MODE defaults to the value of `major-mode'. The result depends +on the value of `treesit-disabled-modes', +`treesit-maximum-size', and of course, whether tree-sitter is +available on the system at all." + (let* ((mode (or mode major-mode)) + (disabled (cl-loop + for disabled-mode in treesit-disabled-modes + if (provided-mode-derived-p mode disabled-mode) + return t + finally return nil))) + (and (treesit-available-p) + (not disabled) + (< (buffer-size) treesit-maximum-size)))) + +;;; Parser API supplement + +(defun treesit-get-parser (language) + "Find the first parser using LANGUAGE in `treesit-parser-list'." + (catch 'found + (dolist (parser treesit-parser-list) + (when (eq language (treesit-parser-language parser)) + (throw 'found parser))))) + +(defun treesit-get-parser-create (language) + "Find the first parser using LANGUAGE in `treesit-parser-list'. +If none exists, create one and return it." + (or (treesit-get-parser language) + (treesit-parser-create + (current-buffer) language))) + +(defun treesit-parse-string (string language) + "Parse STRING using a parser for LANGUAGE. +Return the root node of the syntax tree." + (with-temp-buffer + (insert string) + (treesit-parser-root-node + (treesit-parser-create (current-buffer) language)))) + +(defun treesit-language-at (point) + "Return the language used at POINT." + (cl-loop for parser in treesit-parser-list + if (treesit-node-at point nil parser) + return (treesit-parser-language parser))) + +(defun treesit-set-ranges (parser-or-lang ranges) + "Set the ranges of PARSER-OR-LANG to RANGES." 
+ (treesit-parser-set-included-ranges + (cond ((symbolp parser-or-lang) + (or (treesit-get-parser parser-or-lang) + (error "Cannot find a parser for %s" parser-or-lang))) + ((treesit-parser-p parser-or-lang) + parser-or-lang) + (t (error "Expecting a parser or language, but got %s" + parser-or-lang))) + ranges)) + +(defun treesit-get-ranges (parser-or-lang) + "Get the ranges of PARSER-OR-LANG." + (treesit-parser-included-ranges + (cond ((symbolp parser-or-lang) + (or (treesit-get-parser parser-or-lang) + (error "Cannot find a parser for %s" parser-or-lang))) + ((treesit-parser-p parser-or-lang) + parser-or-lang) + (t (error "Expecting a parser or language, but got %s" + parser-or-lang))))) + +;;; Node API supplement + +(defun treesit-node-buffer (node) + "Return the buffer in where NODE belongs." + (treesit-parser-buffer + (treesit-node-parser node))) + +(defun treesit-node-language (node) + "Return the language symbol that NODE's parser uses." + (treesit-parser-language + (treesit-node-parser node))) + +(defun treesit-node-at (beg &optional end parser-or-lang named) + "Return the smallest node covering BEG to END. + +If omitted, END defaults to BEG. Return nil if none find. If +NAMED non-nil, only look for named node. NAMED defaults to nil. + +If PARSER-OR-LANG is nil, use the first parser in +`treesit-parser-list'; if PARSER-OR-LANG is a parser, use +that parser; if PARSER-OR-LANG is a language, find a parser using +that language in the current buffer, and use that." + (let ((root (if (treesit-parser-p parser-or-lang) + (treesit-parser-root-node parser-or-lang) + (treesit-buffer-root-node parser-or-lang)))) + (treesit-node-descendant-for-range root beg (or end beg) named))) + +(defun treesit-buffer-root-node (&optional language) + "Return the root node of the current buffer. +Use the first parser in `treesit-parser-list', if LANGUAGE is +non-nil, use the first parser for LANGUAGE." 
+ (if-let ((parser + (or (if language + (or (treesit-get-parser language) + (error "Cannot find a parser for %s" language)) + (or (car treesit-parser-list) + (error "Buffer has no parser")))))) + (treesit-parser-root-node parser))) + +(defun treesit-filter-child (node pred &optional named) + "Return children of NODE that satisfies PRED. +PRED is a function that takes one argument, the child node. If +NAMED non-nil, only search for named node." + (let ((child (treesit-node-child node 0 named)) + result) + (while child + (when (funcall pred child) + (push child result)) + (setq child (treesit-node-next-sibling child named))) + (reverse result))) + +(defun treesit-node-text (node &optional no-property) + "Return the buffer (or string) content corresponding to NODE. +If NO-PROPERTY is non-nil, remove text properties." + (with-current-buffer (treesit-node-buffer node) + (if no-property + (buffer-substring-no-properties + (treesit-node-start node) + (treesit-node-end node)) + (buffer-substring + (treesit-node-start node) + (treesit-node-end node))))) + +(defun treesit-parent-until (node pred) + "Return the closest parent of NODE that satisfies PRED. +Return nil if none found. PRED should be a function that takes +one argument, the parent node." + (let ((node (treesit-node-parent node))) + (while (and node (not (funcall pred node))) + (setq node (treesit-node-parent node))) + node)) + +(defun treesit-parent-while (node pred) + "Return the furthest parent of NODE that satisfies PRED. +Return nil if none found. PRED should be a function that takes +one argument, the parent node." + (let ((last nil)) + (while (and node (funcall pred node)) + (setq last node + node (treesit-node-parent node))) + last)) + +(defun treesit-node-children (node &optional named) + "Return a list of NODE's children. +If NAMED is non-nil, collect named child only." 
+ (mapcar (lambda (idx) + (treesit-node-child node idx named)) + (number-sequence + 0 (1- (treesit-node-child-count node named))))) + +(defun treesit-node-index (node &optional named) + "Return the index of NODE in its parent. +If NAMED is non-nil, count named child only." + (let ((count 0)) + (while (setq node (treesit-node-prev-sibling node named)) + (cl-incf count)) + count)) + +(defun treesit-node-field-name (node) + "Return the field name of NODE as a child of its parent." + (when-let ((parent (treesit-node-parent node)) + (idx (treesit-node-index node))) + (treesit-node-field-name-for-child parent idx))) + +;;; Query API supplement + +(defun treesit-query-in (source query &optional beg end) + "Query the current buffer with QUERY. + +SOURCE can be a language symbol, a parser, or a node. If a +language symbol, use the root node of the first parser for that +language; if a parser, use the root node of that parser; if a +node, use that node. + +QUERY is either a string query or a sexp query. See Info node +`(elisp)Pattern Matching' for how to write a query pattern in either +string or s-expression form. + +BEG and END, if _both_ non-nil, specifies the range in which the query +is executed. + +Raise an treesit-query-error if QUERY is malformed." + (treesit-query-capture + (cond ((symbolp source) (treesit-buffer-root-node source)) + ((treesit-parser-p source) + (treesit-parser-root-node source)) + ((treesit-node-p source) source)) + query + beg end)) + +(defun treesit-query-string (string query language) + "Query STRING with QUERY in LANGUAGE. +See `treesit-query-capture' for QUERY." + (with-temp-buffer + (insert string) + (let ((parser (treesit-parser-create (current-buffer) language))) + (treesit-query-capture + (treesit-parser-root-node parser) + query)))) + +(defun treesit-query-range (source query &optional beg end) + "Query the current buffer and return ranges of captured nodes. + +QUERY, SOURCE, BEG, END are the same as in +`treesit-query-in'. 
This function returns a list +of (START . END), where START and END specify the range of each +captured node. Capture names don't matter." + (cl-loop for capture + in (treesit-query-in source query beg end) + for node = (cdr capture) + collect (cons (treesit-node-start node) + (treesit-node-end node)))) + +;;; Range API supplement + +(defvar-local treesit-range-functions nil + "A list of range functions. +Font-locking and indenting code uses functions in this list to +set correct ranges for a language parser before using it. + +The signature of each function should be + + (start end &rest _) + +where START and END mark the region that is about to be used. A +range function only needs to (but is not limited to) update ranges +in that region. + +Each function in the list is called in order.") + +(defun treesit-update-ranges (&optional start end) + "Update the ranges for each language in the current buffer. +Call each range function in `treesit-range-functions' in order. +START and END are passed to each range function; when nil, they +default to `point-min' and `point-max' respectively." + (dolist (range-fn treesit-range-functions) + (funcall range-fn (or start (point-min)) (or end (point-max))))) + +;;; Font-lock + +(defvar-local treesit-font-lock-settings nil + "A list of SETTINGs for treesit-based fontification. + +Each SETTING should look like + + (LANGUAGE QUERY) + +Each SETTING controls one parser (often for a different language). +LANGUAGE is the language symbol. See Info node `(elisp)Language +Definitions'. + +QUERY is either a string query or a sexp query. +See Info node `(elisp)Pattern Matching' for writing queries. + +Capture names in QUERY should be face names like +`font-lock-keyword-face'. The captured node will be fontified +with that face. Capture names can also be function names, in +which case the function is called with (START END NODE), where +START and END are the start and end positions of the node in +the buffer, and NODE is the tree-sitter node object.
If a capture +name is both a face and a function, face takes priority. + +Generally, major modes should set +`treesit-font-lock-defaults', and let Emacs automatically +populate this variable.") + +(defvar-local treesit-font-lock-defaults nil + "Defaults for tree-sitter Font Lock specified by the major mode. + +This variable should be a list of + + (DEFAULT :KEYWORD VALUE...) + +A DEFAULT may be a symbol or a list of symbols (specifying +different levels of fontification). The symbol(s) can name a +variable or a function. If a symbol is both a variable and a +function, it is used as a function. Different levels of +fontification can be controlled by +`font-lock-maximum-decoration'. + +The symbol(s) in DEFAULT should contain or return a SETTING as +explained in `treesit-font-lock-settings', which looks like + + (LANGUAGE QUERY) + +KEYWORD and VALUE are additional settings that could be used to +alter fontification behavior. Currently there aren't any. + +Multi-language major modes should provide a range function for +each language they support in `treesit-range-functions', and +Emacs will set the ranges accordingly before fontifying a region. +See Info node `(elisp)Multiple Languages' for what it means +to set ranges for a parser.") + +(defun treesit-font-lock-fontify-region (start end &optional loudly) + "Fontify the region between START and END. +If LOUDLY is non-nil, message some debugging information." + (treesit-update-ranges start end) + (font-lock-unfontify-region start end) + (dolist (setting treesit-font-lock-settings) + (when-let* ((language (nth 0 setting)) + (match-pattern (nth 1 setting)) + (parser (treesit-get-parser-create language))) + (when-let ((node (treesit-node-at start end parser))) + (let ((captures (treesit-query-capture + node match-pattern + ;; Specifying the range is important. More + ;; often than not, NODE will be the root + ;; node, and if we don't specify the range, + ;; we are basically querying the whole file.
+ start end))) + (with-silent-modifications + (dolist (capture captures) + (let* ((face (car capture)) + (node (cdr capture)) + (start (treesit-node-start node)) + (end (treesit-node-end node))) + (cond ((facep face) + (put-text-property start end 'face face)) + ((functionp face) + (funcall face start end node)) + (t (error "Capture name %s is neither a face nor a function" face))) + (when loudly + (message "Fontifying text from %d to %d, Face: %s Language: %s" + start end face language))))))))) + ;; Call regexp font-lock after tree-sitter, as it is usually used + ;; for custom fontification. + (let ((font-lock-unfontify-region-function #'ignore)) + (funcall #'font-lock-default-fontify-region start end loudly))) + +(defun treesit-font-lock-enable () + "Enable tree-sitter font-locking for the current buffer." + (let ((default (car treesit-font-lock-defaults)) + (attributes (cdr treesit-font-lock-defaults))) + (ignore attributes) + (setq-local treesit-font-lock-settings + (font-lock-eval-keywords + (font-lock-choose-keywords + default + (font-lock-value-in-major-mode + font-lock-maximum-decoration))))) + (setq-local font-lock-fontify-region-function + #'treesit-font-lock-fontify-region) + ;; If we don't set `font-lock-defaults' to some non-nil value, + ;; font-lock doesn't enable properly (the font-lock-mode-internal + ;; doesn't run). See `font-lock-add-keywords'. + (when (and font-lock-mode + (null font-lock-keywords) + (null font-lock-defaults)) + (font-lock-mode -1) + (setq-local font-lock-defaults '(nil t)) + (font-lock-mode 1))) + +;;; Indent + +(defvar treesit--indent-verbose nil + "If non-nil, log progress when indenting.") + +;; This is not bound locally like we normally do with major-mode +;; stuff, because for tree-sitter, a buffer could contain more than +;; one language. +(defvar treesit-simple-indent-rules nil + "A list of indent rule settings. +Each indent rule setting should be (LANGUAGE . 
RULES), +where LANGUAGE is a language symbol, and RULES is a list of + + (MATCHER ANCHOR OFFSET). + +MATCHER determines whether this rule applies, ANCHOR and OFFSET +together determines which column to indent to. + +A MATCHER is a function that takes three arguments (NODE PARENT +BOL). BOL is the point where we are indenting: the beginning of +line content, the position of the first non-whitespace character. +NODE is the largest (highest-in-tree) node starting at that +point. PARENT is the parent of NODE. + +If MATCHER returns non-nil, meaning the rule matches, Emacs then +uses ANCHOR to find an anchor, it should be a function that takes +the same argument (NODE PARENT BOL) and returns a point. + +Finally Emacs computes the column of that point returned by ANCHOR +and adds OFFSET to it, and indents to that column. + +For MATCHER and ANCHOR, Emacs provides some convenient presets. +See `treesit-simple-indent-presets'.") + +(defvar treesit-simple-indent-presets + '((match . (lambda + (&optional node-type parent-type node-field + node-index-min node-index-max) + `(lambda (node parent bol &rest _) + (and (or (null ,node-type) + (equal (treesit-node-type node) + ,node-type)) + (or (null ,parent-type) + (equal (treesit-node-type parent) + ,parent-type)) + (or (null ,node-field) + (equal (treesit-node-field-name node) + ,node-field)) + (or (null ,node-index-min) + (>= (treesit-node-index node t) + ,node-index-min)) + (or (null ,node-index-max) + (<= (treesit-node-index node t) + ,node-index-max)))))) + (no-node . (lambda (node parent bol &rest _) (null node))) + (parent-is . (lambda (type) + `(lambda (node parent bol &rest _) + (equal ,type (treesit-node-type parent))))) + + (node-is . (lambda (type) + `(lambda (node parent bol &rest _) + (equal ,type (treesit-node-type node))))) + + (query . 
(lambda (pattern) + `(lambda (node parent bol &rest _) + (cl-loop for capture + in (treesit-query-capture + parent ,pattern) + if (treesit-node-eq node (cdr capture)) + return t + finally return nil)))) + (first-sibling . (lambda (node parent bol &rest _) + (treesit-node-start + (treesit-node-child parent 0 t)))) + + (parent . (lambda (node parent bol &rest _) + (treesit-node-start parent))) + (parent-bol . (lambda (node parent bol &rest _) + (save-excursion + (goto-char (treesit-node-start parent)) + (back-to-indentation) + (point)))) + (prev-sibling . (lambda (node parent bol &rest _) + (treesit-node-start + (treesit-node-prev-sibling node)))) + (no-indent . (lambda (node parent bol &rest _) bol)) + (prev-line . (lambda (node parent bol &rest _) + (save-excursion + (goto-char bol) + (forward-line -1) + (skip-chars-forward " \t") + (treesit-node-start + (treesit-node-at (point) nil nil t)))))) + "A list of presets. +These presets can be used as MATCHER and ANCHOR in +`treesit-simple-indent-rules'. + +MATCHER: + +\(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX) + + NODE-TYPE checks for node's type, PARENT-TYPE checks for + parent's type, NODE-FIELD checks for the field name of node + in the parent, NODE-INDEX-MIN and NODE-INDEX-MAX check for + the node's index in the parent. Therefore, to match the + first child where parent is \"argument_list\", use + + (match nil \"argument_list\" nil 0 0). + +no-node + + Matches the case where node is nil, i.e., there is no node + that starts at point. This is the case when indenting an + empty line. + +\(parent-is TYPE) + + Checks that the parent has type TYPE. + +\(node-is TYPE) + + Checks that the node has type TYPE. + +\(query QUERY) + + Queries the parent node with QUERY, and checks if the node + is captured (by any capture name). + +ANCHOR: + +first-sibling + + Find the first child of the parent. + +parent + + Find the parent.
+ +parent-bol + + Find the beginning of non-space characters on the line where + the parent is on. + +prev-sibling + + Find node's previous sibling. + +no-indent + + Do nothing. + +prev-line + + Find the named node on the previous line. This can be used when + indenting an empty line: just indent like the previous node.") + +(defun treesit--simple-apply (fn args) + "Apply ARGS to FN. + +If FN is a key in `treesit-simple-indent-presets', use the +corresponding value as the function." + ;; We don't want to match uncompiled lambdas, so make sure this cons + ;; is not a function. We could move the condition functionp + ;; forward, but better be explicit. + (cond ((and (consp fn) (not (functionp fn))) + (apply (treesit--simple-apply (car fn) (cdr fn)) + ;; We don't evaluate ARGS with `simple-apply', i.e., + ;; no composing, better keep it simple. + args)) + ((and (symbolp fn) + (alist-get fn treesit-simple-indent-presets)) + (apply (alist-get fn treesit-simple-indent-presets) + args)) + ((functionp fn) (apply fn args)) + (t (error "Couldn't find the function corresponding to %s" fn)))) + +;; This variable might seem unnecessary: why split +;; `treesit-indent' and `treesit-simple-indent' into two +;; functions? We add this variable in between because later we might +;; add more powerful indentation engines, and that new engine can +;; probably share `treesit-indent'. It is also useful, suggested +;; by Stefan M, to have a function that figures out how much to indent +;; but doesn't actually performs the indentation, because we might +;; want to know where will a node indent to if we put it at some other +;; location, and use that information to calculate the actual +;; indentation. And `treesit-simple-indent' is that function. I +;; forgot the example Stefan gave, but it makes a lot of sense. +(defvar treesit-indent-function #'treesit-simple-indent + "Function used by `treesit-indent' to do some of the work. 
+ +This function is called with + + (NODE PARENT BOL &rest _) + +and returns + + (ANCHOR . OFFSET). + +BOL is the position of the first non-whitespace character on the +line; NODE is the \"largest\" node that starts at BOL; PARENT is its parent; ANCHOR +is a point (not a node), and OFFSET is a number. Emacs finds the +column of ANCHOR and adds OFFSET to it as the final indentation +of the current line.") + +(defun treesit-indent () + "Indent according to the result of `treesit-indent-function'." + (treesit-update-ranges) + (let* ((orig-pos (point)) + (bol (save-excursion + (forward-line 0) + (skip-chars-forward " \t") + (point))) + (smallest-node + (cl-loop for parser in treesit-parser-list + for node = (treesit-node-at + bol nil parser) + if node return node)) + (node (treesit-parent-while + smallest-node + (lambda (node) + (eq bol (treesit-node-start node)))))) + (pcase-let* + ((parser (if smallest-node + (treesit-node-parser smallest-node) + nil)) + ;; NODE would be nil if BOL is on whitespace. In that case + ;; we set PARENT to the "node at point", which would + ;; encompass the whitespace. + (parent (cond ((and node parser) + (treesit-node-parent node)) + (parser + (treesit-node-at bol nil parser)) + (t nil))) + (`(,anchor . ,offset) + (funcall treesit-indent-function node parent bol))) + (if (null anchor) + (when treesit--indent-verbose + (message "Failed to find the anchor")) + (let ((col (+ (save-excursion + (goto-char anchor) + (current-column)) + offset))) + (if (< bol orig-pos) + (save-excursion + (indent-line-to col)) + (indent-line-to col))))))) + +(defun treesit-simple-indent (node parent bol) + "Calculate indentation according to `treesit-simple-indent-rules'. + +BOL is the position of the first non-whitespace character on the +current line. NODE is the largest node that starts at BOL, +PARENT is NODE's parent. + +Return (ANCHOR . OFFSET) where ANCHOR is a point (not a node), OFFSET is the +indentation offset, meaning indent to align with ANCHOR and add +OFFSET."
+ (if (null parent) + (when treesit--indent-verbose + (message "PARENT is nil, not indenting")) + (let* ((language (treesit-node-language parent)) + (rules (alist-get language + treesit-simple-indent-rules))) + (cl-loop for rule in rules + for pred = (nth 0 rule) + for anchor = (nth 1 rule) + for offset = (nth 2 rule) + if (treesit--simple-apply + pred (list node parent bol)) + do (when treesit--indent-verbose + (message "Matched rule: %S" rule)) + and + return (cons (treesit--simple-apply + anchor (list node parent bol)) + offset))))) + +(defun treesit-check-indent (mode) + "Check current buffer's indentation against a major mode MODE. + +Pop up a diff buffer showing the difference. Correct +indentation (target) is in green, current indentation is in red." + (interactive "CTarget major mode: ") + (let ((source-buf (current-buffer))) + (with-temp-buffer + (insert-buffer-substring source-buf) + (funcall mode) + (indent-region (point-min) (point-max)) + (diff-buffers source-buf (current-buffer))))) + +;;; Debugging + +(defvar-local treesit--inspect-name nil + "treesit-inspect-mode uses this to show node name in mode-line.") + +(defun treesit-inspect-node-at-point (&optional arg) + "Show information of the node at point. +If called interactively, show in echo area, otherwise set +`treesit--inspect-name' (which will appear in the mode-line +if `treesit-inspect-mode' is enabled). Uses the first parser +in `treesit-parser-list'." + (interactive "p") + ;; NODE-LIST contains all the node that starts at point. + (let* ((node-list + (cl-loop for node = (treesit-node-at (point)) + then (treesit-node-parent node) + while node + if (eq (treesit-node-start node) + (point)) + collect node)) + (largest-node (car (last node-list))) + (parent (treesit-node-parent largest-node)) + ;; node-list-acending contains all the node bottom-up, then + ;; the parent. 
+ (node-list-acending + (if (null largest-node) + ;; If there are no nodes that start at point, just show + ;; the node at point and its parent. + (list (treesit-node-at (point)) + (treesit-node-parent + (treesit-node-at (point)))) + (append node-list (list parent)))) + (name "")) + ;; We draw nodes like (parent field-name: (node)) recursively, + ;; so it could be (node1 field-name: (node2 field-name: (node3))). + (dolist (node node-list-acending) + (setq + name + (concat + (if (treesit-node-field-name node) + (format " %s: " (treesit-node-field-name node)) + " ") + (if (treesit-node-check node 'named) "(" "\"") + (or (treesit-node-type node) + "N/A") + name + (if (treesit-node-check node 'named) ")" "\"")))) + (setq treesit--inspect-name name) + (force-mode-line-update) + (when arg + (if node-list + (message "%s" treesit--inspect-name) + (message "No node at point"))))) + +(define-minor-mode treesit-inspect-mode + "Shows the node that _starts_ at point in the mode-line. + +The mode-line displays + + PARENT FIELD-NAME: (CHILD (GRAND-CHILD (...))) + +CHILD, GRAND-CHILD, and GRAND-GRAND-CHILD, etc, are nodes that +have their beginning at point. And PARENT is the parent of +CHILD. + +If no node starts at point, i.e., point is in the middle of a +node, then we just display the smallest node that spans point and +its immediate parent. + +This minor mode doesn't create parsers on its own. It simply +uses the first parser in `treesit-parser-list'." + :lighter nil + (if treesit-inspect-mode + (progn + (add-hook 'post-command-hook + #'treesit-inspect-node-at-point 0 t) + (add-to-list 'mode-line-misc-info + '(:eval treesit--inspect-name))) + (remove-hook 'post-command-hook + #'treesit-inspect-node-at-point t) + (setq mode-line-misc-info + (remove '(:eval treesit--inspect-name) + mode-line-misc-info)))) + +(defun treesit-check-query (query language) + "Check if QUERY is valid for LANGUAGE. 
+If QUERY is invalid, display the query in a popup buffer, jumps +to the offending pattern and highlight the pattern." + (let ((buf (get-buffer-create "*tree-sitter check query*"))) + (with-temp-buffer + (treesit-get-parser-create language) + (condition-case err + (progn (treesit-query-in language query) + (message "QUERY is valid")) + (treesit-query-error + (with-current-buffer buf + (let* ((data (cdr err)) + (message (nth 0 data)) + (start (nth 1 data))) + (erase-buffer) + (insert query) + (goto-char start) + (search-forward " " nil t) + (put-text-property start (point) 'face 'error) + (message "%s" (buffer-substring start (point))) + (goto-char (point-min)) + (insert (format "%s: %d\n" message start)) + (forward-char start))) + (pop-to-buffer buf)))))) + +;;; Etc + +(declare-function find-library-name "find-func.el") +(defun treesit--check-manual-covarage () + "Print tree-sitter functions missing from the manual in message buffer." + (interactive) + (require 'find-func) + (let ((functions-in-source + (with-temp-buffer + (insert-file-contents (find-library-name "tree-sitter")) + (cl-remove-if + (lambda (name) (string-match "treesit--" name)) + (cl-sort + (save-excursion + (goto-char (point-min)) + (cl-loop while (re-search-forward + "^(defun \\([^ ]+\\)" nil t) + collect (match-string-no-properties 1))) + #'string<)))) + (functions-in-manual + (with-temp-buffer + (insert-file-contents (expand-file-name + "doc/lispref/parsing.texi" + source-directory)) + (insert-file-contents (expand-file-name + "doc/lispref/modes.texi" + source-directory)) + (cl-sort + (save-excursion + (goto-char (point-min)) + (cl-loop while (re-search-forward + "^@defun \\([^ ]+\\)" nil t) + collect (match-string-no-properties 1))) + #'string<)))) + (message "Missing: %s" + (string-join + (cl-remove-if + (lambda (name) (member name functions-in-manual)) + functions-in-source) + "\n")))) + +(provide 'treesit) + +;;; treesit.el ends here diff --git a/src/Makefile.in b/src/Makefile.in index 
2b7c4bb316..6ae55b19e1 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -337,6 +337,10 @@ JSON_LIBS = JSON_CFLAGS = @JSON_CFLAGS@ JSON_OBJ = @JSON_OBJ@ +TREE_SITTER_LIBS = @TREE_SITTER_LIBS@ +TREE_SITTER_FLAGS = @TREE_SITTER_FLAGS@ +TREE_SITTER_OBJ = @TREE_SITTER_OBJ@ + INTERVALS_H = dispextern.h intervals.h composite.h GETLOADAVG_LIBS = @GETLOADAVG_LIBS@ @@ -400,7 +404,7 @@ EMACS_CFLAGS= $(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \ $(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \ $(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \ - $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \ + $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \ $(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \ $(WERROR_CFLAGS) $(HAIKU_CFLAGS) ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS) @@ -439,7 +443,7 @@ base_obj = $(if $(HYBRID_MALLOC),sheap.o) \ $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \ $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \ - $(HAIKU_OBJ) $(PGTK_OBJ) + $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ) doc_obj = $(base_obj) $(NS_OBJC_OBJ) obj = $(doc_obj) $(HAIKU_CXX_OBJ) @@ -559,7 +563,7 @@ LIBES = $(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \ $(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \ $(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \ - $(SQLITE3_LIBS) + $(TREE_SITTER_LIBS) $(SQLITE3_LIBS) ## FORCE it so that admin/unidata can decide whether this file is ## up-to-date. Although since charprop depends on bootstrap-emacs, diff --git a/src/alloc.c b/src/alloc.c index 9ed94dc8a1..e7603fac37 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -50,6 +50,10 @@ Copyright (C) 1985-1986, 1988, 1993-1995, 1997-2022 Free Software #include TERM_HEADER #endif /* HAVE_WINDOW_SYSTEM */ +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + #include #include #include /* For backtrace. 
*/ @@ -3177,6 +3181,15 @@ cleanup_vector (struct Lisp_Vector *vector) if (uptr->finalizer) uptr->finalizer (uptr->p); } +#ifdef HAVE_TREE_SITTER + else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER)) + { + struct Lisp_TS_Parser *lisp_parser + = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser); + ts_tree_delete(lisp_parser->tree); + ts_parser_delete(lisp_parser->parser); + } +#endif #ifdef HAVE_MODULES else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION)) { diff --git a/src/casefiddle.c b/src/casefiddle.c index 2ea5f09b4c..3022c5cc7d 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c @@ -30,6 +30,10 @@ Copyright (C) 1985, 1994, 1997-1999, 2001-2022 Free Software Foundation, #include "composite.h" #include "keymap.h" +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; /* State for casing individual characters. */ @@ -530,6 +534,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) modify_text (start, end); prepare_casing_context (&ctx, flag, true); +#ifdef HAVE_TREE_SITTER + ptrdiff_t start_byte = CHAR_TO_BYTE (start); + ptrdiff_t old_end_byte = CHAR_TO_BYTE (end); +#endif + ptrdiff_t orig_end = end; record_delete (start, make_buffer_string (start, end, true), false); if (NILP (BVAR (current_buffer, enable_multibyte_characters))) @@ -548,6 +557,9 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) { signal_after_change (start, end - start - added, end - start); update_compositions (start, end, CHECK_ALL); +#ifdef HAVE_TREE_SITTER + ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end)); +#endif } return orig_end + added; diff --git a/src/data.c b/src/data.c index 1526cc0c73..9c711d2021 100644 --- a/src/data.c +++ b/src/data.c @@ -260,6 +260,10 @@ DEFUN ("type-of", Ftype_of, Stype_of, 1, 1, 0, return Qxwidget; case PVEC_XWIDGET_VIEW: return Qxwidget_view; + case PVEC_TS_PARSER: + return Qtreesit_parser; + case 
PVEC_TS_NODE: + return Qtreesit_node; case PVEC_SQLITE: return Qsqlite; /* "Impossible" cases. */ @@ -4203,6 +4207,8 @@ #define PUT_ERROR(sym, tail, msg) \ DEFSYM (Qterminal, "terminal"); DEFSYM (Qxwidget, "xwidget"); DEFSYM (Qxwidget_view, "xwidget-view"); + DEFSYM (Qtreesit_parser, "treesit-parser"); + DEFSYM (Qtreesit_node, "treesit-node"); DEFSYM (Qdefun, "defun"); diff --git a/src/emacs.c b/src/emacs.c index d1060bca0b..9a14e10375 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -136,6 +136,10 @@ #define MAIN_PROGRAM #include #endif +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + #include "pdumper.h" #include "fingerprint.h" #include "epaths.h" @@ -2181,6 +2185,9 @@ main (int argc, char **argv) syms_of_module (); #endif +#ifdef HAVE_TREE_SITTER + syms_of_treesit (); +#endif #ifdef HAVE_SOUND syms_of_sound (); #endif diff --git a/src/eval.c b/src/eval.c index 294d79e67a..ecf57efb92 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1915,6 +1915,19 @@ signal_error (const char *s, Lisp_Object arg) xsignal (Qerror, Fcons (build_string (s), arg)); } +void +define_error (Lisp_Object name, const char *message, Lisp_Object parent) +{ + eassert (SYMBOLP (name)); + eassert (SYMBOLP (parent)); + Lisp_Object parent_conditions = Fget (parent, Qerror_conditions); + eassert (CONSP (parent_conditions)); + eassert (!NILP (Fmemq (parent, parent_conditions))); + eassert (NILP (Fmemq (name, parent_conditions))); + Fput (name, Qerror_conditions, pure_cons (name, parent_conditions)); + Fput (name, Qerror_message, build_pure_c_string (message)); +} + /* Use this for arithmetic overflow, e.g., when an integer result is too large even for a bignum. 
*/ void diff --git a/src/insdel.c b/src/insdel.c index 6f180ac580..4676330cb7 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -31,6 +31,10 @@ #include "region-cache.h" #include "pdumper.h" +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t, ptrdiff_t, bool, bool); static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool); @@ -940,6 +944,12 @@ insert_1_both (const char *string, set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars), Qnil, Qnil, Qnil); +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); +#endif + adjust_point (nchars, nbytes); check_markers (); @@ -1071,6 +1081,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); +#endif + adjust_point (nchars, outgoing_nbytes); check_markers (); @@ -1137,6 +1153,12 @@ insert_from_gap (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) current_buffer, 0); } +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (ins_bytepos >= 0); + ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes); +#endif + if (ins_charpos < PT) adjust_point (nchars, nbytes); @@ -1287,6 +1309,12 @@ insert_from_buffer_1 (struct buffer *buf, /* Insert those intervals. 
*/ graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (outgoing_nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes); +#endif + adjust_point (nchars, outgoing_nbytes); } @@ -1535,6 +1563,13 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, graft_intervals_into_buffer (intervals, from, inschars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (to_byte >= from_byte); + eassert (outgoing_insbytes >= 0); + eassert (from_byte >= 0); + ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes); +#endif + /* Relocate point as if it were a marker. */ if (from < PT) adjust_point ((from + inschars - (PT < to ? PT : to)), @@ -1569,7 +1604,11 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, If MARKERS, relocate markers. Unlike most functions at this level, never call - prepare_to_modify_buffer and never call signal_after_change. */ + prepare_to_modify_buffer and never call signal_after_change. + Because this function is called in a loop, one character at a time. + The caller of 'replace_range_2' calls these hooks for the entire + region once. Apart from signal_after_change, any caller of this + function should also call ts_record_change. */ void replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte, @@ -1892,6 +1931,12 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte, evaporate_overlays (from); +#ifdef HAVE_TREE_SITTER + eassert (from_byte <= to_byte); + eassert (from_byte >= 0); + ts_record_change (from_byte, to_byte, from_byte); +#endif + return deletion; } diff --git a/src/json.c b/src/json.c index db1be07f19..957f91b46b 100644 --- a/src/json.c +++ b/src/json.c @@ -1090,22 +1090,6 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer, return unbind_to (count, lisp); } -/* Simplified version of 'define-error' that works with pure - objects. 
*/ - -static void -define_error (Lisp_Object name, const char *message, Lisp_Object parent) -{ - eassert (SYMBOLP (name)); - eassert (SYMBOLP (parent)); - Lisp_Object parent_conditions = Fget (parent, Qerror_conditions); - eassert (CONSP (parent_conditions)); - eassert (!NILP (Fmemq (parent, parent_conditions))); - eassert (NILP (Fmemq (name, parent_conditions))); - Fput (name, Qerror_conditions, pure_cons (name, parent_conditions)); - Fput (name, Qerror_message, build_pure_c_string (message)); -} - void syms_of_json (void) { diff --git a/src/lisp.h b/src/lisp.h index 778bd1bfa5..aecbfed7fa 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -575,6 +575,8 @@ #define ENUM_BF(TYPE) enum TYPE your object -- this way, the same object could be used to represent several disparate C structures. + In addition, you need to add switch branches in data.c for Ftype_of. + You also need to add the new type to the constant `cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el. */ @@ -1053,6 +1055,8 @@ DEFINE_GDB_SYMBOL_END (PSEUDOVECTOR_FLAG) PVEC_CONDVAR, PVEC_MODULE_FUNCTION, PVEC_NATIVE_COMP_UNIT, + PVEC_TS_PARSER, + PVEC_TS_NODE, PVEC_SQLITE, /* These should be last, for internal_equal and sxhash_obj. */ @@ -5407,6 +5411,11 @@ maybe_gc (void) maybe_garbage_collect (); } +/* Simplified version of 'define-error' that works with pure + objects. */ +void +define_error (Lisp_Object name, const char *message, Lisp_Object parent); + INLINE_HEADER_END #endif /* EMACS_LISP_H */ diff --git a/src/lread.c b/src/lread.c index 0486a98883..8989e2d12d 100644 --- a/src/lread.c +++ b/src/lread.c @@ -5196,6 +5196,14 @@ syms_of_lread (void) Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes); #endif + DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes, + doc: /* A list of suffixes for loadable dynamic libraries. 
*/); + Vdynamic_library_suffixes = + Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil); + Vdynamic_library_suffixes = + Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX), + Vdynamic_library_suffixes); + #endif DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix, doc: /* Suffix of loadable module file, or nil if modules are not supported. */); diff --git a/src/print.c b/src/print.c index 8cce8a1ad8..ab3047dee5 100644 --- a/src/print.c +++ b/src/print.c @@ -48,6 +48,10 @@ Copyright (C) 1985-1986, 1988, 1993-1995, 1997-2022 Free Software # include /* for F_DUPFD_CLOEXEC */ #endif +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + struct terminal; /* Avoid actual stack overflow in print. */ @@ -1936,6 +1940,30 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag, } break; #endif + +#ifdef HAVE_TREE_SITTER + case PVEC_TS_PARSER: + print_c_string ("#language_symbol; + print_string (Fsymbol_name (language), printcharfun); + print_c_string (" in ", printcharfun); + print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag); + printchar ('>', printcharfun); + break; + case PVEC_TS_NODE: + print_c_string ("#parser)->buffer, + printcharfun, escapeflag); + printchar ('>', printcharfun); + break; +#endif + case PVEC_SQLITE: { print_c_string ("#. */ + +#include +#include "lisp.h" +#include "buffer.h" +#include "treesit.h" + +/* Commentary + + The Emacs wrapper of tree-sitter does not expose everything the C + API provides, most notably: + + - It doesn't expose a syntax tree, we put the syntax tree in the + parser object, and updating the tree is handled in the C level. + + - We don't expose tree cursor either. I think Lisp is slow enough + to nullify any performance advantage of using a cursor, though I + don't have evidence. Also I want to minimize the number of new + types we introduce, currently we only add parser and node type. 
+ + - Because updating the tree is handled at the C level as each + change is made in the buffer, there is no way for Lisp to update + a node. But since we can just retrieve a new node, it shouldn't + be a limitation. + + - I didn't expose setting timeout and cancellation flag for a + parser, mainly because I don't think they are really necessary + in Emacs' use cases. + + - Many tree-sitter functions ask for a TSPoint, basically a (row, + column) location. Emacs uses a gap buffer and keeps no + information about row and column position. According to the + author of tree-sitter, tree-sitter only asks for (row, column) + position to carry it around and return it to the user later; + and the real position used is the byte position. He also said + that he _thinks_ that it will work to use byte position only. + That's why whenever a TSPoint is asked for, we pass a dummy one to + it. Judging by the nature of parsing algorithms, I think it is + safe to use only byte position, and I don't think this will + change in the future. + + REF: https://github.com/tree-sitter/tree-sitter/issues/445 + + treesit.h has some commentary on the two main data structures + for the parser and node. ts_ensure_position_synced has some + commentary on how we make tree-sitter play well with narrowing + (tree-sitter parser only sees the visible region, so we need to + translate positions back and forth). Most action happens in + ts_ensure_parsed, ts_read_buffer and ts_record_change. + + A complete correspondence list between tree-sitter functions and + exposed Lisp functions can be found in the manual (elisp)API + Correspondence. + + Placement of CHECK_xxx functions: call CHECK_xxx before using any + unchecked Lisp values; these include arguments of Lisp functions, + return value of Fsymbol_value, car of a cons. + + Initializing tree-sitter: there are two entry points to tree-sitter + functions: 'treesit-parser-create' and + 'treesit-language-available-p'.
Therefore we only need to call + the initialization function in those two functions. + + Tree-sitter offset (0-based) and buffer position (1-based): + tree-sitter offset + visible_beg = buffer position + buffer position - visible_beg = tree-sitter offset + + Tree-sitter-related code in other files: + - src/alloc.c for gc for parser and node + - src/casefiddle.c & src/insdel.c for notifying tree-sitter + parser of buffer changes. + - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and + node type. + */ + +/*** Initialization */ + +bool ts_initialized = false; + +static void * +ts_calloc_wrapper (size_t n, size_t size) +{ + return xzalloc (n * size); +} + +void +ts_initialize () +{ + if (!ts_initialized) + { + ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree); + ts_initialized = true; + } +} + +/*** Loading language library */ + +/* Translates a symbol name treesit-LANG to a C name + treesit_LANG, replacing each '-' with '_' in place. */ +void +ts_symbol_to_c_name (char *symbol_name) +{ + for (int idx=0; idx < strlen (symbol_name); idx++) + { + if (symbol_name[idx] == '-') + symbol_name[idx] = '_'; + } +} + +bool +ts_find_override_name +(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol) +{ + for (Lisp_Object list = Vtreesit_load_name_override_list; + !NILP (list); list = XCDR (list)) + { + Lisp_Object lang = XCAR (XCAR (list)); + CHECK_SYMBOL (lang); + if (EQ (lang, language_symbol)) + { + *name = Fnth (make_fixnum (1), XCAR (list)); + CHECK_STRING (*name); + *c_symbol = Fnth (make_fixnum (2), XCAR (list)); + CHECK_STRING (*c_symbol); + return true; + } + } + return false; +} + +/* For example, if Vdynamic_library_suffixes is (".so", ".dylib"), + this function pushes "lib_base_name.so" and "lib_base_name.dylib" + into *path_candidates. Obviously path_candidates should be a Lisp + list of Lisp strings.
*/ +void +ts_load_language_push_for_each_suffix +(Lisp_Object lib_base_name, Lisp_Object *path_candidates) +{ + for (Lisp_Object suffixes = Vdynamic_library_suffixes; + !NILP (suffixes); suffixes = XCDR (suffixes)) { + *path_candidates = Fcons (concat2 (lib_base_name, XCAR (suffixes)), + *path_candidates); + } +} + +/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer + to the language definition. Signals + Qtreesit_load_language_error if something goes wrong. + Qtreesit_load_language_error carries the error message from + trying to load the library with each extension. + + If SIGNAL is true, signal an error when failed to load LANGUAGE; if + false, return NULL when failed. */ +TSLanguage * +ts_load_language (Lisp_Object language_symbol, bool signal) +{ + Lisp_Object symbol_name = Fsymbol_name (language_symbol); + + /* Figure out the library name and C name. */ + Lisp_Object lib_base_name = + (concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name)); + Lisp_Object base_name = + (concat2 (build_pure_c_string ("tree-sitter-"), symbol_name)); + char *c_name = strdup (SSDATA (base_name)); + ts_symbol_to_c_name (c_name); + + /* Override the library name and C name, if appropriate. */ + Lisp_Object override_name; + Lisp_Object override_c_name; + bool found_override = ts_find_override_name + (language_symbol, &override_name, &override_c_name); + if (found_override) + { + lib_base_name = override_name; + c_name = SSDATA (override_c_name); + } + + /* Now we generate a list of possible library paths. */ + Lisp_Object path_candidates = Qnil; + /* First push just the filenames to the candidate list, which will + make dynlib_open look under standard system load paths. */ + ts_load_language_push_for_each_suffix + (lib_base_name, &path_candidates); + /* Then push ~/.emacs.d/tree-sitter paths. 
*/ + ts_load_language_push_for_each_suffix + (Fexpand_file_name + (concat2 (build_string ("tree-sitter/"), lib_base_name), + Fsymbol_value (Quser_emacs_directory)), + &path_candidates); + /* Then push paths from treesit-extra-load-path. */ + for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path); + !NILP (tail); tail = XCDR (tail)) + { + ts_load_language_push_for_each_suffix + (Fexpand_file_name (lib_base_name, XCAR (tail)), + &path_candidates); + } + + /* Try loading the dynamic library by each path candidate. Stop + when succeed, record the error message and try the next one when + fail. */ + dynlib_handle_ptr handle; + char const *error; + Lisp_Object error_list = Qnil; + for (Lisp_Object tail = path_candidates; + !NILP (tail); tail = XCDR (tail)) + { + char *library_name = SSDATA (XCAR (tail)); + dynlib_error (); + handle = dynlib_open (library_name); + error = dynlib_error (); + if (error == NULL) + break; + else + error_list = Fcons (build_string (error), error_list); + } + if (error != NULL) + { + if (signal) + xsignal2 (Qtreesit_load_language_error, + symbol_name, Fnreverse (error_list)); + else + return NULL; + } + + /* Load TSLanguage. */ + dynlib_error (); + TSLanguage *(*langfn) (); + langfn = dynlib_sym (handle, c_name); + error = dynlib_error (); + if (error != NULL) + { + if (signal) + xsignal1 (Qtreesit_load_language_error, + build_string (error)); + else + return NULL; + } + TSLanguage *lang = (*langfn) (); + + /* Check if language version matches tree-sitter version. 
*/ + TSParser *parser = ts_parser_new (); + bool success = ts_parser_set_language (parser, lang); + ts_parser_delete (parser); + if (!success) + { + if (signal) + xsignal2 (Qtreesit_load_language_error, + build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"), + make_fixnum (ts_language_version (lang))); + else + return NULL; + } + return lang; +} + +DEFUN ("treesit-language-available-p", + Ftreesit_langauge_available_p, + Streesit_language_available_p, + 1, 1, 0, + doc: /* Return non-nil if LANGUAGE exists and is loadable. */) + (Lisp_Object language) +{ + CHECK_SYMBOL (language); + ts_initialize (); + if (ts_load_language(language, false) == NULL) + return Qnil; + else + return Qt; +} + +/*** Parsing functions */ + +/* An auxiliary function that saves a few lines of code. Assumes TREE + is not NULL. */ +static inline void +ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, + ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) +{ + TSPoint dummy_point = {0, 0}; + TSInputEdit edit = {(uint32_t) start_byte, + (uint32_t) old_end_byte, + (uint32_t) new_end_byte, + dummy_point, dummy_point, dummy_point}; + ts_tree_edit (tree, &edit); +} + +/* Update each parser's tree after the user made an edit. This +function does not parse the buffer and only updates the tree. (So it +should be very fast.) */ +void +ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte) +{ + for (Lisp_Object parser_list = + Fsymbol_value (Qtreesit_parser_list); + !NILP (parser_list); + parser_list = XCDR (parser_list)) + { + CHECK_CONS (parser_list); + Lisp_Object lisp_parser = XCAR (parser_list); + CHECK_TS_PARSER (lisp_parser); + TSTree *tree = XTS_PARSER (lisp_parser)->tree; + if (tree != NULL) + { + eassert (start_byte <= old_end_byte); + eassert (start_byte <= new_end_byte); + /* Think the recorded change as a delete followed by an + insert, and think of them as moving unchanged text back + and forth. 
After all, the whole point of updating the + tree is to update the position of unchanged text. */ + ptrdiff_t bytes_del = old_end_byte - start_byte; + ptrdiff_t bytes_ins = new_end_byte - start_byte; + + ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; + ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; + + ptrdiff_t affected_start = + max (visible_beg, start_byte) - visible_beg; + ptrdiff_t affected_old_end = + min (visible_end, affected_start + bytes_del); + ptrdiff_t affected_new_end = + affected_start + bytes_ins; + + ts_tree_edit_1 (tree, affected_start, affected_old_end, + affected_new_end); + XTS_PARSER (lisp_parser)->visible_end = affected_new_end; + XTS_PARSER (lisp_parser)->need_reparse = true; + XTS_PARSER (lisp_parser)->timestamp++; + } + } +} + +void +ts_ensure_position_synced (Lisp_Object parser) +{ + TSParser *ts_parser = XTS_PARSER (parser)->parser; + TSTree *tree = XTS_PARSER (parser)->tree; + + if (tree == NULL) + return; + + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; + ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; + /* Before we parse or set ranges, catch up with the narrowing + situation. We change visible_beg and visible_end to match + BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the + change. We want to move the visible range of tree-sitter to + match the narrowed range. For example, + from ________|xxxx|__ + to |xxxx|__________ */ + + /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */ + if (visible_beg > BUF_BEGV_BYTE (buffer)) + { + /* Tree-sitter sees: insert at the beginning. */ + ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); + visible_beg = BUF_BEGV_BYTE (buffer); + } + /* 2. Make sure visible_end = BUF_ZV_BYTE. */ + if (visible_end < BUF_ZV_BYTE (buffer)) + { + /* Tree-sitter sees: insert at the end. 
*/ + ts_tree_edit_1 (tree, visible_end - visible_beg, + visible_end - visible_beg, + BUF_ZV_BYTE (buffer) - visible_beg); + visible_end = BUF_ZV_BYTE (buffer); + } + else if (visible_end > BUF_ZV_BYTE (buffer)) + { + /* Tree-sitter sees: delete at the end. */ + ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg, + visible_end - visible_beg, + BUF_ZV_BYTE (buffer) - visible_beg); + visible_end = BUF_ZV_BYTE (buffer); + } + /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ + if (visible_beg < BUF_BEGV_BYTE (buffer)) + { + /* Tree-sitter sees: delete at the beginning. */ + ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); + visible_beg = BUF_BEGV_BYTE (buffer); + } + eassert (0 <= visible_beg); + eassert (visible_beg <= visible_end); + + XTS_PARSER (parser)->visible_beg = visible_beg; + XTS_PARSER (parser)->visible_end = visible_end; +} + +void +ts_check_buffer_size (struct buffer *buffer) +{ + ptrdiff_t buffer_size = + (BUF_Z (buffer) - BUF_BEG (buffer)); + if (buffer_size > UINT32_MAX) + xsignal2 (Qtreesit_buffer_too_large, + build_pure_c_string ("Buffer size too large, size:"), + make_fixnum (buffer_size)); +} + +/* Parse the buffer. We don't parse until we have to. When we have +to, we call this function to parse and update the tree. */ +void +ts_ensure_parsed (Lisp_Object parser) +{ + if (!XTS_PARSER (parser)->need_reparse) + return; + TSParser *ts_parser = XTS_PARSER (parser)->parser; + TSTree *tree = XTS_PARSER(parser)->tree; + TSInput input = XTS_PARSER (parser)->input; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + ts_check_buffer_size (buffer); + + /* Before we parse, catch up with the narrowing situation. 
*/ + ts_ensure_position_synced (parser); + + TSTree *new_tree = ts_parser_parse(ts_parser, tree, input); + /* This should be very rare (impossible, really): it only happens + when 1) language is not set (impossible in Emacs because the user + has to supply a language to create a parser), 2) parse canceled + due to timeout (impossible because we don't set a timeout), 3) + parse canceled due to cancellation flag (impossible because we + don't set the flag). (See comments for ts_parser_parse in + tree_sitter/api.h.) */ + if (new_tree == NULL) + { + Lisp_Object buf; + XSETBUFFER (buf, buffer); + xsignal1 (Qtreesit_parse_error, buf); + } + + ts_tree_delete (tree); + XTS_PARSER (parser)->tree = new_tree; + XTS_PARSER (parser)->need_reparse = false; +} + +/* This is the read function provided to tree-sitter to read from a + buffer. It reads one character at a time and automatically skips + the gap. */ +const char* +ts_read_buffer (void *parser, uint32_t byte_index, + TSPoint position, uint32_t *bytes_read) +{ + struct buffer *buffer = + XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer); + ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg; + ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end; + ptrdiff_t byte_pos = byte_index + visible_beg; + /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in + ts_ensure_parsed), so byte_pos will never be smaller than + BUF_BEG_BYTE. */ + eassert (visible_beg = BUF_BEGV_BYTE (buffer)); + eassert (visible_end = BUF_ZV_BYTE (buffer)); + + /* Read one character. Tree-sitter wants us to set bytes_read to 0 + if it reads to the end of buffer. It doesn't say what it wants + for the return value in that case, so we just give it an empty + string. */ + char *beg; + int len; + /* This function could run from a user command, so it is better to + do nothing instead of raising an error. 
(It was a pain in the a** + to decrypt mega-if-conditions in Emacs source, so I wrote the two + branches separately.) */ + if (!BUFFER_LIVE_P (buffer)) + { + beg = NULL; + len = 0; + } + /* Reached visible end-of-buffer, tell tree-sitter to read no more. */ + else if (byte_pos >= visible_end) + { + beg = NULL; + len = 0; + } + /* Normal case, read a character. */ + else + { + beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos); + len = BYTES_BY_CHAR_HEAD ((int) *beg); + } + *bytes_read = (uint32_t) len; + return beg; +} + +/*** Functions for parser and node object*/ + +/* Wrap the parser in a Lisp_Object to be used in the Lisp machine. */ +Lisp_Object +make_ts_parser (Lisp_Object buffer, TSParser *parser, + TSTree *tree, Lisp_Object language_symbol) +{ + struct Lisp_TS_Parser *lisp_parser + = ALLOCATE_PSEUDOVECTOR + (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER); + + lisp_parser->language_symbol = language_symbol; + lisp_parser->buffer = buffer; + lisp_parser->parser = parser; + lisp_parser->tree = tree; + TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8}; + lisp_parser->input = input; + lisp_parser->need_reparse = true; + lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer)); + lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer)); + return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); +} + +/* Wrap the node in a Lisp_Object to be used in the Lisp machine. */ +Lisp_Object +make_ts_node (Lisp_Object parser, TSNode node) +{ + struct Lisp_TS_Node *lisp_node + = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE); + lisp_node->parser = parser; + lisp_node->node = node; + lisp_node->timestamp = XTS_PARSER (parser)->timestamp; + return make_lisp_ptr (lisp_node, Lisp_Vectorlike); +} + +DEFUN ("treesit-parser-p", + Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0, + doc: /* Return t if OBJECT is a tree-sitter parser. 
*/) + (Lisp_Object object) +{ + if (TS_PARSERP (object)) + return Qt; + else + return Qnil; +} + +DEFUN ("treesit-node-p", + Ftreesit_node_p, Streesit_node_p, 1, 1, 0, + doc: /* Return t if OBJECT is a tree-sitter node. */) + (Lisp_Object object) +{ + if (TS_NODEP (object)) + return Qt; + else + return Qnil; +} + +DEFUN ("treesit-node-parser", + Ftreesit_node_parser, Streesit_node_parser, + 1, 1, 0, + doc: /* Return the parser to which NODE belongs. */) + (Lisp_Object node) +{ + CHECK_TS_NODE (node); + return XTS_NODE (node)->parser; +} + +DEFUN ("treesit-parser-create", + Ftreesit_parser_create, Streesit_parser_create, + 2, 2, 0, + doc: /* Create and return a parser in BUFFER for LANGUAGE. + +The parser is automatically added to BUFFER's +`treesit-parser-list'. LANGUAGE should be the symbol of a +function provided by a tree-sitter language dynamic module, e.g., +'treesit-json. If BUFFER is nil, use the current buffer. */) + (Lisp_Object buffer, Lisp_Object language) +{ + if (NILP (buffer)) + buffer = Fcurrent_buffer (); + + CHECK_BUFFER (buffer); + CHECK_SYMBOL (language); + ts_check_buffer_size (XBUFFER (buffer)); + + ts_initialize (); + + TSParser *parser = ts_parser_new (); + TSLanguage *lang = ts_load_language (language, true); + /* We check language version when loading a language, so this should + always succeed. */ + ts_parser_set_language (parser, lang); + + Lisp_Object lisp_parser + = make_ts_parser (buffer, parser, NULL, language); + + struct buffer *old_buffer = current_buffer; + set_buffer_internal (XBUFFER (buffer)); + + Fset (Qtreesit_parser_list, + Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list))); + + set_buffer_internal (old_buffer); + return lisp_parser; +} + +DEFUN ("treesit-parser-buffer", + Ftreesit_parser_buffer, Streesit_parser_buffer, + 1, 1, 0, + doc: /* Return the buffer of PARSER. 
*/) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + Lisp_Object buf; + XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer)); + return buf; +} + +DEFUN ("treesit-parser-language", + Ftreesit_parser_language, Streesit_parser_language, + 1, 1, 0, + doc: /* Return parser's language symbol. +This symbol is the one used to create the parser. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + return XTS_PARSER (parser)->language_symbol; +} + +/*** Parser API */ + +DEFUN ("treesit-parser-root-node", + Ftreesit_parser_root_node, Streesit_parser_root_node, + 1, 1, 0, + doc: /* Return the root node of PARSER. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + ts_ensure_parsed (parser); + TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree); + return make_ts_node (parser, root_node); +} + +/* Checks that the RANGES argument of + treesit-parser-set-included-ranges is valid. */ +void +ts_check_range_argument (Lisp_Object ranges) +{ + EMACS_INT last_point = 1; + for (Lisp_Object tail = ranges; + !NILP (tail); tail = XCDR (tail)) + { + CHECK_CONS (tail); + Lisp_Object range = XCAR (tail); + CHECK_CONS (range); + CHECK_FIXNUM (XCAR (range)); + CHECK_FIXNUM (XCDR (range)); + EMACS_INT beg = XFIXNUM (XCAR (range)); + EMACS_INT end = XFIXNUM (XCDR (range)); + /* TODO: Maybe we should check for point-min/max, too? */ + if (!(last_point <= beg && beg <= end)) + xsignal2 (Qtreesit_range_invalid, + build_pure_c_string + ("RANGE is either overlapping or out-of-order"), + ranges); + last_point = end; + } +} + +DEFUN ("treesit-parser-set-included-ranges", + Ftreesit_parser_set_included_ranges, + Streesit_parser_set_included_ranges, + 2, 2, 0, + doc: /* Limit PARSER to RANGES. + +RANGES is a list of (BEG . END), each (BEG . END) confines a range in +which the parser should operate in. Each range must not overlap, and +each range should come in order. Signal `treesit-set-range-error' +if the argument is invalid, or something else went wrong. 
If RANGES +is nil, set PARSER to parse the whole buffer. */) + (Lisp_Object parser, Lisp_Object ranges) +{ + CHECK_TS_PARSER (parser); + CHECK_CONS (ranges); + ts_check_range_argument (ranges); + + /* Before we parse, catch up with narrowing/widening. */ + ts_ensure_position_synced (parser); + + bool success; + if (NILP (ranges)) + { + /* If RANGES is nil, make parser to parse the whole document. + To do that we give tree-sitter a 0 length, the range is a + dummy. */ + TSRange ts_range = {0, 0, 0, 0}; + success = ts_parser_set_included_ranges + (XTS_PARSER (parser)->parser, &ts_range , 0); + } + else + { + /* Set ranges for PARSER. */ + ptrdiff_t len = list_length (ranges); + TSRange *ts_ranges = malloc (sizeof(TSRange) * len); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges)) + { + Lisp_Object range = XCAR (ranges); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + EMACS_INT beg_byte = buf_charpos_to_bytepos + (buffer, XFIXNUM (XCAR (range))); + EMACS_INT end_byte = buf_charpos_to_bytepos + (buffer, XFIXNUM (XCDR (range))); + /* We don't care about start and end points, put in dummy + value. */ + TSRange rg = {{0,0}, {0,0}, + (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer), + (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)}; + ts_ranges[idx] = rg; + } + success = ts_parser_set_included_ranges + (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len); + /* Although XFIXNUM could signal, it should be impossible + because we have checked the input by ts_check_range_argument. + So there is no need for unwind-protect. 
*/ + free (ts_ranges); + } + + if (!success) + xsignal2 (Qtreesit_range_invalid, + build_pure_c_string + ("Something went wrong when setting ranges"), + ranges); + + XTS_PARSER (parser)->need_reparse = true; + return Qnil; +} + +DEFUN ("treesit-parser-included-ranges", + Ftreesit_parser_included_ranges, + Streesit_parser_included_ranges, + 1, 1, 0, + doc: /* Return the ranges set for PARSER. +See `treesit-parser-set-ranges'. If no range is set, return +nil. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + uint32_t len; + const TSRange *ranges = ts_parser_included_ranges + (XTS_PARSER (parser)->parser, &len); + if (len == 0) + return Qnil; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + Lisp_Object list = Qnil; + for (int idx=0; idx < len; idx++) + { + TSRange range = ranges[idx]; + uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); + uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); + + Lisp_Object lisp_range = + Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) , + make_fixnum (buf_bytepos_to_charpos (buffer, end_byte))); + list = Fcons (lisp_range, list); + } + return Fnreverse (list); +} + +/*** Node API */ + +/* Check that OBJ is a positive integer and signal an error if + otherwise. */ +static void +ts_check_positive_integer (Lisp_Object obj) +{ + CHECK_INTEGER (obj); + if (XFIXNUM (obj) < 0) + xsignal1 (Qargs_out_of_range, obj); +} + +static void +ts_check_node (Lisp_Object obj) +{ + CHECK_TS_NODE (obj); + Lisp_Object lisp_parser = XTS_NODE (obj)->parser; + if (XTS_NODE (obj)->timestamp != + XTS_PARSER (lisp_parser)->timestamp) + xsignal1 (Qtreesit_node_outdated, obj); +} + +DEFUN ("treesit-node-type", + Ftreesit_node_type, Streesit_node_type, 1, 1, 0, + doc: /* Return the NODE's type as a string. +If NODE is nil, return nil. 
*/) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + const char *type = ts_node_type (ts_node); + return build_string (type); +} + +DEFUN ("treesit-node-start", + Ftreesit_node_start, Streesit_node_start, 1, 1, 0, + doc: /* Return the NODE's start position. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + uint32_t start_byte_offset = ts_node_start_byte (ts_node); + struct buffer *buffer = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t start_pos = buf_bytepos_to_charpos + (buffer, start_byte_offset + visible_beg); + return make_fixnum (start_pos); +} + +DEFUN ("treesit-node-end", + Ftreesit_node_end, Streesit_node_end, 1, 1, 0, + doc: /* Return the NODE's end position. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + uint32_t end_byte_offset = ts_node_end_byte (ts_node); + struct buffer *buffer = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t end_pos = buf_bytepos_to_charpos + (buffer, end_byte_offset + visible_beg); + return make_fixnum (end_pos); +} + +DEFUN ("treesit-node-string", + Ftreesit_node_string, Streesit_node_string, 1, 1, 0, + doc: /* Return the string representation of NODE. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + char *string = ts_node_string (ts_node); + return make_string (string, strlen (string)); +} + +DEFUN ("treesit-node-parent", + Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0, + doc: /* Return the immediate parent of NODE. 
+Return nil if there isn't any. If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode parent = ts_node_parent (ts_node); + + if (ts_node_is_null (parent)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, parent); +} + +DEFUN ("treesit-node-child", + Ftreesit_node_child, Streesit_node_child, 2, 3, 0, + doc: /* Return the Nth child of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object n, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (n); + EMACS_INT idx = XFIXNUM (n); + if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n); + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_child (ts_node, (uint32_t) idx); + else + child = ts_node_named_child (ts_node, (uint32_t) idx); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-check", + Ftreesit_node_check, Streesit_node_check, 2, 2, 0, + doc: /* Return non-nil if NODE has PROPERTY, nil otherwise. + +PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error. +Named nodes correspond to named rules in the language definition, +whereas "anonymous" nodes correspond to string literals in the +language definition. + +Missing nodes are inserted by the parser in order to recover from +certain kinds of syntax errors, i.e., should be there but not there. + +Extra nodes represent things like comments, which are not required the +language definition, but can appear anywhere. + +A node "has changes" if the buffer changed since the node is +created. (Don't forget the "s" at the end of 'has-changes.) + +A node "has error" if itself is a syntax error or contains any syntax +errors. 
*/) + (Lisp_Object node, Lisp_Object property) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_SYMBOL (property); + TSNode ts_node = XTS_NODE (node)->node; + bool result; + if (EQ (property, Qnamed)) + result = ts_node_is_named (ts_node); + else if (EQ (property, Qmissing)) + result = ts_node_is_missing (ts_node); + else if (EQ (property, Qextra)) + result = ts_node_is_extra (ts_node); + else if (EQ (property, Qhas_error)) + result = ts_node_has_error (ts_node); + else if (EQ (property, Qhas_changes)) + result = ts_node_has_changes (ts_node); + else + signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got", + property); + return result ? Qt : Qnil; +} + +DEFUN ("treesit-node-field-name-for-child", + Ftreesit_node_field_name_for_child, + Streesit_node_field_name_for_child, 2, 2, 0, + doc: /* Return the field name of the Nth child of NODE. + +Return nil if there isn't any child or no field is found. +If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object n) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (n); + EMACS_INT idx = XFIXNUM (n); + if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n); + TSNode ts_node = XTS_NODE (node)->node; + const char *name + = ts_node_field_name_for_child (ts_node, (uint32_t) idx); + + if (name == NULL) + return Qnil; + + return make_string (name, strlen (name)); +} + +DEFUN ("treesit-node-child-count", + Ftreesit_node_child_count, + Streesit_node_child_count, 1, 2, 0, + doc: /* Return the number of children of NODE. + +If NAMED is non-nil, count named child only. NAMED defaults to +nil. If NODE is nil, return nil. 
*/) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + uint32_t count; + if (NILP (named)) + count = ts_node_child_count (ts_node); + else + count = ts_node_named_child_count (ts_node); + return make_fixnum (count); +} + +DEFUN ("treesit-node-child-by-field-name", + Ftreesit_node_child_by_field_name, + Streesit_node_child_by_field_name, 2, 2, 0, + doc: /* Return the child of NODE with FIELD-NAME. +Return nil if there isn't any. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object field_name) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_STRING (field_name); + char *name_str = SSDATA (field_name); + TSNode ts_node = XTS_NODE (node)->node; + TSNode child + = ts_node_child_by_field_name (ts_node, name_str, strlen (name_str)); + + if (ts_node_is_null(child)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-next-sibling", + Ftreesit_node_next_sibling, + Streesit_node_next_sibling, 1, 2, 0, + doc: /* Return the next sibling of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode sibling; + if (NILP (named)) + sibling = ts_node_next_sibling (ts_node); + else + sibling = ts_node_next_named_sibling (ts_node); + + if (ts_node_is_null(sibling)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, sibling); +} + +DEFUN ("treesit-node-prev-sibling", + Ftreesit_node_prev_sibling, + Streesit_node_prev_sibling, 1, 2, 0, + doc: /* Return the previous sibling of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. 
*/) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode sibling; + + if (NILP (named)) + sibling = ts_node_prev_sibling (ts_node); + else + sibling = ts_node_prev_named_sibling (ts_node); + + if (ts_node_is_null(sibling)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, sibling); +} + +DEFUN ("treesit-node-first-child-for-pos", + Ftreesit_node_first_child_for_pos, + Streesit_node_first_child_for_pos, 2, 3, 0, + doc: /* Return the first child of NODE on POS. + +Specifically, return the first child that extends beyond POS. POS is +a position in the buffer. Return nil if there isn't any. If NAMED is +non-nil, look for named child only. NAMED defaults to nil. Note that +this function returns an immediate child, not the smallest +(grand)child. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object pos, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (pos); + + struct buffer *buf = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos)); + + if (byte_pos < BUF_BEGV_BYTE (buf) || byte_pos > BUF_ZV_BYTE (buf)) + xsignal1 (Qargs_out_of_range, pos); + + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_first_child_for_byte + (ts_node, byte_pos - visible_beg); + else + child = ts_node_first_named_child_for_byte + (ts_node, byte_pos - visible_beg); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-descendant-for-range", + Ftreesit_node_descendant_for_range, + Streesit_node_descendant_for_range, 3, 4, 0, + doc: /* Return the smallest node that covers BEG to END. + +The returned node is a descendant of NODE. POS is a position. 
Return +nil if there isn't any. If NAMED is non-nil, look for named child +only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_INTEGER (beg); + CHECK_INTEGER (end); + + struct buffer *buf = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg)); + ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end)); + + /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END. */ + if (!(BUF_BEGV_BYTE (buf) <= byte_beg + && byte_beg <= byte_end + && byte_end <= BUF_ZV_BYTE (buf))) + xsignal2 (Qargs_out_of_range, beg, end); + + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_descendant_for_byte_range + (ts_node, byte_beg - visible_beg , byte_end - visible_beg); + else + child = ts_node_named_descendant_for_byte_range + (ts_node, byte_beg - visible_beg, byte_end - visible_beg); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-eq", + Ftreesit_node_eq, + Streesit_node_eq, 2, 2, 0, + doc: /* Return non-nil if NODE1 and NODE2 are the same node. +If any one of NODE1 and NODE2 is nil, return nil. */) + (Lisp_Object node1, Lisp_Object node2) +{ + if (NILP (node1) || NILP (node2)) + return Qnil; + CHECK_TS_NODE (node1); + CHECK_TS_NODE (node2); + + TSNode ts_node_1 = XTS_NODE (node1)->node; + TSNode ts_node_2 = XTS_NODE (node2)->node; + + bool same_node = ts_node_eq (ts_node_1, ts_node_2); + return same_node ? Qt : Qnil; +} + +/*** Query functions */ + +/* If we decide to pre-load tree-sitter.el, maybe we can implement + this function in Lisp. 
*/ +DEFUN ("treesit-expand-pattern", + Ftreesit_expand_pattern, + Streesit_expand_pattern, 1, 1, 0, + doc: /* Expand PATTERN to its string form. + +PATTERN can be + + :anchor + :? + :* + :+ + :equal + :match + (TYPE PATTERN...) + [PATTERN...] + FIELD-NAME: + @CAPTURE-NAME + (_) + _ + \"TYPE\" + +Consult Info node `(elisp)Pattern Matching' form detailed +explanation. */) + (Lisp_Object pattern) +{ + if (EQ (pattern, intern_c_string (":anchor"))) + return build_pure_c_string("."); + if (EQ (pattern, intern_c_string (":?"))) + return build_pure_c_string("?"); + if (EQ (pattern, intern_c_string (":*"))) + return build_pure_c_string("*"); + if (EQ (pattern, intern_c_string (":+"))) + return build_pure_c_string("+"); + if (EQ (pattern, intern_c_string (":equal"))) + return build_pure_c_string("#equal"); + if (EQ (pattern, intern_c_string (":match"))) + return build_pure_c_string("#match"); + Lisp_Object opening_delimeter = + build_pure_c_string (VECTORP (pattern) ? "[" : "("); + Lisp_Object closing_delimiter = + build_pure_c_string (VECTORP (pattern) ? "]" : ")"); + if (VECTORP (pattern) || CONSP (pattern)) + return concat3 (opening_delimeter, + Fmapconcat (intern_c_string + ("treesit-expand-pattern"), + pattern, + build_pure_c_string (" ")), + closing_delimiter); + return CALLN (Fformat, build_pure_c_string("%S"), pattern); +} + +DEFUN ("treesit-expand-query", + Ftreesit_expand_query, + Streesit_expand_query, 1, 1, 0, + doc: /* Expand sexp QUERY to its string form. + +A PATTERN in QUERY can be + + :anchor + :? + :* + :+ + :equal + :match + (TYPE PATTERN...) + [PATTERN...] + FIELD-NAME: + @CAPTURE-NAME + (_) + _ + \"TYPE\" + +Consult Info node `(elisp)Pattern Matching' form detailed +explanation. 
*/) + (Lisp_Object query) +{ + return Fmapconcat (intern_c_string ("treesit-expand-pattern"), + query, build_pure_c_string (" ")); +} + +char* +ts_query_error_to_string (TSQueryError error) +{ + switch (error) + { + case TSQueryErrorNone: + return "None"; + case TSQueryErrorSyntax: + return "Syntax error at"; + case TSQueryErrorNodeType: + return "Node type error at"; + case TSQueryErrorField: + return "Field error at"; + case TSQueryErrorCapture: + return "Capture error at"; + case TSQueryErrorStructure: + return "Structure error at"; + default: + return "Unknown error"; + } +} + +/* Collect predicates for this match and return them in a list. Each + predicate is a list of strings and symbols. */ +Lisp_Object +ts_predicates_for_pattern +(TSQuery *query, uint32_t pattern_index) +{ + uint32_t len; + const TSQueryPredicateStep *predicate_list = + ts_query_predicates_for_pattern (query, pattern_index, &len); + Lisp_Object result = Qnil; + Lisp_Object predicate = Qnil; + for (int idx=0; idx < len; idx++) + { + TSQueryPredicateStep step = predicate_list[idx]; + switch (step.type) + { + case TSQueryPredicateStepTypeCapture: + { + uint32_t str_len; + const char *str = ts_query_capture_name_for_id + (query, step.value_id, &str_len); + predicate = Fcons (intern_c_string_1 (str, str_len), + predicate); + break; + } + case TSQueryPredicateStepTypeString: + { + uint32_t str_len; + const char *str = ts_query_string_value_for_id + (query, step.value_id, &str_len); + predicate = Fcons (make_string (str, str_len), predicate); + break; + } + case TSQueryPredicateStepTypeDone: + result = Fcons (Fnreverse (predicate), result); + predicate = Qnil; + break; + } + } + return Fnreverse (result); +} + +/* Translate a capture NAME (symbol) to the text of the captured node. + Signals treesit-query-error if such node is not captured. 
*/ +Lisp_Object +ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures) +{ + Lisp_Object node = Qnil; + for (Lisp_Object tail = captures; !NILP (tail); tail = XCDR (tail)) + { + if (EQ (XCAR (XCAR (tail)), name)) + { + node = XCDR (XCAR (tail)); + break; + } + } + + if (NILP (node)) + xsignal3 (Qtreesit_query_error, + build_pure_c_string ("Cannot find captured node"), + name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern")); + + struct buffer *old_buffer = current_buffer; + set_buffer_internal + (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer)); + Lisp_Object text = Fbuffer_substring + (Ftreesit_node_start (node), Ftreesit_node_end (node)); + set_buffer_internal (old_buffer); + return text; +} + +/* Handles predicate (#equal A B). Return true if A equals B; return + false otherwise. A and B can be either string, or a capture name. + The capture name evaluates to the text its captured node spans in + the buffer. */ +bool +ts_predicate_equal (Lisp_Object args, Lisp_Object captures) +{ + if (XFIXNUM (Flength (args)) != 2) + xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); + + Lisp_Object arg1 = XCAR (args); + Lisp_Object arg2 = XCAR (XCDR (args)); + Lisp_Object tail = captures; + Lisp_Object text1 = STRINGP (arg1) ? arg1 : + ts_predicate_capture_name_to_text (arg1, captures); + Lisp_Object text2 = STRINGP (arg2) ? arg2 : + ts_predicate_capture_name_to_text (arg2, captures); + + if (NILP (Fstring_equal (text1, text2))) + return false; + else + return true; +} + +/* Handles predicate (#match "regexp" @node). Return true if "regexp" + matches the text spanned by @node; return false otherwise. Matching + is case-sensitive. 
*/ +bool +ts_predicate_match (Lisp_Object args, Lisp_Object captures) +{ + if (XFIXNUM (Flength (args)) != 2) + xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); + + Lisp_Object regexp = XCAR (args); + Lisp_Object capture_name = XCAR (XCDR (args)); + Lisp_Object tail = captures; + Lisp_Object text = ts_predicate_capture_name_to_text + (capture_name, captures); + + /* It's probably common to get the argument order backwards. Catch + this mistake early and show helpful explanation, because Emacs + loves you. (We put the regexp first because that's what + string-match does.) */ + if (!STRINGP (regexp)) + xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name")); + if (!SYMBOLP (capture_name)) + xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string")); + + if (fast_string_match (regexp, text) >= 0) + return true; + else + return false; +} + +/* About predicates: I decide to hard-code predicates in C instead of + implementing an extensible system where predicates are translated + to Lisp functions, and new predicates can be added by extending a + list of functions, because I really couldn't imagine any useful + predicates besides equal and match. If we later found out that + such system is indeed useful and necessary, it can be easily + added. */ + +/* If all predicates in PREDICATES passes, return true; otherwise + return false. */ +bool +ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates) +{ + bool pass = true; + /* Evaluate each predicates. 
*/ + for (Lisp_Object tail = predicates; + !NILP (tail); tail = XCDR (tail)) + { + Lisp_Object predicate = XCAR (tail); + Lisp_Object fn = XCAR (predicate); + Lisp_Object args = XCDR (predicate); + if (!NILP (Fstring_equal (fn, build_pure_c_string("equal")))) + pass = ts_predicate_equal (args, captures); + else if (!NILP (Fstring_equal + (fn, build_pure_c_string("match")))) + pass = ts_predicate_match (args, captures); + else + xsignal3 (Qtreesit_query_error, + build_pure_c_string ("Invalid predicate"), + fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate")); + } + /* If all predicates passed, add captures to result list. */ + return pass; +} + +DEFUN ("treesit-query-capture", + Ftreesit_query_capture, + Streesit_query_capture, 2, 4, 0, + doc: /* Query NODE with patterns in QUERY. + +Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name +assigned to the node in PATTERN. NODE is the captured node. + +QUERY is either a string query or a sexp query. See Info node +`(elisp)Pattern Matching' for how to write a query in either string or +s-expression form. + +BEG and END, if both non-nil, specifies the range in which the query +is executed. + +Raise an treesit-query-error if QUERY is malformed, or something +else goes wrong. */) + (Lisp_Object node, Lisp_Object query, + Lisp_Object beg, Lisp_Object end) +{ + ts_check_node (node); + if (!NILP (beg)) + CHECK_INTEGER (beg); + if (!NILP (end)) + CHECK_INTEGER (end); + + if (CONSP (query)) + query = Ftreesit_expand_query (query); + else + CHECK_STRING (query); + + /* Extract C values from Lisp objects. */ + TSNode ts_node = XTS_NODE (node)->node; + Lisp_Object lisp_parser = XTS_NODE (node)->parser; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + const TSLanguage *lang = ts_parser_language + (XTS_PARSER (lisp_parser)->parser); + char *source = SSDATA (query); + + /* Initialize query objects, and execute query. 
*/ + uint32_t error_offset; + TSQueryError error_type; + /* TODO: We could cache the query object, so that repeatedly + querying with the same query can reuse the query object. It also + saves us from expanding the sexp query into a string. I don't + know how much time that could save though. */ + TSQuery *ts_query = ts_query_new (lang, source, strlen (source), + &error_offset, &error_type); + TSQueryCursor *cursor = ts_query_cursor_new (); + + if (ts_query == NULL) + { + xsignal2 (Qtreesit_query_error, + build_string (ts_query_error_to_string (error_type)), + make_fixnum (error_offset + 1)); + } + if (!NILP (beg) && !NILP (end)) + { + EMACS_INT beg_byte = XFIXNUM (beg); + EMACS_INT end_byte = XFIXNUM (end); + ts_query_cursor_set_byte_range + (cursor, (uint32_t) beg_byte - visible_beg, + (uint32_t) end_byte - visible_beg); + } + + ts_query_cursor_exec (cursor, ts_query, ts_node); + TSQueryMatch match; + + /* Go over each match, collect captures and predicates. Include the + captures in the return list if all predicates in that match + passes. */ + Lisp_Object result = Qnil; + while (ts_query_cursor_next_match (cursor, &match)) + { + /* Get captured nodes. */ + Lisp_Object captures_lisp = Qnil; + const TSQueryCapture *captures = match.captures; + for (int idx=0; idx < match.capture_count; idx++) + { + uint32_t capture_name_len; + TSQueryCapture capture = captures[idx]; + Lisp_Object captured_node = + make_ts_node(lisp_parser, capture.node); + const char *capture_name = ts_query_capture_name_for_id + (ts_query, capture.index, &capture_name_len); + Lisp_Object cap = + Fcons (intern_c_string_1 (capture_name, capture_name_len), + captured_node); + captures_lisp = Fcons (cap, captures_lisp); + } + /* Get predicates. 
*/ + Lisp_Object predicates = + ts_predicates_for_pattern (ts_query, match.pattern_index); + + captures_lisp = Fnreverse (captures_lisp); + if (ts_eval_predicates (captures_lisp, predicates)) + { + result = CALLN (Fnconc, result, captures_lisp); + } + } + ts_query_delete (ts_query); + ts_query_cursor_delete (cursor); + return result; +} + +/*** Initialization */ + +/* Initialize the tree-sitter routines. */ +void +syms_of_treesit (void) +{ + DEFSYM (Qtreesit_parser_p, "treesit-parser-p"); + DEFSYM (Qtreesit_node_p, "treesit-node-p"); + DEFSYM (Qnamed, "named"); + DEFSYM (Qmissing, "missing"); + DEFSYM (Qextra, "extra"); + DEFSYM (Qhas_changes, "has-changes"); + DEFSYM (Qhas_error, "has-error"); + + DEFSYM (Qtreesit_error, "treesit-error"); + DEFSYM (Qtreesit_query_error, "treesit-query-error"); + DEFSYM (Qtreesit_parse_error, "treesit-parse-error"); + DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid"); + DEFSYM (Qtreesit_buffer_too_large, + "treesit-buffer-too-large"); + DEFSYM (Qtreesit_load_language_error, + "treesit-load-language-error"); + DEFSYM (Qtreesit_node_outdated, + "treesit-node-outdated"); + DEFSYM (Quser_emacs_directory, + "user-emacs-directory"); + + define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); + define_error (Qtreesit_query_error, "Query pattern is malformed", + Qtreesit_error); + /* Should be impossible, no need to document this error. 
*/ + define_error (Qtreesit_parse_error, "Parse failed", + Qtreesit_error); + define_error (Qtreesit_range_invalid, + "RANGES are invalid, they have to be ordered and not overlapping", + Qtreesit_error); + define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)", + Qtreesit_error); + define_error (Qtreesit_load_language_error, + "Cannot load language definition", + Qtreesit_error); + define_error (Qtreesit_node_outdated, + "This node is outdated, please retrieve a new one", + Qtreesit_error); + + DEFSYM (Qtreesit_parser_list, "treesit-parser-list"); + DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list, + doc: /* A list of tree-sitter parsers. + +If you removed a parser from this list, do not put it back in. Emacs +keeps the parser in this list updated with any change in the buffer. +If removed and put back in, there is no guarantee that the parser is in +sync with the buffer's content. */); + Vtreesit_parser_list = Qnil; + Fmake_variable_buffer_local (Qtreesit_parser_list); + + DEFVAR_LISP ("treesit-load-name-override-list", + Vtreesit_load_name_override_list, + doc: + /* An override list for unconventional tree-sitter libraries. + +By default, Emacs assumes the dynamic library for LANG is +libtree-sitter-LANG.EXT, where EXT is the OS specific extension for +dynamic libraries. Emacs also assumes that the name of the C function +the library provides is tree_sitter_LANG. If that is not the case, +add an entry + + (LANG LIBRARY-BASE-NAME FUNCTION-NAME) + +to this list, where LIBRARY-BASE-NAME is the filename of the dynamic +library without extension, FUNCTION-NAME is the function provided by +the library. */); + Vtreesit_load_name_override_list = Qnil; + + DEFVAR_LISP ("treesit-extra-load-path", + Vtreesit_extra_load_path, + doc: + /* Extra load paths of tree-sitter language definitions. 
+When trying to load a tree-sitter language definition, +Emacs looks at directories in this variable, +`user-emacs-directory'/tree-sitter, and system default locations for +dynamic libraries, in that order. */); + Vtreesit_extra_load_path = Qnil; + + defsubr (&Streesit_language_available_p); + + defsubr (&Streesit_parser_p); + defsubr (&Streesit_node_p); + + defsubr (&Streesit_node_parser); + + defsubr (&Streesit_parser_create); + defsubr (&Streesit_parser_buffer); + defsubr (&Streesit_parser_language); + + defsubr (&Streesit_parser_root_node); + /* defsubr (&Streesit_parse_string); */ + + defsubr (&Streesit_parser_set_included_ranges); + defsubr (&Streesit_parser_included_ranges); + + defsubr (&Streesit_node_type); + defsubr (&Streesit_node_start); + defsubr (&Streesit_node_end); + defsubr (&Streesit_node_string); + defsubr (&Streesit_node_parent); + defsubr (&Streesit_node_child); + defsubr (&Streesit_node_check); + defsubr (&Streesit_node_field_name_for_child); + defsubr (&Streesit_node_child_count); + defsubr (&Streesit_node_child_by_field_name); + defsubr (&Streesit_node_next_sibling); + defsubr (&Streesit_node_prev_sibling); + defsubr (&Streesit_node_first_child_for_pos); + defsubr (&Streesit_node_descendant_for_range); + defsubr (&Streesit_node_eq); + + defsubr (&Streesit_expand_pattern); + defsubr (&Streesit_expand_query); + defsubr (&Streesit_query_capture); +} diff --git a/src/treesit.h b/src/treesit.h new file mode 100644 index 0000000000..639c4eedc5 --- /dev/null +++ b/src/treesit.h @@ -0,0 +1,137 @@ +/* Header file for the tree-sitter integration. + +Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. 
+ +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see . */ + +#ifndef EMACS_TREESIT_H +#define EMACS_TREESIT_H + +#include +#include "lisp.h" + +INLINE_HEADER_BEGIN + +/* A wrapper for a tree-sitter parser, but also contains a parse tree + and other goodies for convenience. */ +struct Lisp_TS_Parser +{ + union vectorlike_header header; + /* A symbol represents the language this parser uses. See the + manual for more explanation. */ + Lisp_Object language_symbol; + /* The buffer associated with this parser. */ + Lisp_Object buffer; + /* The pointer to the tree-sitter parser. Never NULL. */ + TSParser *parser; + /* Pointer to the syntax tree. Initially is NULL, so check for NULL + before use. */ + TSTree *tree; + /* Teaches tree-sitter how to read an Emacs buffer. */ + TSInput input; + /* Re-parsing an unchanged buffer is not free for tree-sitter, so we + only make it re-parse when need_reparse == true. That usually + means some change is made in the buffer. But others could set + this field to true to force tree-sitter to re-parse. */ + bool need_reparse; + /* These two positions record the buffer byte position (1-based) of + the "visible region" that tree-sitter sees. Unlike markers, + These two positions do not change as the user inserts and deletes + text around them. Before re-parse, we move these positions to + match BUF_BEGV_BYTE and BUF_ZV_BYTE. Note that we don't need to + synchronize these positions when retrieving them in a function + that involves a node: if the node is not outdated, these + positions are synchronized. */ + ptrdiff_t visible_beg; + ptrdiff_t visible_end; + /* This counter is incremented every time a change is made to the + buffer in ts_record_change. 
The node retrieved from this parser + inherits this timestamp. This way we can make sure the node is + not outdated when we access its information. */ + ptrdiff_t timestamp; +}; + +/* A wrapper around a tree-sitter node. */ +struct Lisp_TS_Node +{ + union vectorlike_header header; + /* This prevents gc from collecting the tree before the node is done + with it. TSNode contains a pointer to the tree it belongs to, + and the parser object, when collected by gc, will free that + tree. */ + Lisp_Object parser; + TSNode node; + /* A node inherits its parser's timestamp at creation time. The + parser's timestamp increments as the buffer changes. This way we + can make sure the node is not outdated when we access its + information. */ + ptrdiff_t timestamp; +}; + +INLINE bool +TS_PARSERP (Lisp_Object x) +{ + return PSEUDOVECTORP (x, PVEC_TS_PARSER); +} + +INLINE struct Lisp_TS_Parser * +XTS_PARSER (Lisp_Object a) +{ + eassert (TS_PARSERP (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser); +} + +INLINE bool +TS_NODEP (Lisp_Object x) +{ + return PSEUDOVECTORP (x, PVEC_TS_NODE); +} + +INLINE struct Lisp_TS_Node * +XTS_NODE (Lisp_Object a) +{ + eassert (TS_NODEP (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node); +} + +INLINE void +CHECK_TS_PARSER (Lisp_Object parser) +{ + CHECK_TYPE (TS_PARSERP (parser), Qtreesit_parser_p, parser); +} + +INLINE void +CHECK_TS_NODE (Lisp_Object node) +{ + CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node); +} + +void +ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte); + +Lisp_Object +make_ts_parser (Lisp_Object buffer, TSParser *parser, + TSTree *tree, Lisp_Object language_symbol); + +Lisp_Object +make_ts_node (Lisp_Object parser, TSNode node); + +extern void syms_of_treesit (void); + +INLINE_HEADER_END + +#endif /* EMACS_TREESIT_H */ diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el new file mode 100644 index 0000000000..eb6e85c3fd --- /dev/null +++ 
b/test/src/treesit-tests.el @@ -0,0 +1,366 @@ +;;; treesit-tests.el --- tests for src/treesit.c -*- lexical-binding: t; -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Code: + +(require 'ert) +(require 'treesit) + +(ert-deftest treesit-basic-parsing () + "Test basic parsing routines." + (with-temp-buffer + (let ((parser (treesit-parser-create + (current-buffer) 'json))) + (should + (eq parser (car treesit-parser-list))) + (should + (equal (treesit-node-string + (treesit-parser-root-node parser)) + "(ERROR)")) + + (insert "[1,2,3]") + (should + (equal (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number) (number) (number)))")) + + (goto-char (point-min)) + (forward-char 3) + (insert "{\"name\": \"Bob\"},") + (should + (equal + (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))"))))) + +(ert-deftest treesit-node-api () + "Tests for node API." + (with-temp-buffer + (let (parser root-node doc-node object-node pair-node) + (progn + (insert "[1,2,{\"name\": \"Bob\"},3]") + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser))) + ;; `treesit-node-type'. 
+ (should (equal "document" (treesit-node-type root-node))) + ;; `treesit-node-check'. + (should (eq t (treesit-node-check root-node 'named))) + (should (eq nil (treesit-node-check root-node 'missing))) + (should (eq nil (treesit-node-check root-node 'extra))) + (should (eq nil (treesit-node-check root-node 'has-error))) + ;; `treesit-node-child'. + (setq doc-node (treesit-node-child root-node 0)) + (should (equal "array" (treesit-node-type doc-node))) + (should (equal (treesit-node-string doc-node) + "(array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number))")) + ;; `treesit-node-child-count'. + (should (eql 9 (treesit-node-child-count doc-node))) + (should (eql 4 (treesit-node-child-count doc-node t))) + ;; `treesit-node-field-name-for-child'. + (setq object-node (treesit-node-child doc-node 2 t)) + (setq pair-node (treesit-node-child object-node 0 t)) + (should (equal "object" (treesit-node-type object-node))) + (should (equal "pair" (treesit-node-type pair-node))) + (should (equal "key" + (treesit-node-field-name-for-child + pair-node 0))) + ;; `treesit-node-child-by-field-name'. + (should (equal "(string (string_content))" + (treesit-node-string + (treesit-node-child-by-field-name + pair-node "key")))) + ;; `treesit-node-next-sibling'. + (should (equal "(number)" + (treesit-node-string + (treesit-node-next-sibling object-node t)))) + (should (equal "(\",\")" + (treesit-node-string + (treesit-node-next-sibling object-node)))) + ;; `treesit-node-prev-sibling'. + (should (equal "(number)" + (treesit-node-string + (treesit-node-prev-sibling object-node t)))) + (should (equal "(\",\")" + (treesit-node-string + (treesit-node-prev-sibling object-node)))) + ;; `treesit-node-first-child-for-pos'. 
+ (should (equal "(number)" + (treesit-node-string + (treesit-node-first-child-for-pos + doc-node 3 t)))) + (should (equal "(\",\")" + (treesit-node-string + (treesit-node-first-child-for-pos + doc-node 3)))) + ;; `treesit-node-descendant-for-range'. + (should (equal "(\"{\")" + (treesit-node-string + (treesit-node-descendant-for-range + root-node 6 7)))) + (should (equal "(object (pair key: (string (string_content)) value: (string (string_content))))" + (treesit-node-string + (treesit-node-descendant-for-range + root-node 6 7 t)))) + ;; `treesit-node-eq'. + (should (treesit-node-eq root-node root-node)) + (should (not (treesit-node-eq root-node doc-node)))))) + +(ert-deftest treesit-query-api () + "Tests for query API." + (with-temp-buffer + (let (parser root-node pattern doc-node object-node pair-node) + (progn + (insert "[1,2,{\"name\": \"Bob\"},3]") + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser))) + + (dolist (pattern + '("(string) @string +(pair key: (_) @keyword) +((_) @bob (#match \"^B.b$\" @bob)) +(number) @number +((number) @n3 (#equal \"3\" @n3)) " + ((string) @string + (pair key: (_) @keyword) + ((_) @bob (:match "^B.b$" @bob)) + (number) @number + ((number) @n3 (:equal "3" @n3))))) + (should + (equal + '((number . "1") (number . "2") + (keyword . "\"name\"") + (string . "\"name\"") + (string . "\"Bob\"") + (bob . "Bob") + (number . "3") + (n3 . "3")) + (mapcar (lambda (entry) + (cons (car entry) + (treesit-node-text + (cdr entry)))) + (treesit-query-capture root-node pattern)))) + (should + (equal + "(type field: (_) @capture .) ? * + \"return\"" + (treesit-expand-query + '((type field: (_) @capture :anchor) + :? :* :+ "return")))))))) + +(ert-deftest treesit-narrow () + "Tests if narrowing works." 
+ (with-temp-buffer + (let (parser root-node pattern doc-node object-node pair-node) + (progn + (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx") + (narrow-to-region (+ (point-min) 3) (- (point-max) 3)) + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser))) + ;; This test is from the basic test. + (should + (equal + (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))")) + + (widen) + (goto-char (point-min)) + (insert "ooo") + (should (equal "oooxxx[1,{\"name\": \"Bob\"},2,3]xxx" + (buffer-string))) + (delete-region 10 26) + (should (equal "oooxxx[1,2,3]xxx" + (buffer-string))) + (narrow-to-region (+ (point-min) 6) (- (point-max) 3)) + ;; This test is also from the basic test. + (should + (equal (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number) (number) (number)))")) + (widen) + (goto-char (point-max)) + (insert "[1,2]") + (should (equal "oooxxx[1,2,3]xxx[1,2]" + (buffer-string))) + (narrow-to-region (- (point-max) 5) (point-max)) + (should + (equal (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number) (number)))")) + (widen) + (goto-char (point-min)) + (insert "[1]") + (should (equal "[1]oooxxx[1,2,3]xxx[1,2]" + (buffer-string))) + (narrow-to-region (point-min) (+ (point-min) 3)) + (should + (equal (treesit-node-string + (treesit-parser-root-node parser)) + "(document (array (number)))"))))) + +(ert-deftest treesit-range () + "Tests if range works." + (with-temp-buffer + (let (parser root-node pattern doc-node object-node pair-node) + (progn + (insert "[[1],oooxxx[1,2,3],xxx[1,2]]") + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser))) + (should-error + (treesit-parser-set-included-ranges + parser '((1 . 6) (5 . 
20))) + :type '(treesit-range-invalid)) + + (treesit-parser-set-included-ranges + parser '((1 . 6) (12 . 20) (23 . 29))) + (should (equal '((1 . 6) (12 . 20) (23 . 29)) + (treesit-parser-included-ranges parser))) + (should (equal "(document (array (array (number)) (array (number) (number) (number)) (array (number) (number))))" + (treesit-node-string + (treesit-parser-root-node parser)))) + ;; TODO: More tests. + ))) + +(ert-deftest treesit-multi-lang () + "Tests if parsing multiple language works." + (with-temp-buffer + (let (html css js html-range css-range js-range) + (progn + (insert "") + (setq html (treesit-get-parser-create 'html)) + (setq css (treesit-get-parser-create 'css)) + (setq js (treesit-get-parser-create 'javascript))) + ;; JavaScript. + (setq js-range + (treesit-query-range + 'html + '((script_element (raw_text) @capture)))) + (should (equal '((15 . 16)) js-range)) + (treesit-parser-set-included-ranges js js-range) + (should (equal "(program (expression_statement (number)))" + (treesit-node-string + (treesit-parser-root-node js)))) + ;; CSS. + (setq css-range + (treesit-query-range + 'html + '((style_element (raw_text) @capture)))) + (should (equal '((32 . 39)) css-range)) + (treesit-parser-set-included-ranges css css-range) + (should + (equal "(stylesheet (rule_set (selectors (tag_name)) (block)))" + (treesit-node-string + (treesit-parser-root-node css)))) + ;; TODO: More tests. + ))) + +(ert-deftest treesit-parser-supplemental () + "Supplemental node functions." + ;; `treesit-get-parser'. + (with-temp-buffer + (should (equal (treesit-get-parser 'json) nil))) + ;; `treesit-get-parser-create'. + (with-temp-buffer + (should (not (equal (treesit-get-parser-create 'json) + nil)))) + ;; `treesit-parse-string'. 
+ (should (equal (treesit-node-string + (treesit-parse-string + "[1,2,{\"name\": \"Bob\"},3]" + 'json)) + "(document (array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number)))")) + (with-temp-buffer + (let (parser root-node doc-node object-node pair-node) + (progn + (insert "[1,2,{\"name\": \"Bob\"},3]") + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser)) + (setq doc-node (treesit-node-child root-node 0))) + ;; `treesit-get-parser'. + (should (not (equal (treesit-get-parser 'json) + nil))) + ;; `treesit-language-at'. + (should (equal (treesit-language-at (point)) + 'json)) + ;; `treesit-set-ranges', `treesit-get-ranges'. + (treesit-set-ranges 'json + '((1 . 2))) + (should (equal (treesit-get-ranges 'json) + '((1 . 2))))))) + +(ert-deftest treesit-node-supplemental () + "Supplemental node functions." + (let (parser root-node doc-node array-node) + (progn + (insert "[1,2,{\"name\": \"Bob\"},3]") + (setq parser (treesit-parser-create + (current-buffer) 'json)) + (setq root-node (treesit-parser-root-node + parser)) + (setq doc-node (treesit-node-child root-node 0))) + ;; `treesit-node-buffer'. + (should (equal (treesit-node-buffer root-node) + (current-buffer))) + ;; `treesit-node-language'. + (should (eq (treesit-node-language root-node) + 'json)) + ;; `treesit-node-at'. + (should (equal (treesit-node-string + (treesit-node-at 1 2 'json)) + "(\"[\")")) + ;; `treesit-buffer-root-node'. + (should (treesit-node-eq + (treesit-buffer-root-node 'json) + root-node)) + ;; `treesit-filter-child'. + (should (equal (mapcar + (lambda (node) + (treesit-node-type node)) + (treesit-filter-child + doc-node (lambda (node) + (treesit-node-check node 'named)))) + '("number" "number" "object" "number"))) + ;; `treesit-node-text'. + (should (equal (treesit-node-text doc-node) + "[1,2,{\"name\": \"Bob\"},3]")) + ;; `treesit-node-index'. 
+ (should (eq (treesit-node-index doc-node) + 0)) + ;; TODO: + ;; `treesit-parent-until' + ;; `treesit-parent-while' + ;; `treesit-node-children' + ;; `treesit-node-field-name' + )) + +;; TODO +;; - Functions in treesit.el +;; - treesit-load-name-override-list + +(provide 'treesit-tests) +;;; treesit-tests.el ends here