emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

feature/tree-sitter eba6582436 09/15: Add the treesit-search functions t


From: Yuan Fu
Subject: feature/tree-sitter eba6582436 09/15: Add the treesit-search functions that supplant the removed ones
Date: Sun, 25 Sep 2022 00:11:59 -0400 (EDT)

branch: feature/tree-sitter
commit eba65824364474bde89bdce5f57a772d74a2c409
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>

    Add the treesit-search functions that supplant the removed ones
    
    The signatures also changed.
    
    treesit-traverse-depth-first   -> treesit-search-subtree
    treesit-traverse-breadth-first ->
    treesit-traverse-forward       -> treesit-search-forward
    treesit-search-forward         -> treesit-search-forward-goto
    treesit-search-beginning/end   -> treesit-search-forward-goto
                                   -> treesit-induce-sparse-tree
    
    * doc/lispref/parsing.texi (Retrieving Node): Add relevant manual
    sections.
    * lisp/treesit.el (treesit-search-forward-goto): New function.
    * src/treesit.c (ts_traverse_sibling_helper)
    (ts_traverse_match_predicate)
    (ts_search_dfs)
    (ts_search_forward)
    (treesit-search-subtree)
    (treesit-search-forward)
    (ts_build_sparse_tree)
    (Ftreesit_induce_sparse_tree): Add functions.
    * test/src/treesit-tests.el (treesit-node-supplemental): Add comments.
---
 doc/lispref/parsing.texi  | 105 ++++++++++++++
 lisp/treesit.el           |  24 +++-
 src/treesit.c             | 358 ++++++++++++++++++++++++++++++++++++++++++++++
 test/src/treesit-tests.el |   5 +
 4 files changed, 489 insertions(+), 3 deletions(-)

diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi
index 0dbc70ce2d..868b9bc074 100644
--- a/doc/lispref/parsing.texi
+++ b/doc/lispref/parsing.texi
@@ -580,11 +580,116 @@ for named child (@pxref{tree-sitter named node, named 
node}).
 
 @heading Searching for node
 
+@defun treesit-search-subtree node predicate &optional all backward limit
+This function traverses the subtree of @var{node}, and match
+@var{predicate} with each node along the way.  And @var{predicate} is
+a regexp that matches against each node's type, or a function that
+takes a node and returns nil/non-nil.  If a node matches, that node is
+returned, if no node ever matches, nil is returned.
+
+By default, this function only traverses named nodes, if @var{all} is
+non-nil, it traverses all nodes.  If @var{backward} is non-nil, it
+traverse backwards.  If @var{limit} is non-nil, it only traverses that
+number of levels down in the tree.
+@end defun
+
+@defun treesit-search-forward start predicate &optional all backward up
+This function is somewhat similar to @code{treesit-search-subtree}.
+It also traverse the parse tree and match each node with
+@var{predicate} (except for @var{start}), where @var{predicate} can be
+a regexp or a function.  For a tree like the below where @var{start}
+is marked 1, this function will traverse as numbered:
+
+@example
+@group
+              o
+              |
+     3--------4-----------8
+     |        |           |
+o--o-+--1  5--+--6    9---+-----12
+|  |    |        |    |         |
+o  o    2        7  +-+-+    +--+--+
+                    |   |    |  |  |
+                    10  11   13 14 15
+@end group
+@end example
+
+Same as in @code{treesit-search-subtree}, this function only searches
+for named nodes by default.  But if @var{all} is non-nil, it searches
+for all nodes.  And If @var{backward} is non-nil, it searches
+backwards.
 
+If @var{up} is non-nil, this function will only traverse to siblings
+and parents.  In that case, only 1 3 4 8 would be traversed.
 @end defun
 
+@defun treesit-search-forward-goto start predicate side &optional all backward 
up
+For those who want to not only search for a node but also move to it,
+this is the function to use.  Parameter @var{start}, @var{predicate},
+@var{all}, @var{backward}, and @var{up} are the same as in
+@code{treesit-search-forward}.  And @var{side} controls which side of
+the matched no do we stop at, it can be @code{'start} or @code{'end}.
+
+Beware of this common pitfall:
+
+@example
+@group
+;; This will not move point forward.
+(while (treesit-search-forward-goto
+        (treesit-node-at (point))
+        "xxx"
+        'start)
+  ...)
+
+;; This is will move point forward.
+(let ((node (treesit-node-at (point))))
+  (while (setq node (treesit-search-forward-goto
+                     node "xxx" 'start))
+    ...))
+@end group
+@end example
+
+The exact reason why is left as an exercise for the reader.
 @end defun
 
+@defun treesit-induce-sparse-tree root predicate &optional process-fn limit
+This function creates a sparse tree of @var{root}'s subtree.
+
+Basically, it takes the subtree under @var{root}, and combs it so only
+the nodes that match @var{predicate} are left, like picking out grapes
+on the vine.  Like previous functions, @var{predicate} can be a regexp
+string that matches against each node's type, or a function that takes
+a node and return nil/non-nil.
+
+For example, for a subtree on the left that consist of both numbers
+and letters, if @var{predicate} is ``is letter'', the returned tree is
+the one on the right.
+
+@example
+@group
+    a                 a              a
+    |                 |              |
++---+---+         +---+---+      +---+---+
+|   |   |         |   |   |      |   |   |
+b   1   2         b   |   |      b   c   d
+    |   |     =>      |   |  =>      |
+    c   +--+          c   +          e
+    |   |  |          |   |
+ +--+   d  4       +--+   d
+ |  |              |
+ e  5              e
+@end group
+@end example
+
+If @var{process-fn} is non-nil, instead of returning the matched
+nodes, pass each node to @var{process-fn} use the return value
+instead.  If non-nil, @var{limit} is the number of levels to go down
+from @var{root}.
+
+Each node in the returned tree looks like @code{(@var{node}
+. (@var{child} ...))}.  The root of this tree might be nil, if
+@var{root} doesn't match @var{pred}.  If no node matches
+@var{predicate}, return nil.
 @end defun
 
 @heading More convenient functions
diff --git a/lisp/treesit.el b/lisp/treesit.el
index 2defd83dc6..9bdff83da8 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -722,9 +722,27 @@ indentation (target) is in green, current indentation is 
in red."
 
 ;;; Search
 
-
-
-
+(defun treesit-search-forward-goto
+    (start predicate side &optional all backward up)
+  "Search for node in the parse tree and move point to it.
+
+Start traversing the tree from node START, and match PREDICATE with
+each node along the way (except START).  PREDICATE can be either a
+regexp that matches against each node's type, or a function that takes
+a node and returns nil/non-nil for match/no match.
+
+If a node matches, move to that node and return the node,
+otherwise return nil.  SIDE controls whether we move to the start
+or end of the matches node, it can be either \\='start or
+\\='end.
+
+ALL, BACKWARD, and UP are the same as in `treesit-search-forward'."
+  (when-let ((node (treesit-search-forward
+                    start predicate all backward up)))
+    (pcase side
+      ('start (goto-char (treesit-node-start node)))
+      ('end (goto-char (treesit-node-end node))))
+    node))
 
 ;;; Debugging
 
diff --git a/src/treesit.c b/src/treesit.c
index 51261c34a2..f3efcbe596 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1805,6 +1805,358 @@ query.  */)
   return Fnreverse (result);
 }
 
+/*** Navigation */
+
+/* Return the next/previous named/unnamed sibling of NODE.  FORWARD
+   controls the direction and NAMED controls the nameness.
+ */
+static TSNode
+ts_traverse_sibling_helper (TSNode node, bool forward, bool named)
+{
+  if (forward)
+    {
+      if (named)
+       return ts_node_next_named_sibling (node);
+      else
+       return ts_node_next_sibling (node);
+    }
+  else
+    {
+      if (named)
+       return ts_node_prev_named_sibling (node);
+      else
+       return ts_node_prev_sibling (node);
+    }
+}
+
+/* Return true if NODE matches PRED.  PRED can be a string or a
+   function.  This function doesn't check for PRED's type.  */
+static bool
+ts_traverse_match_predicate
+(TSNode node, Lisp_Object pred, Lisp_Object parser)
+{
+  if (STRINGP (pred))
+    {
+      const char *type = ts_node_type (node);
+      return (fast_c_string_match_ignore_case
+             (pred, type, strlen (type)) >= 0);
+    }
+  else
+    {
+      Lisp_Object lisp_node = make_ts_node (parser, node);
+      return !NILP (CALLN (Ffuncall, pred, lisp_node));
+    }
+
+}
+
+/* Traverse the parse tree starting from ROOT (but ROOT is not
+   matches against PRED).  PRED can be a function (takes a node and
+   returns nil/non-nil),or a string (treated as regexp matching the
+   node's type, ignores case, must be all single byte characters).  If
+   the node satisfies PRED , terminate, set ROOT to that node, and
+   return true.  If no node satisfies PRED, return FALSE.  PARSER is
+   the parser of ROOT.
+
+   LIMIT is the number of levels we descend in the tree.  If NO_LIMIT
+   is true, LIMIT is ignored.  FORWARD controls the direction in which
+   we traverse the tree, true means forward, false backward.  If NAMED
+   is true, only traverse named nodes, if false, all nodes.  If
+   SKIP_ROOT is true, don't match ROOT.  */
+static bool
+ts_search_dfs
+(TSNode *root, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, ptrdiff_t limit, bool no_limit,
+ bool skip_root)
+{
+  /* TSTreeCursor doesn't allow us to move backward, so we can't use
+     it.  We could use limit == -1 to indicate no_limit == true, but
+     separating them is safer.  */
+  TSNode node = *root;
+
+  if (!skip_root && ts_traverse_match_predicate (node, pred, parser))
+    {
+      *root = node;
+      return true;
+    }
+
+  if (!no_limit && limit <= 0)
+    return false;
+  else
+    {
+      int count = named ?
+       ts_node_named_child_count( node)
+       : ts_node_child_count (node);
+      for (int offset=0; offset < count; offset++)
+       {
+         uint32_t idx = forward ? offset
+           : count - offset - 1;
+         TSNode child = ts_node_child (node, idx);
+
+         if (!ts_node_is_null (child)
+             && ts_search_dfs (&child, pred, parser, named,
+                               forward, limit - 1, no_limit, false))
+           {
+             *root = child;
+             return true;
+           }
+       }
+      return false;
+    }
+}
+
+/* Go thought the whole tree linearly depth first, starting from
+   START.  PRED, PARSER, NAMED, FORWARD are the same as in
+   ts_search_subtre.  If UP_ONLY is true, never go to children, only
+   sibling and parents.  If SKIP_START is true, don'tt match
+   START.  */
+static bool
+ts_search_forward
+(TSNode *start, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, bool up_only, bool skip_start)
+{
+  TSNode node = *start;
+
+  if (!up_only && ts_search_dfs
+      (start, pred, parser, named, forward, 0, true, skip_start))
+    return true;
+
+  TSNode next = ts_traverse_sibling_helper(node, forward, named);
+  while (ts_node_is_null (next))
+    {
+      node = ts_node_parent (node);
+      if (ts_node_is_null (node))
+       return false;
+
+      if (ts_traverse_match_predicate (node, pred, parser))
+       {
+         *start = node;
+         return false;
+       }
+      next = ts_traverse_sibling_helper(node, forward, named);
+    }
+  if (ts_search_forward
+      (&next, pred, parser, named, forward, up_only, false))
+    {
+      *start = next;
+      return true;
+    }
+  else
+    return false;
+}
+
+DEFUN ("treesit-search-subtree",
+       Ftreesit_search_subtree,
+       Streesit_search_subtree, 2, 5, 0,
+       doc: /* Traverse the parse tree depth-first.
+
+Traverse the subtree of NODE, and match PREDICATE with each node along
+the way.  PREDICATE is a regexp string that matches against each
+node's type, or a function that takes a node and returns nil/non-nil.
+
+By default, only traverse named nodes, if ALL is non-nil, traverse all
+nodes.  If BACKWARD is non-nil, traverse backwards.  If LIMIT is
+non-nil, we only traverse that number of levels down in the tree.
+
+Return the first matched node, or nil if none matches.  */)
+  (Lisp_Object node, Lisp_Object predicate, Lisp_Object all,
+   Lisp_Object backward, Lisp_Object limit)
+{
+  CHECK_TS_NODE (node);
+  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+             list3 (Qor, Qstringp, Qfunctionp), predicate);
+  CHECK_SYMBOL (all);
+  CHECK_SYMBOL (backward);
+
+  ptrdiff_t the_limit = 0;
+  bool no_limit = false;
+  if (NILP (limit))
+      no_limit = true;
+  else
+    {
+      CHECK_FIXNUM (limit);
+      the_limit = XFIXNUM (limit);
+    }
+
+  TSNode ts_node = XTS_NODE (node)->node;
+  Lisp_Object parser = XTS_NODE (node)->parser;
+  if (ts_search_dfs
+      (&ts_node, predicate, parser, NILP (all),
+       NILP (backward), the_limit, no_limit, false))
+    {
+      return make_ts_node (parser, ts_node);
+    }
+  else
+    return Qnil;
+}
+
+DEFUN ("treesit-search-forward",
+       Ftreesit_search_forward,
+       Streesit_search_forward, 2, 5, 0,
+       doc: /* Search for node in the parse tree.
+
+Start traversing the tree from node START, and match PREDICATE with
+each node along the way (except START).  PREDICATE is a regexp string
+that matches against each node's type, or a function that takes a node
+and returns nil/non-nil.
+
+By default, only search for named nodes, if ALL is non-nil, search for
+all nodes.  If BACKWARD is non-nil, search backwards.
+
+Return the first matched node, or nil if none matches.
+
+For a tree like the below where START is marked 1, traverse as
+numbered:
+                16
+                |
+       3--------4-----------8
+       |        |           |
+  o--o-+--1  5--+--6    9---+-----12
+  |  |    |        |    |         |
+  o  o    2        7  +-+-+    +--+--+
+                      |   |    |  |  |
+                      10  11   13 14 15
+
+If UP is non-nil, only traverse to siblings and parents.  In that
+case, only 1 3 4 8 16 would be traversed.  */)
+  (Lisp_Object start, Lisp_Object predicate, Lisp_Object all,
+   Lisp_Object backward, Lisp_Object up)
+{
+  CHECK_TS_NODE (start);
+  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+             list3 (Qor, Qstringp, Qfunctionp), predicate);
+  CHECK_SYMBOL (all);
+  CHECK_SYMBOL (backward);
+  CHECK_SYMBOL (up);
+
+  TSNode ts_start = XTS_NODE (start)->node;
+  Lisp_Object parser = XTS_NODE (start)->parser;
+  if (ts_search_forward
+      (&ts_start, predicate, parser, NILP (all),
+       NILP (backward), !NILP (up), true))
+    {
+      return make_ts_node (parser, ts_start);
+    }
+  else
+    return Qnil;
+}
+
+/* Recursively traverse the tree under CURSOR, and append the result
+   subtree to PARENT's cdr.  See more in `ts_build_sparse_tree'.  */
+static void
+ts_build_sparse_tree
+(TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred,
+ Lisp_Object process_fn, ptrdiff_t limit,
+ bool no_limit, Lisp_Object parser)
+{
+
+  TSNode node = ts_tree_cursor_current_node (cursor);
+  bool match = ts_traverse_match_predicate (node, pred, parser);
+  if (match)
+    {
+      /* If this node matches pred, add a new node to the parent's
+        children list.  */
+      Lisp_Object lisp_node = make_ts_node (parser, node);
+      if (!NILP (process_fn))
+       {
+         lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
+       }
+      Lisp_Object this = Fcons (lisp_node, Qnil);
+      Fsetcdr (parent, Fcons (this, Fcdr (parent)));
+      /* Now for children nodes, this is the new parent.  */
+      parent = this;
+    }
+  /* Go through each child.  */
+  if ((no_limit || limit > 0)
+      && ts_tree_cursor_goto_first_child (cursor))
+    {
+      do
+       {
+         /* Make sure not to use node after the recursive funcall.
+            Then C compilers should be smart enough not to copy NODE
+            to stack.  */
+         ts_build_sparse_tree
+           (cursor, parent, pred, process_fn,
+            limit - 1, no_limit, parser);
+       }
+      while (ts_tree_cursor_goto_next_sibling (cursor));
+      /* Don't forget to come back to this node.  */
+      ts_tree_cursor_goto_parent (cursor);
+    }
+  /* Before we go, reverse children in the sparse tree.  */
+  if (match)
+    {
+      /* When match == true, "parent" is actually the node we added in
+        this layer (parent = this).  */
+      Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+    }
+}
+
+DEFUN ("treesit-induce-sparse-tree",
+       Ftreesit_induce_sparse_tree,
+       Streesit_induce_sparse_tree, 2, 4, 0,
+       doc: /* Create a sparse tree of ROOT's subtree.
+
+Basically, take the subtree under ROOT, and comb it so only the nodes
+that match PREDICATE are left, like picking out grapes on the vine.
+PREDICATE is a regexp string that matches against each node's type.
+
+For a subtree on the left that consist of both numbers and letters, if
+PREDICATE is "is letter", the returned tree is the one on the right.
+
+       a                 a              a
+       |                 |              |
+    +---+---+         +---+---+      +---+---+
+    |   |   |         |   |   |      |   |   |
+    b   1   2         b   |   |      b   c   d
+       |   |     =>      |   |  =>      |
+       c   +--+          c   +          e
+       |   |  |          |   |
+     +--+   d  4       +--+   d
+     |  |              |
+     e  5              e
+
+If PROCESS-FN is non-nil, instead of returning the matched nodes, pass
+each node to PROCESS-FN use the return value instead.  If non-nil,
+LIMIT is the number of levels to go down from ROOT.
+
+Each node in the returned tree looks like (NODE . (CHILD ...)).  The
+root of this tree might be nil, if ROOT doesn't match PREDICATE.  If
+no node matches PRED, return nil.
+
+PREDICATE can also be a function that takes a node and returns
+nil/non-nil, but it is slower and more memory consuming than
+regexp.  */)
+  (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
+   Lisp_Object limit)
+{
+  CHECK_TS_NODE (root);
+  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+             list3 (Qor, Qstringp, Qfunctionp), predicate);
+
+  if (!NILP (process_fn))
+    CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
+  ptrdiff_t the_limit = 0;
+  bool no_limit = false;
+  if (NILP (limit))
+      no_limit = true;
+  else
+    {
+      CHECK_FIXNUM (limit);
+      the_limit = XFIXNUM (limit);
+    }
+
+  TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
+  Lisp_Object parser = XTS_NODE (root)->parser;
+  Lisp_Object parent = Fcons (Qnil, Qnil);
+  ts_build_sparse_tree
+    (&cursor, parent, predicate, process_fn,
+     the_limit, no_limit, parser);
+  if (NILP (Fcdr (parent)))
+    return Qnil;
+  else
+    return parent;
+}
+
 /*** Initialization */
 
 /* Initialize the tree-sitter routines.  */
@@ -1835,6 +2187,8 @@ syms_of_treesit (void)
          "user-emacs-directory");
   DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
 
+  DEFSYM (Qor, "or");
+
   define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
   define_error (Qtreesit_query_error, "Query pattern is malformed",
                Qtreesit_error);
@@ -1925,4 +2279,8 @@ dynamic libraries, in that order.  */);
   defsubr (&Streesit_query_expand);
   defsubr (&Streesit_query_compile);
   defsubr (&Streesit_query_capture);
+
+  defsubr (&Streesit_search_subtree);
+  defsubr (&Streesit_search_forward);
+  defsubr (&Streesit_induce_sparse_tree);
 }
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el
index fbf99ff087..6fa891a136 100644
--- a/test/src/treesit-tests.el
+++ b/test/src/treesit-tests.el
@@ -434,12 +434,17 @@ visible_end.)"
     ;; `treesit-parent-while'
     ;; `treesit-node-children'
     ;; `treesit-node-field-name'
+    ;; `treesit-search-forward-goto'
     ))
 
 ;; TODO
 ;; - Functions in treesit.el
 ;; - treesit-load-name-override-list
+;; - treesit-search-subtree
 ;; - treesit-search-forward
+;; - treesit-induce-sparse-tree
+;; - treesit-search-forward
+
 
 (provide 'treesit-tests)
 ;;; treesit-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]