bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 01/15] Merge trie and tree into a single struct


From: Nick Cleaton
Subject: [PATCH 01/15] Merge trie and tree into a single struct
Date: Sat, 2 Oct 2010 07:07:44 +0100

Store data for a trie node and the edge that leads to it in a single
struct.  Previously, trie and tree structs were always allocated in
pairs (except for the top level trie node, which has no inbound edge)
so merging them removes a level of indirection, saves some space and
simplifies some code.
---
 src/kwset.c |  104 +++++++++++++++++++++++++---------------------------------
 1 files changed, 45 insertions(+), 59 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index d85fa30..18a0f61 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -49,21 +49,17 @@
 
 #define U(c) ((unsigned char) (c))
 
-/* Balanced tree of edges and labels leaving a given trie node. */
-struct tree
-{
-  struct tree *llink;          /* Left link; MUST be first field. */
-  struct tree *rlink;          /* Right link (to larger labels). */
-  struct trie *trie;           /* Trie node pointed to by this edge. */
-  unsigned char label;         /* Label on this edge. */
-  char balance;                        /* Difference in depths of subtrees. */
-};
-
-/* Node of a trie representing a set of reversed keywords. */
+/* Element of a trie representing a set of reversed keywords.  Holds data
+   for one trie node and for the edge that leads to it.  Sibling nodes are
+   arranged as a balanced tree.  */
 struct trie
 {
+  struct trie *llink;          /* Left tree link; MUST be first field. */
+  struct trie *rlink;          /* Right tree link (to larger labels). */
+  unsigned char label;         /* Label on the incoming edge. */
+  char balance;                        /* Difference in depths of subtrees. */
   unsigned int accepting;      /* Word index of accepted word, or zero. */
-  struct tree *links;          /* Tree of edges leaving this node. */
+  struct trie *links;          /* Tree of edges leaving this node. */
   struct trie *parent;         /* Parent of this node. */
   struct trie *next;           /* List of all trie nodes in level order. */
   struct trie *fail;           /* Aho-Corasick failure function. */
@@ -134,11 +130,11 @@ kwsincr (kwset_t kws, char const *text, size_t len)
   struct kwset *kwset;
   struct trie *trie;
   unsigned char label;
-  struct tree *link;
+  struct trie *link;
   int depth;
-  struct tree *links[DEPTH_SIZE];
+  struct trie *links[DEPTH_SIZE];
   enum { L, R } dirs[DEPTH_SIZE];
-  struct tree *t, *r, *l, *rl, *lr;
+  struct trie *t, *r, *l, *rl, *lr;
 
   kwset = (struct kwset *) kws;
   trie = kwset->trie;
@@ -154,7 +150,7 @@ kwsincr (kwset_t kws, char const *text, size_t len)
          looking for the current character and keeping track
          of the path followed. */
       link = trie->links;
-      links[0] = (struct tree *) &trie->links;
+      links[0] = (struct trie *) &trie->links;
       dirs[0] = L;
       depth = 1;
 
@@ -172,26 +168,19 @@ kwsincr (kwset_t kws, char const *text, size_t len)
          a link in the current trie node's tree. */
       if (!link)
         {
-          link = (struct tree *) obstack_alloc(&kwset->obstack,
-                                               sizeof (struct tree));
+          link = (struct trie *) obstack_alloc(&kwset->obstack,
+                                               sizeof (struct trie));
           if (!link)
             return _("memory exhausted");
           link->llink = NULL;
           link->rlink = NULL;
-          link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
-                                                     sizeof (struct trie));
-          if (!link->trie)
-            {
-              obstack_free(&kwset->obstack, link);
-              return _("memory exhausted");
-            }
-          link->trie->accepting = 0;
-          link->trie->links = NULL;
-          link->trie->parent = trie;
-          link->trie->next = NULL;
-          link->trie->fail = NULL;
-          link->trie->depth = trie->depth + 1;
-          link->trie->shift = 0;
+          link->accepting = 0;
+          link->links = NULL;
+          link->parent = trie;
+          link->next = NULL;
+          link->fail = NULL;
+          link->depth = trie->depth + 1;
+          link->shift = 0;
           link->label = label;
           link->balance = 0;
 
@@ -268,7 +257,7 @@ kwsincr (kwset_t kws, char const *text, size_t len)
             }
         }
 
-      trie = link->trie;
+      trie = link;
     }
 
   /* Mark the node we finally reached as accepting, encoding the
@@ -289,23 +278,23 @@ kwsincr (kwset_t kws, char const *text, size_t len)
 /* Enqueue the trie nodes referenced from the given tree in the
    given queue. */
 static void
-enqueue (struct tree *tree, struct trie **last)
+enqueue (struct trie *tree, struct trie **last)
 {
   if (!tree)
     return;
   enqueue(tree->llink, last);
   enqueue(tree->rlink, last);
-  (*last) = (*last)->next = tree->trie;
+  (*last) = (*last)->next = tree;
 }
 
 /* Compute the Aho-Corasick failure function for the trie nodes referenced
    from the given tree, given the failure function for their parent as
    well as a last resort failure node. */
 static void
-treefails (struct tree const *tree, struct trie const *fail,
+treefails (struct trie *tree, struct trie const *fail,
            struct trie *recourse)
 {
-  struct tree *link;
+  struct trie *link;
 
   if (!tree)
     return;
@@ -325,19 +314,19 @@ treefails (struct tree const *tree, struct trie const 
*fail,
           link = link->rlink;
       if (link)
         {
-          tree->trie->fail = link->trie;
+          tree->fail = link;
           return;
         }
       fail = fail->fail;
     }
 
-  tree->trie->fail = recourse;
+  tree->fail = recourse;
 }
 
 /* Set delta entries for the links of the given tree such that
    the preexisting delta value is larger than the current depth. */
 static void
-treedelta (struct tree const *tree,
+treedelta (struct trie const *tree,
            unsigned int depth,
            unsigned char delta[])
 {
@@ -351,7 +340,7 @@ treedelta (struct tree const *tree,
 
 /* Return true if A has every label in B. */
 static int
-hasevery (struct tree const *a, struct tree const *b)
+hasevery (struct trie const *a, struct trie const *b)
 {
   if (!b)
     return 1;
@@ -370,13 +359,13 @@ hasevery (struct tree const *a, struct tree const *b)
 /* Compute a vector, indexed by character code, of the trie nodes
    referenced from the given tree. */
 static void
-treenext (struct tree const *tree, struct trie *next[])
+treenext (struct trie *tree, struct trie *next[])
 {
   if (!tree)
     return;
   treenext(tree->llink, next);
   treenext(tree->rlink, next);
-  next[tree->label] = tree->trie;
+  next[tree->label] = tree;
 }
 
 /* Compute the shift for each trie node, as well as the delta
@@ -410,7 +399,7 @@ kwsprep (kwset_t kws)
       for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
         {
           kwset->target[i] = curr->links->label;
-          curr = curr->links->trie;
+          curr = curr->links;
         }
       /* Build the Boyer Moore delta.  Boy that's easy compared to CW. */
       for (i = 0; i < kwset->mind; ++i)
@@ -595,7 +584,6 @@ cwexec (kwset_t kws, char const *text, size_t len, struct 
kwsmatch *kwsmatch)
   unsigned char const *delta;
   int d;
   char const *end, *qlim;
-  struct tree const *tree;
   char const *trans;
 
 #ifdef lint
@@ -652,15 +640,14 @@ cwexec (kwset_t kws, char const *text, size_t len, struct 
kwsmatch *kwsmatch)
       while (beg > text)
         {
           c = trans ? trans[U(*--beg)] : *--beg;
-          tree = trie->links;
-          while (tree && c != tree->label)
-            if (c < tree->label)
-              tree = tree->llink;
+          trie = trie->links;
+          while (trie && c != trie->label)
+            if (c < trie->label)
+              trie = trie->llink;
             else
-              tree = tree->rlink;
-          if (tree)
+              trie = trie->rlink;
+          if (trie)
             {
-              trie = tree->trie;
               if (trie->accepting)
                 {
                   mch = beg;
@@ -703,15 +690,14 @@ cwexec (kwset_t kws, char const *text, size_t len, struct 
kwsmatch *kwsmatch)
       while (beg > text)
         {
           c = trans ? trans[U(*--beg)] : *--beg;
-          tree = trie->links;
-          while (tree && c != tree->label)
-            if (c < tree->label)
-              tree = tree->llink;
+          trie = trie->links;
+          while (trie && c != trie->label)
+            if (c < trie->label)
+              trie = trie->llink;
             else
-              tree = tree->rlink;
-          if (tree)
+              trie = trie->rlink;
+          if (trie)
             {
-              trie = tree->trie;
               if (trie->accepting && beg <= mch)
                 {
                   lmch = beg;
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]