bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 12/15] Eliminate the trie->maxshift field


From: Nick Cleaton
Subject: [PATCH 12/15] Eliminate the trie->maxshift field
Date: Sat, 2 Oct 2010 07:07:44 +0100

Move the maxshift storage to an external vector of ints, laid out in
the same order as the trie node vector.  Because maxshift isn't touched
when matching, this reduces the size of the working set of the matching
process.
---
 src/kwset.c |   41 ++++++++++++++++++++++++++++-------------
 1 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 27a47c3..e23cbca 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -74,7 +74,6 @@ struct trie
   unsigned int accepting;      /* Word index of accepted word, or zero. */
   struct trie *links;          /* Tree of edges leaving this node. */
   int shift;                   /* Shift function for search failures. */
-  int maxshift;                        /* Max shift of self and descendents. */
 };
 
 /* A record of a trie node that has one or more siblings. */
@@ -516,17 +515,24 @@ kwsprep (kwset_t kws)
     {
       struct trie *fail;
       struct trie *next[NCHAR], *child, *nodes_end;
+      int *maxshift;
 
       if ((err = build_trie_from_sorted_keywords (kwset)))
         return err;
 
+      MALLOC (maxshift, kwset->node_count);
+      if (!maxshift)
+        return _("memory exhausted");
+      for (i = 0; i < kwset->node_count; ++i)
+        maxshift[i] = kwset->mind;
+
       cpsort_free (kwset->cps);
       kwset->cps = NULL;
 
       /* Traverse the nodes of the trie, simultaneously computing the delta
          table, failure function, and shift function. */
       nodes_end = kwset->trie + kwset->node_count;
-      kwset->trie->shift = kwset->trie->maxshift = kwset->mind;
+      kwset->trie->shift = kwset->mind;
       for (curr = kwset->trie; curr < nodes_end; ++curr)
         {
           /* Loop over the immediate descendents of this node. */
@@ -538,17 +544,17 @@ kwsprep (kwset_t kws)
                   if (curr->rd.depth < delta[child->label])
                     delta[child->label] = curr->rd.depth;
 
-                  /* Initialize shift and maxshift, and compute the failure
-                     function for this node.  Since we are not traversing
-                     the trie in level order, we may need to recurse ahead
-                     to compute the fail of the fail and so on. */
+                  /* Initialize shift, and compute the failure function for
+                     this node.  Since we are not traversing the trie in
+                     level order, we may need to recurse ahead to compute
+                     the fail of the fail and so on. */
                   {
                     struct trie *target = child;
                     struct trie const *pfail = curr->lf.fail;
 
                     while (target->lf.fail == NULL && target != kwset->trie)
                       {
-                        target->shift = target->maxshift = kwset->mind;
+                        target->shift = kwset->mind;
                         pfail = triefail (target, pfail, kwset->trie);
                         target = target->lf.fail;
                       }
@@ -571,8 +577,11 @@ kwsprep (kwset_t kws)
               /* If the current node is accepting then the shift at the
                  fail and its descendents should be no larger than the
                  difference of their depths. */
-              if (curr->accepting && fail->maxshift > curr->rd.depth - 
fail->rd.depth)
-                fail->maxshift = curr->rd.depth - fail->rd.depth;
+              if (curr->accepting
+                  && maxshift[fail - kwset->trie] >
+                  curr->rd.depth - fail->rd.depth)
+                maxshift[fail - kwset->trie] =
+                  curr->rd.depth - fail->rd.depth;
             }
         }
 
@@ -581,15 +590,20 @@ kwsprep (kwset_t kws)
          fields. */
       for (curr = kwset->trie; curr < nodes_end; ++curr)
         {
+          int parent_maxshift, *child_maxshift_ptr;
+
           if (!(child = curr->links))
             continue;
 
+          parent_maxshift = maxshift[curr - kwset->trie];
+          child_maxshift_ptr = maxshift + (child - kwset->trie);
           do
             {
-              if (curr->maxshift < child->maxshift)
-                child->maxshift = curr->maxshift;
-              if (child->maxshift < child->shift)
-                child->shift = child->maxshift;
+              if (parent_maxshift < *child_maxshift_ptr)
+                *child_maxshift_ptr = parent_maxshift;
+              if (*child_maxshift_ptr < child->shift)
+                child->shift = *child_maxshift_ptr;
+              child_maxshift_ptr++;
             }
           while (!child++->is_lastkid);
 
@@ -598,6 +612,7 @@ kwsprep (kwset_t kws)
             plumb_siblings (&links, child - links);
           }
         }
+      free (maxshift);
 
       /* Create a vector, indexed by character code, of the outgoing links
          from the root node. */
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]