bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 15/15] Make trie->shift an unsigned short


From: Nick Cleaton
Subject: [PATCH 15/15] Make trie->shift an unsigned short
Date: Sat, 2 Oct 2010 07:07:44 +0100

Reduce trie->shift from an int to an unsigned short, taking
sizeof(struct trie) down from 20 to 16 on i386.  Keyword sets where
the minimum keyword length is much greater than USHRT_MAX could
suffer a performance hit, since the matcher won't be shifting as far
as it could.
---
 src/kwset.c |   27 ++++++++++++++++++++++-----
 1 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 2e4ab6d..0cb6578 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -52,6 +52,22 @@
 #define MALLOC(ptr, count) ( (ptr) = malloc ( (count) * sizeof (*(ptr)) ) )
 #define FREENULL(ptr) ( free (ptr), (ptr) = NULL )
 
+/* On i386, changing the type of trie->shift from int to unsigned short
+   reduces sizeof(struct trie) from 20 to 16.  The cost is a potential
+   performance hit if the minimum keyword length is much greater than
+   USHRT_MAX.
+   TODO: add an autoconf check for type sizes, and use an unsigned short
+   for trie->shift only if it makes sense to do so. */
+#define TRIE_SHIFT_USHRT 1
+
+#if TRIE_SHIFT_USHRT
+  typedef unsigned short trie_shift_t;
+# define LIMIT_SHIFT_VALUE(s) ( (s) < USHRT_MAX ? (s) : USHRT_MAX )
+#else
+  typedef int trie_shift_t;
+# define LIMIT_SHIFT_VALUE(s) (s)
+#endif
+
 /* Element of a trie representing a set of reversed keywords.  Holds data
    for one trie node and for the edge that leads to it.  Sibling nodes are
    arranged as a balanced tree.  */
@@ -71,8 +87,8 @@ struct trie
   rd;
   unsigned char label;         /* Label on the incoming edge. */
   unsigned char flags;          /* Flags for this node. */
+  trie_shift_t shift;           /* Shift function for search failures. */
   struct trie *links;          /* Tree of edges leaving this node. */
-  int shift;                   /* Shift function for search failures. */
 };
 
 /* Bit masks for trie->flags */
@@ -521,7 +537,7 @@ kwsprep (kwset_t kws)
     {
       struct trie *fail;
       struct trie *next[NCHAR], *child, *nodes_end;
-      int *maxshift;
+      int *maxshift, max_shift_value;
 
       if ((err = build_trie_from_sorted_keywords (kwset)))
         return err;
@@ -544,8 +560,9 @@ kwsprep (kwset_t kws)
       MALLOC (maxshift, kwset->node_count);
       if (!maxshift)
         return _("memory exhausted");
+      max_shift_value = LIMIT_SHIFT_VALUE (kwset->mind);
       for (i = 0; i < kwset->node_count; ++i)
-        maxshift[i] = kwset->mind;
+        maxshift[i] = max_shift_value;
 
       cpsort_free (kwset->cps);
       kwset->cps = NULL;
@@ -553,7 +570,7 @@ kwsprep (kwset_t kws)
       /* Traverse the nodes of the trie, simultaneously computing the delta
          table, failure function, and shift function. */
       nodes_end = kwset->trie + kwset->node_count;
-      kwset->trie->shift = kwset->mind;
+      kwset->trie->shift = max_shift_value;
       for (curr = kwset->trie; curr < nodes_end; ++curr)
         {
           /* Loop over the immediate descendents of this node. */
@@ -575,7 +592,7 @@ kwsprep (kwset_t kws)
 
                     while (target->lf.fail == NULL && target != kwset->trie)
                       {
-                        target->shift = kwset->mind;
+                        target->shift = max_shift_value;
                         pfail = triefail (target, pfail, kwset->trie);
                         target = target->lf.fail;
                       }
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]