bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH] dfa: make position_sets grow automatically


From: Paolo Bonzini
Subject: [RFC PATCH] dfa: make position_sets grow automatically
Date: Tue, 28 Jun 2011 17:14:32 +0200

Inspired by the recent heap overrun patch, I propose to make all
position_sets grow dynamically.  What do you think?

Paolo Bonzini (5):
  dfa: use a separate data type for grps
  dfa: introduce alloc_posset
  dfa: remove dead assignment
  dfa: move nalloc to position_set structure
  dfa: automatically resize position_sets

 src/dfa.c |   85 +++++++++++++++++++++++++++++++++----------------------------
 1 files changed, 46 insertions(+), 39 deletions(-)

-- 
1.7.5.2

>From 765d61f65a0b4a193fd712382e85b786222a8fc4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:38:55 +0200
Subject: [PATCH 1/5] dfa: use a separate data type for grps

* src/dfa.c (leaf_set): New.
(dfastate): Use leaf_set for grps, as the constraint field is unused.
---
 src/dfa.c |   29 +++++++++++++++++++----------
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index a36b80a..d875517 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -244,6 +244,13 @@ typedef struct
   int nelem;                   /* Number of elements in this set. */
 } position_set;
 
+/* Sets of leaves are also stored as arrays. */
+typedef struct
+{
+  unsigned int *elems;         /* Elements of this position set. */
+  int nelem;                   /* Number of elements in this set. */
+} leaf_set;
+
 /* A state of the dfa consists of a set of positions, some flags,
    and the token value of the lowest-numbered position of the state that
    contains an END token. */
@@ -2356,7 +2363,7 @@ dfaanalyze (struct dfa *d, int searchflag)
 void
 dfastate (int s, struct dfa *d, int trans[])
 {
-  position_set *grps;          /* As many as will ever be needed. */
+  leaf_set *grps;              /* As many as will ever be needed. */
   charclass *labels;           /* Labels corresponding to the groups. */
   int ngrps = 0;               /* Number of groups actually used. */
   position pos;                        /* Current position being considered. */
@@ -2483,14 +2490,16 @@ dfastate (int s, struct dfa *d, int trans[])
             {
               copyset(leftovers, labels[ngrps]);
               copyset(intersect, labels[j]);
-              MALLOC(grps[ngrps].elems, position, d->nleaves);
-              copy(&grps[j], &grps[ngrps]);
+              MALLOC(grps[ngrps].elems, unsigned int, d->nleaves);
+              memcpy(grps[ngrps].elems, grps[j].elems,
+                     grps[j].nelem * sizeof(unsigned int));
+              grps[ngrps].nelem = grps[j].nelem;
               ++ngrps;
             }
 
-          /* Put the position in the current group.  Note that there is no
-             reason to call insert() here. */
-          grps[j].elems[grps[j].nelem++] = pos;
+          /* Put the position in the current group.  The constraint is
+            irrelevant here.  */
+          grps[j].elems[grps[j].nelem++] = pos.index;
 
           /* If every character matching the current position has been
              accounted for, we're done. */
@@ -2504,9 +2513,9 @@ dfastate (int s, struct dfa *d, int trans[])
         {
           copyset(matches, labels[ngrps]);
           zeroset(matches);
-          MALLOC(grps[ngrps].elems, position, d->nleaves);
+          MALLOC(grps[ngrps].elems, unsigned int, d->nleaves);
           grps[ngrps].nelem = 1;
-          grps[ngrps].elems[0] = pos;
+          grps[ngrps].elems[0] = pos.index;
           ++ngrps;
         }
     }
@@ -2553,8 +2562,8 @@ dfastate (int s, struct dfa *d, int trans[])
       /* Find the union of the follows of the positions of the group.
          This is a hideously inefficient loop.  Fix it someday. */
       for (j = 0; j < grps[i].nelem; ++j)
-        for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
-          insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
+        for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
+          insert(d->follows[grps[i].elems[j]].elems[k], &follows);
 
 #if MBS_SUPPORT
       if (d->mb_cur_max > 1)
-- 
1.7.5.2


>From 635579f5fe9aa92fc781eb21901ef08f8be13a48 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:02:51 +0200
Subject: [PATCH 2/5] dfa: introduce alloc_posset

* src/dfa.c (alloc_posset): New function, use it throughout.
---
 src/dfa.c |   25 ++++++++++++++-----------
 1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index d875517..4ad3a9b 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1842,6 +1842,13 @@ copy (position_set const *src, position_set *dst)
   dst->nelem = src->nelem;
 }
 
+static void
+alloc_posset (position_set *s, size_t size)
+{
+  MALLOC(s->elems, position, size);
+  s->nelem = 0;
+}
+
 /* Insert a position in a set.  Position sets are maintained in sorted
    order according to index.  If position already exists in the set with
    the same index then their constraints are logically or'd together.
@@ -1945,7 +1952,7 @@ state_index (struct dfa *d, position_set const *s, int 
newline, int letter)
   /* We'll have to create a new state. */
   REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
   d->states[i].hash = hash;
-  MALLOC(d->states[i].elems.elems, position, s->nelem);
+  alloc_posset(&d->states[i].elems, s->nelem);
   copy(s, &d->states[i].elems);
   d->states[i].newline = newline;
   d->states[i].letter = letter;
@@ -2139,7 +2146,7 @@ dfaanalyze (struct dfa *d, int searchflag)
   MALLOC(lastpos, position, d->nleaves);
   o_lastpos = lastpos, lastpos += d->nleaves;
   CALLOC(nalloc, int, d->tindex);
-  MALLOC(merged.elems, position, 2 * d->nleaves);
+  alloc_posset(&merged, d->nleaves);
 
   CALLOC(d->follows, position_set, d->tindex);
 
@@ -2249,7 +2256,7 @@ dfaanalyze (struct dfa *d, int searchflag)
 
         /* Allocate the follow set for this position. */
         nalloc[i] = 1;
-        MALLOC(d->follows[i].elems, position, nalloc[i]);
+        alloc_posset(&d->follows[i], nalloc[i]);
         break;
       }
 #ifdef DEBUG
@@ -2419,10 +2426,7 @@ dfastate (int s, struct dfa *d, int trans[])
              must put it to d->states[s].mbps, which contains the positions
              which can match with a single character not a byte.  */
           if (d->states[s].mbps.nelem == 0)
-            {
-              MALLOC(d->states[s].mbps.elems, position,
-                     d->states[s].elems.nelem);
-            }
+            alloc_posset(&d->states[s].mbps, d->states[s].elems.nelem);
           insert(pos, &(d->states[s].mbps));
           continue;
         }
@@ -2520,8 +2524,8 @@ dfastate (int s, struct dfa *d, int trans[])
         }
     }
 
-  MALLOC(follows.elems, position, d->nleaves);
-  MALLOC(tmp.elems, position, d->nleaves);
+  alloc_posset(&follows, d->nleaves);
+  alloc_posset(&tmp, d->nleaves);
 
   /* If we are a searching matcher, the default transition is to a state
      containing the positions of state 0, otherwise the default transition
@@ -3145,8 +3149,7 @@ transit_state (struct dfa *d, int s, unsigned char const 
**pp)
     }
 
   /* This state has some operators which can match a multibyte character.  */
-  follows.nelem = 0;
-  MALLOC(follows.elems, position, d->nleaves);
+  alloc_posset(&follows, d->nleaves);
 
   /* `maxlen' may be longer than the length of a character, because it may
      not be a character but a (multi character) collating element.
-- 
1.7.5.2


>From 690906eb043236efa06aa1cdc83d980717a0b554 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:58:37 +0200
Subject: [PATCH 3/5] dfa: remove dead assignment

* src/dfa.c (transit_state): transit_state_consume_1char will clear follows,
do not do this ourselves.
---
 src/dfa.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 4ad3a9b..f174cc9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3163,7 +3163,6 @@ transit_state (struct dfa *d, int s, unsigned char const 
**pp)
 
   while (*pp - p1 < maxlen)
     {
-      follows.nelem = 0;
       transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
 
       for (i = 0; i < nelem ; i++)
-- 
1.7.5.2


>From d65738a921827089714c78bcc483bbfc17d721b4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:07:44 +0200
Subject: [PATCH 4/5] dfa: move nalloc to position_set structure

* src/dfa.c (position_set): Add alloc.
(alloc_posset): Initialize it.
(dfaanalyze): Use it instead of the nalloc array or nelem.
---
 src/dfa.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index f174cc9..2d3bb6f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -242,6 +242,7 @@ typedef struct
 {
   position *elems;             /* Elements of this position set. */
   int nelem;                   /* Number of elements in this set. */
+  int alloc;                    /* Number of elements allocated in ELEMS.  */
 } position_set;
 
 /* Sets of leaves are also stored as arrays. */
@@ -1846,6 +1847,7 @@ static void
 alloc_posset (position_set *s, size_t size)
 {
   MALLOC(s->elems, position, size);
+  s->alloc = size;
   s->nelem = 0;
 }
 
@@ -2113,7 +2115,6 @@ dfaanalyze (struct dfa *d, int searchflag)
   position *firstpos;          /* Array where firstpos elements are stored. */
   int *nlastpos;               /* Element count stack for lastpos sets. */
   position *lastpos;           /* Array where lastpos elements are stored. */
-  int *nalloc;                 /* Sizes of arrays allocated to follow sets. */
   position_set tmp;            /* Temporary set for merging sets. */
   position_set merged;         /* Result of merging sets. */
   int wants_newline;           /* True if some position wants newline info. */
@@ -2145,7 +2146,6 @@ dfaanalyze (struct dfa *d, int searchflag)
   o_nlast = nlastpos;
   MALLOC(lastpos, position, d->nleaves);
   o_lastpos = lastpos, lastpos += d->nleaves;
-  CALLOC(nalloc, int, d->tindex);
   alloc_posset(&merged, d->nleaves);
 
   CALLOC(d->follows, position_set, d->tindex);
@@ -2175,7 +2175,7 @@ dfaanalyze (struct dfa *d, int searchflag)
           {
             merge(&tmp, &d->follows[pos[j].index], &merged);
             REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
-                                 nalloc[pos[j].index], merged.nelem - 1);
+                                 d->follows[pos[j].index].alloc, merged.nelem 
- 1);
             copy(&merged, &d->follows[pos[j].index]);
           }
 
@@ -2195,7 +2195,7 @@ dfaanalyze (struct dfa *d, int searchflag)
           {
             merge(&tmp, &d->follows[pos[j].index], &merged);
             REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
-                                 nalloc[pos[j].index], merged.nelem - 1);
+                                 d->follows[pos[j].index].alloc, merged.nelem 
- 1);
             copy(&merged, &d->follows[pos[j].index]);
           }
 
@@ -2255,8 +2255,7 @@ dfaanalyze (struct dfa *d, int searchflag)
         firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
 
         /* Allocate the follow set for this position. */
-        nalloc[i] = 1;
-        alloc_posset(&d->follows[i], nalloc[i]);
+        alloc_posset(&d->follows[i], 1);
         break;
       }
 #ifdef DEBUG
@@ -2304,8 +2303,8 @@ dfaanalyze (struct dfa *d, int searchflag)
 #endif
         copy(&d->follows[i], &merged);
         epsclosure(&merged, d);
-        if (d->follows[i].nelem < merged.nelem)
-          REALLOC(d->follows[i].elems, position, merged.nelem);
+        REALLOC_IF_NECESSARY(d->follows[i].elems, position,
+                             d->follows[i].alloc, merged.nelem);
         copy(&merged, &d->follows[i]);
       }
 
@@ -2333,7 +2332,6 @@ dfaanalyze (struct dfa *d, int searchflag)
   free(o_firstpos);
   free(o_nlast);
   free(o_lastpos);
-  free(nalloc);
   free(merged.elems);
 }
 
-- 
1.7.5.2


>From 0348452c8b7830bd381c44dc304e96e2a2ed47f5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:07:44 +0200
Subject: [PATCH 5/5] dfa: automatically resize position_sets

* src/dfa.c (insert, copy, merge): Resize arrays here.
(dfaanalyze): Do not track number of allocated elements here.
(dfastate): Allocate mbps with only one element.
---
 src/dfa.c |   26 ++++++++++++--------------
 1 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 2d3bb6f..04c127e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1839,6 +1839,7 @@ dfaparse (char const *s, size_t len, struct dfa *d)
 static void
 copy (position_set const *src, position_set *dst)
 {
+  REALLOC_IF_NECESSARY(dst->elems, position, dst->alloc, src->nelem - 1);
   memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * src->nelem);
   dst->nelem = src->nelem;
 }
@@ -1860,6 +1861,7 @@ insert (position p, position_set *s)
 {
   int count = s->nelem;
   int lo = 0, hi = count;
+  int i;
   while (lo < hi)
     {
       int mid = ((unsigned) lo + (unsigned) hi) >> 1;
@@ -1870,15 +1872,16 @@ insert (position p, position_set *s)
     }
 
   if (lo < count && p.index == s->elems[lo].index)
-    s->elems[lo].constraint |= p.constraint;
-  else
     {
-      int i;
-      for (i = count; i > lo; i--)
-        s->elems[i] = s->elems[i - 1];
-      s->elems[lo] = p;
-      ++s->nelem;
+      s->elems[lo].constraint |= p.constraint;
+      return;
     }
+
+  REALLOC_IF_NECESSARY(s->elems, position, s->alloc, count);
+  for (i = count; i > lo; i--)
+    s->elems[i] = s->elems[i - 1];
+  s->elems[lo] = p;
+  ++s->nelem;
 }
 
 /* Merge two sets of positions into a third.  The result is exactly as if
@@ -1888,6 +1891,7 @@ merge (position_set const *s1, position_set const *s2, 
position_set *m)
 {
   int i = 0, j = 0;
 
+  REALLOC_IF_NECESSARY(m->elems, position, m->alloc, s1->nelem + s2->nelem - 
1);
   m->nelem = 0;
   while (i < s1->nelem && j < s2->nelem)
     if (s1->elems[i].index > s2->elems[j].index)
@@ -2174,8 +2178,6 @@ dfaanalyze (struct dfa *d, int searchflag)
         for (j = 0; j < nlastpos[-1]; ++j)
           {
             merge(&tmp, &d->follows[pos[j].index], &merged);
-            REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
-                                 d->follows[pos[j].index].alloc, merged.nelem 
- 1);
             copy(&merged, &d->follows[pos[j].index]);
           }
 
@@ -2194,8 +2196,6 @@ dfaanalyze (struct dfa *d, int searchflag)
         for (j = 0; j < nlastpos[-2]; ++j)
           {
             merge(&tmp, &d->follows[pos[j].index], &merged);
-            REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
-                                 d->follows[pos[j].index].alloc, merged.nelem 
- 1);
             copy(&merged, &d->follows[pos[j].index]);
           }
 
@@ -2303,8 +2303,6 @@ dfaanalyze (struct dfa *d, int searchflag)
 #endif
         copy(&d->follows[i], &merged);
         epsclosure(&merged, d);
-        REALLOC_IF_NECESSARY(d->follows[i].elems, position,
-                             d->follows[i].alloc, merged.nelem);
         copy(&merged, &d->follows[i]);
       }
 
@@ -2424,7 +2422,7 @@ dfastate (int s, struct dfa *d, int trans[])
              must put it to d->states[s].mbps, which contains the positions
              which can match with a single character not a byte.  */
           if (d->states[s].mbps.nelem == 0)
-            alloc_posset(&d->states[s].mbps, d->states[s].elems.nelem);
+            alloc_posset(&d->states[s].mbps, 1);
           insert(pos, &(d->states[s].mbps));
           continue;
         }
-- 
1.7.5.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]