[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH] dfa: make position_sets grow automatically
From: |
Paolo Bonzini |
Subject: |
[RFC PATCH] dfa: make position_sets grow automatically |
Date: |
Tue, 28 Jun 2011 17:14:32 +0200 |
Inspired by the recent heap overrun patch, I propose to make all
position_sets grow dynamically. What do you think?
Paolo Bonzini (5):
dfa: use a separate data type for grps
dfa: introduce alloc_posset
dfa: remove dead assignment
dfa: move nalloc to position_set structure
dfa: automatically resize position_sets
src/dfa.c | 85 +++++++++++++++++++++++++++++++++----------------------------
1 files changed, 46 insertions(+), 39 deletions(-)
--
1.7.5.2
>From 765d61f65a0b4a193fd712382e85b786222a8fc4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:38:55 +0200
Subject: [PATCH 1/5] dfa: use a separate data type for grps
* src/dfa.c (leaf_set): New.
(dfastate): Use leaf_set for grps, as the constraint field is unused.
---
src/dfa.c | 29 +++++++++++++++++++----------
1 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index a36b80a..d875517 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -244,6 +244,13 @@ typedef struct
int nelem; /* Number of elements in this set. */
} position_set;
+/* Sets of leaves are also stored as arrays. */
+typedef struct
+{
+ unsigned int *elems; /* Elements of this position set. */
+ int nelem; /* Number of elements in this set. */
+} leaf_set;
+
/* A state of the dfa consists of a set of positions, some flags,
and the token value of the lowest-numbered position of the state that
contains an END token. */
@@ -2356,7 +2363,7 @@ dfaanalyze (struct dfa *d, int searchflag)
void
dfastate (int s, struct dfa *d, int trans[])
{
- position_set *grps; /* As many as will ever be needed. */
+ leaf_set *grps; /* As many as will ever be needed. */
charclass *labels; /* Labels corresponding to the groups. */
int ngrps = 0; /* Number of groups actually used. */
position pos; /* Current position being considered. */
@@ -2483,14 +2490,16 @@ dfastate (int s, struct dfa *d, int trans[])
{
copyset(leftovers, labels[ngrps]);
copyset(intersect, labels[j]);
- MALLOC(grps[ngrps].elems, position, d->nleaves);
- copy(&grps[j], &grps[ngrps]);
+ MALLOC(grps[ngrps].elems, unsigned int, d->nleaves);
+ memcpy(grps[ngrps].elems, grps[j].elems,
+ grps[j].nelem * sizeof(unsigned int));
+ grps[ngrps].nelem = grps[j].nelem;
++ngrps;
}
- /* Put the position in the current group. Note that there is no
- reason to call insert() here. */
- grps[j].elems[grps[j].nelem++] = pos;
+ /* Put the position in the current group. The constraint is
+ irrelevant here. */
+ grps[j].elems[grps[j].nelem++] = pos.index;
/* If every character matching the current position has been
accounted for, we're done. */
@@ -2504,9 +2513,9 @@ dfastate (int s, struct dfa *d, int trans[])
{
copyset(matches, labels[ngrps]);
zeroset(matches);
- MALLOC(grps[ngrps].elems, position, d->nleaves);
+ MALLOC(grps[ngrps].elems, unsigned int, d->nleaves);
grps[ngrps].nelem = 1;
- grps[ngrps].elems[0] = pos;
+ grps[ngrps].elems[0] = pos.index;
++ngrps;
}
}
@@ -2553,8 +2562,8 @@ dfastate (int s, struct dfa *d, int trans[])
/* Find the union of the follows of the positions of the group.
This is a hideously inefficient loop. Fix it someday. */
for (j = 0; j < grps[i].nelem; ++j)
- for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
- insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
+ for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
+ insert(d->follows[grps[i].elems[j]].elems[k], &follows);
#if MBS_SUPPORT
if (d->mb_cur_max > 1)
--
1.7.5.2
>From 635579f5fe9aa92fc781eb21901ef08f8be13a48 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:02:51 +0200
Subject: [PATCH 2/5] dfa: introduce alloc_posset
* src/dfa.c (alloc_posset): New function, use it throughout.
---
src/dfa.c | 25 ++++++++++++++-----------
1 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index d875517..4ad3a9b 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1842,6 +1842,13 @@ copy (position_set const *src, position_set *dst)
dst->nelem = src->nelem;
}
+static void
+alloc_posset (position_set *s, size_t size)
+{
+ MALLOC(s->elems, position, size);
+ s->nelem = 0;
+}
+
/* Insert a position in a set. Position sets are maintained in sorted
order according to index. If position already exists in the set with
the same index then their constraints are logically or'd together.
@@ -1945,7 +1952,7 @@ state_index (struct dfa *d, position_set const *s, int
newline, int letter)
/* We'll have to create a new state. */
REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
d->states[i].hash = hash;
- MALLOC(d->states[i].elems.elems, position, s->nelem);
+ alloc_posset(&d->states[i].elems, s->nelem);
copy(s, &d->states[i].elems);
d->states[i].newline = newline;
d->states[i].letter = letter;
@@ -2139,7 +2146,7 @@ dfaanalyze (struct dfa *d, int searchflag)
MALLOC(lastpos, position, d->nleaves);
o_lastpos = lastpos, lastpos += d->nleaves;
CALLOC(nalloc, int, d->tindex);
- MALLOC(merged.elems, position, 2 * d->nleaves);
+ alloc_posset(&merged, d->nleaves);
CALLOC(d->follows, position_set, d->tindex);
@@ -2249,7 +2256,7 @@ dfaanalyze (struct dfa *d, int searchflag)
/* Allocate the follow set for this position. */
nalloc[i] = 1;
- MALLOC(d->follows[i].elems, position, nalloc[i]);
+ alloc_posset(&d->follows[i], nalloc[i]);
break;
}
#ifdef DEBUG
@@ -2419,10 +2426,7 @@ dfastate (int s, struct dfa *d, int trans[])
must put it to d->states[s].mbps, which contains the positions
which can match with a single character not a byte. */
if (d->states[s].mbps.nelem == 0)
- {
- MALLOC(d->states[s].mbps.elems, position,
- d->states[s].elems.nelem);
- }
+ alloc_posset(&d->states[s].mbps, d->states[s].elems.nelem);
insert(pos, &(d->states[s].mbps));
continue;
}
@@ -2520,8 +2524,8 @@ dfastate (int s, struct dfa *d, int trans[])
}
}
- MALLOC(follows.elems, position, d->nleaves);
- MALLOC(tmp.elems, position, d->nleaves);
+ alloc_posset(&follows, d->nleaves);
+ alloc_posset(&tmp, d->nleaves);
/* If we are a searching matcher, the default transition is to a state
containing the positions of state 0, otherwise the default transition
@@ -3145,8 +3149,7 @@ transit_state (struct dfa *d, int s, unsigned char const
**pp)
}
/* This state has some operators which can match a multibyte character. */
- follows.nelem = 0;
- MALLOC(follows.elems, position, d->nleaves);
+ alloc_posset(&follows, d->nleaves);
/* `maxlen' may be longer than the length of a character, because it may
not be a character but a (multi character) collating element.
--
1.7.5.2
>From 690906eb043236efa06aa1cdc83d980717a0b554 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:58:37 +0200
Subject: [PATCH 3/5] dfa: remove dead assignment
* src/dfa.c (transit_state): transit_state_consume_1char will clear follows,
do not do this ourselves.
---
src/dfa.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 4ad3a9b..f174cc9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3163,7 +3163,6 @@ transit_state (struct dfa *d, int s, unsigned char const
**pp)
while (*pp - p1 < maxlen)
{
- follows.nelem = 0;
transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
for (i = 0; i < nelem ; i++)
--
1.7.5.2
>From d65738a921827089714c78bcc483bbfc17d721b4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:07:44 +0200
Subject: [PATCH 4/5] dfa: move nalloc to position_set structure
* src/dfa.c (position_set): Add alloc.
(alloc_posset): Initialize it.
(dfaanalyze): Use it instead of the nalloc array or nelem.
---
src/dfa.c | 16 +++++++---------
1 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index f174cc9..2d3bb6f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -242,6 +242,7 @@ typedef struct
{
position *elems; /* Elements of this position set. */
int nelem; /* Number of elements in this set. */
+ int alloc; /* Number of elements allocated in ELEMS. */
} position_set;
/* Sets of leaves are also stored as arrays. */
@@ -1846,6 +1847,7 @@ static void
alloc_posset (position_set *s, size_t size)
{
MALLOC(s->elems, position, size);
+ s->alloc = size;
s->nelem = 0;
}
@@ -2113,7 +2115,6 @@ dfaanalyze (struct dfa *d, int searchflag)
position *firstpos; /* Array where firstpos elements are stored. */
int *nlastpos; /* Element count stack for lastpos sets. */
position *lastpos; /* Array where lastpos elements are stored. */
- int *nalloc; /* Sizes of arrays allocated to follow sets. */
position_set tmp; /* Temporary set for merging sets. */
position_set merged; /* Result of merging sets. */
int wants_newline; /* True if some position wants newline info. */
@@ -2145,7 +2146,6 @@ dfaanalyze (struct dfa *d, int searchflag)
o_nlast = nlastpos;
MALLOC(lastpos, position, d->nleaves);
o_lastpos = lastpos, lastpos += d->nleaves;
- CALLOC(nalloc, int, d->tindex);
alloc_posset(&merged, d->nleaves);
CALLOC(d->follows, position_set, d->tindex);
@@ -2175,7 +2175,7 @@ dfaanalyze (struct dfa *d, int searchflag)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
- nalloc[pos[j].index], merged.nelem - 1);
+ d->follows[pos[j].index].alloc, merged.nelem
- 1);
copy(&merged, &d->follows[pos[j].index]);
}
@@ -2195,7 +2195,7 @@ dfaanalyze (struct dfa *d, int searchflag)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
- nalloc[pos[j].index], merged.nelem - 1);
+ d->follows[pos[j].index].alloc, merged.nelem
- 1);
copy(&merged, &d->follows[pos[j].index]);
}
@@ -2255,8 +2255,7 @@ dfaanalyze (struct dfa *d, int searchflag)
firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
/* Allocate the follow set for this position. */
- nalloc[i] = 1;
- alloc_posset(&d->follows[i], nalloc[i]);
+ alloc_posset(&d->follows[i], 1);
break;
}
#ifdef DEBUG
@@ -2304,8 +2303,8 @@ dfaanalyze (struct dfa *d, int searchflag)
#endif
copy(&d->follows[i], &merged);
epsclosure(&merged, d);
- if (d->follows[i].nelem < merged.nelem)
- REALLOC(d->follows[i].elems, position, merged.nelem);
+ REALLOC_IF_NECESSARY(d->follows[i].elems, position,
+ d->follows[i].alloc, merged.nelem);
copy(&merged, &d->follows[i]);
}
@@ -2333,7 +2332,6 @@ dfaanalyze (struct dfa *d, int searchflag)
free(o_firstpos);
free(o_nlast);
free(o_lastpos);
- free(nalloc);
free(merged.elems);
}
--
1.7.5.2
>From 0348452c8b7830bd381c44dc304e96e2a2ed47f5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Tue, 28 Jun 2011 09:07:44 +0200
Subject: [PATCH 5/5] dfa: automatically resize position_sets
* src/dfa.c (insert, copy, merge): Resize arrays here.
(dfaanalyze): Do not track number of allocated elements here.
(dfastate): Allocate mbps with only one element.
---
src/dfa.c | 26 ++++++++++++--------------
1 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 2d3bb6f..04c127e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1839,6 +1839,7 @@ dfaparse (char const *s, size_t len, struct dfa *d)
static void
copy (position_set const *src, position_set *dst)
{
+ REALLOC_IF_NECESSARY(dst->elems, position, dst->alloc, src->nelem - 1);
memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * src->nelem);
dst->nelem = src->nelem;
}
@@ -1860,6 +1861,7 @@ insert (position p, position_set *s)
{
int count = s->nelem;
int lo = 0, hi = count;
+ int i;
while (lo < hi)
{
int mid = ((unsigned) lo + (unsigned) hi) >> 1;
@@ -1870,15 +1872,16 @@ insert (position p, position_set *s)
}
if (lo < count && p.index == s->elems[lo].index)
- s->elems[lo].constraint |= p.constraint;
- else
{
- int i;
- for (i = count; i > lo; i--)
- s->elems[i] = s->elems[i - 1];
- s->elems[lo] = p;
- ++s->nelem;
+ s->elems[lo].constraint |= p.constraint;
+ return;
}
+
+ REALLOC_IF_NECESSARY(s->elems, position, s->alloc, count);
+ for (i = count; i > lo; i--)
+ s->elems[i] = s->elems[i - 1];
+ s->elems[lo] = p;
+ ++s->nelem;
}
/* Merge two sets of positions into a third. The result is exactly as if
@@ -1888,6 +1891,7 @@ merge (position_set const *s1, position_set const *s2,
position_set *m)
{
int i = 0, j = 0;
+ REALLOC_IF_NECESSARY(m->elems, position, m->alloc, s1->nelem + s2->nelem -
1);
m->nelem = 0;
while (i < s1->nelem && j < s2->nelem)
if (s1->elems[i].index > s2->elems[j].index)
@@ -2174,8 +2178,6 @@ dfaanalyze (struct dfa *d, int searchflag)
for (j = 0; j < nlastpos[-1]; ++j)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
- REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
- d->follows[pos[j].index].alloc, merged.nelem
- 1);
copy(&merged, &d->follows[pos[j].index]);
}
@@ -2194,8 +2196,6 @@ dfaanalyze (struct dfa *d, int searchflag)
for (j = 0; j < nlastpos[-2]; ++j)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
- REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
- d->follows[pos[j].index].alloc, merged.nelem
- 1);
copy(&merged, &d->follows[pos[j].index]);
}
@@ -2303,8 +2303,6 @@ dfaanalyze (struct dfa *d, int searchflag)
#endif
copy(&d->follows[i], &merged);
epsclosure(&merged, d);
- REALLOC_IF_NECESSARY(d->follows[i].elems, position,
- d->follows[i].alloc, merged.nelem);
copy(&merged, &d->follows[i]);
}
@@ -2424,7 +2422,7 @@ dfastate (int s, struct dfa *d, int trans[])
must put it to d->states[s].mbps, which contains the positions
which can match with a single character not a byte. */
if (d->states[s].mbps.nelem == 0)
- alloc_posset(&d->states[s].mbps, d->states[s].elems.nelem);
+ alloc_posset(&d->states[s].mbps, 1);
insert(pos, &(d->states[s].mbps));
continue;
}
--
1.7.5.2
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [RFC PATCH] dfa: make position_sets grow automatically,
Paolo Bonzini <=