From 14f4fe9be5c2267f05d0d8a79afe5cfd3083b189 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sat, 12 Apr 2014 14:20:11 +0900 Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust * src/dfa.c (struct must): New member `prev'. It has the pointer to previous must. (allocmust): New function. (freemust): New function. (dfamust): Use it. --- src/dfa.c | 127 ++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 62 insertions(+), 65 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 65fc03d..24a7d44 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3821,7 +3821,9 @@ inboth (char **left, char **right) return both; } -typedef struct +typedef struct must must; + +struct must { char **in; char *left; @@ -3829,21 +3831,46 @@ typedef struct char *is; bool begline; bool endline; -} must; + must *prev; +}; + +static must * +allocmust (must *mp) +{ + must *new_mp = xzalloc (sizeof *new_mp); + new_mp->in = xzalloc (sizeof *new_mp->in); + new_mp->left = xzalloc (2); + new_mp->right = xzalloc (2); + new_mp->is = xzalloc (2); + new_mp->prev = mp; + return new_mp; +} static void -resetmust (must * mp) +resetmust (must *mp) { freelist (mp->in); mp->in[0] = NULL; mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + mp->begline = false; + mp->endline = false; +} + +static void +freemust (must *mp) +{ + freelist (mp->in); + free (mp->in); + free (mp->left); + free (mp->right); + free (mp->is); + free (mp); } static void dfamust (struct dfa *d) { - must *musts = xnmalloc (d->tindex + 1, sizeof *musts); - must *mp = musts; + must *mp = NULL; char const *result = ""; size_t ri; size_t i; @@ -3852,34 +3879,18 @@ dfamust (struct dfa *d) bool endline = false; struct dfamust *dm; - for (i = 0; i <= d->tindex; ++i) - { - mp[i].in = xzalloc (sizeof *mp[i].in); - mp[i].left = xzalloc (2); - mp[i].right = xzalloc (2); - mp[i].is = xzalloc (2); - mp[i].begline = false; - mp[i].endline = false; - } -#ifdef DEBUG - fprintf (stderr, "dfamust:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf (stderr, " %zd:", i); - prtok (d->tokens[i]); - } - putc ('\n', stderr); -#endif for (ri = 0; ri < d->tindex; ++ri) { token t = d->tokens[ri]; switch (t) { case BEGLINE: + mp = allocmust (mp); resetmust (mp); mp->begline = true; break; case ENDLINE: + mp = allocmust (mp); resetmust (mp); mp->endline = true; break; @@ -3887,11 +3898,6 @@ dfamust (struct dfa *d) case RPAREN: assert (!"neither LPAREN nor RPAREN may appear here"); - case STAR: - case QMARK: - assert (musts < mp); - --mp; - /* Fall through. */ case EMPTY: case BEGWORD: case ENDWORD: @@ -3900,19 +3906,24 @@ dfamust (struct dfa *d) case BACKREF: case ANYCHAR: case MBCSET: + mp = allocmust (mp); + /* Fall through. */ + case STAR: + case QMARK: + assert (mp != NULL); resetmust (mp); break; case OR: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { char **new; must *lmp; must *rmp; size_t j, ln, rn, n; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* Guaranteed to be. Unlikely, but ... */ if (!STREQ (lmp->is, rmp->is)) lmp->is[0] = '\0'; @@ -3939,36 +3950,39 @@ dfamust (struct dfa *d) freelist (lmp->in); free (lmp->in); lmp->in = new; + freemust (rmp); } break; case PLUS: - assert (musts < mp); - --mp; + assert (mp != NULL); mp->is[0] = '\0'; break; case END: - assert (mp == &musts[1]); - for (i = 0; musts[0].in[i] != NULL; ++i) - if (strlen (musts[0].in[i]) > strlen (result)) - result = musts[0].in[i]; - if (STREQ (result, musts[0].is)) + assert (!mp || !mp->prev); + if (mp) { - exact = true; - begline = musts[0].begline; - endline = musts[0].endline; + for (i = 0; mp->in[i] != NULL; ++i) + if (strlen (mp->in[i]) > strlen (result)) + result = mp->in[i]; + if (STREQ (result, mp->is)) + { + exact = true; + begline = mp->begline; + endline = mp->endline; + } } goto done; case CAT: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { must *lmp; must *rmp; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* In. Everything in left, plus everything in right, plus concatenation of left's right and right's left. */ @@ -4003,6 +4017,7 @@ dfamust (struct dfa *d) lmp->begline = false; lmp->endline = false; } + freemust (rmp); } break; @@ -4011,6 +4026,7 @@ dfamust (struct dfa *d) goto done; default: + mp = allocmust (mp); resetmust (mp); if (CSET <= t) { @@ -4040,17 +4056,6 @@ dfamust (struct dfa *d) mp->in = enlist (mp->in, mp->is, 1); break; } -#ifdef DEBUG - fprintf (stderr, " node: %zd:", ri); - prtok (d->tokens[ri]); - fprintf (stderr, "\n in:"); - for (i = 0; mp->in[i]; ++i) - fprintf (stderr, " \"%s\"", mp->in[i]); - fprintf (stderr, "\n is: \"%s\"\n", mp->is); - fprintf (stderr, " left: \"%s\"\n", mp->left); - fprintf (stderr, " right: \"%s\"\n", mp->right); -#endif - ++mp; } done: if (*result) @@ -4063,16 +4068,8 @@ done: dm->next = d->musts; d->musts = dm; } - mp = musts; - for (i = 0; i <= d->tindex; ++i) - { - freelist (mp[i].in); - free (mp[i].in); - free (mp[i].left); - free (mp[i].right); - free (mp[i].is); - } - free (mp); + if (mp) + freemust (mp); } struct dfa * -- 1.9.2