From e39701931703ca7fc603c5e2e38f580730a24be2 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sat, 12 Apr 2014 14:20:11 +0900 Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust * src/dfa.c (struct must): New member `prev'. It has the pointer to previous must. (allocmust): New function. (freemust): New function. (dfamust): Use it. --- src/dfa.c | 117 ++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index eeca257..9661c0c 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3821,13 +3821,30 @@ inboth (char **left, char **right) return both; } -typedef struct +typedef struct must must; + +struct must { char **in; char *left; char *right; char *is; -} must; + must *prev; +}; + +static must * +allocmust (must *prev) +{ + must *mp = xmalloc (sizeof *prev); + mp->in = xmalloc (sizeof *mp->in); + mp->left = xmalloc (2); + mp->right = xmalloc (2); + mp->is = xmalloc (2); + mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + mp->in[0] = NULL; + mp->prev = prev; + return mp; +} static void resetmust (must * mp) @@ -3838,32 +3855,26 @@ resetmust (must * mp) } static void +freemust (must *mp) +{ + freelist (mp->in); + free (mp->in); + free (mp->left); + free (mp->right); + free (mp->is); + free (mp); +} + +static void dfamust (struct dfa *d) { - must *musts = xnmalloc (d->tindex + 1, sizeof *musts); - must *mp = musts; + must *mp = NULL; char const *result = ""; size_t ri; size_t i; bool exact = false; struct dfamust *dm; - for (i = 0; i <= d->tindex; ++i) - { - mp[i].in = xzalloc (sizeof *mp[i].in); - mp[i].left = xzalloc (2); - mp[i].right = xzalloc (2); - mp[i].is = xzalloc (2); - } -#ifdef DEBUG - fprintf (stderr, "dfamust:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf (stderr, " %zd:", i); - prtok (d->tokens[i]); - } - putc ('\n', stderr); -#endif for (ri = 0; ri < d->tindex; ++ri) { token t = d->tokens[ri]; @@ -3873,11 +3884,6 @@ dfamust (struct dfa *d) case RPAREN: assert (!"neither LPAREN nor RPAREN may appear here"); - case STAR: - case QMARK: - assert (musts < mp); - --mp; - /* Fall through. */ case EMPTY: case BEGLINE: case ENDLINE: @@ -3888,19 +3894,24 @@ dfamust (struct dfa *d) case BACKREF: case ANYCHAR: case MBCSET: + mp = allocmust (mp); + /* Fall through. */ + case STAR: + case QMARK: + assert (mp != NULL); resetmust (mp); break; case OR: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { char **new; must *lmp; must *rmp; size_t j, ln, rn, n; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* Guaranteed to be. Unlikely, but ... */ if (!STREQ (lmp->is, rmp->is)) lmp->is[0] = '\0'; @@ -3925,32 +3936,35 @@ dfamust (struct dfa *d) freelist (lmp->in); free (lmp->in); lmp->in = new; + freemust (rmp); } break; case PLUS: - assert (musts < mp); - --mp; + assert (mp != NULL); mp->is[0] = '\0'; break; case END: - assert (mp == &musts[1]); - for (i = 0; musts[0].in[i] != NULL; ++i) - if (strlen (musts[0].in[i]) > strlen (result)) - result = musts[0].in[i]; - if (STREQ (result, musts[0].is)) - exact = true; + assert (!mp || !mp->prev); + if (mp) + { + for (i = 0; mp->in[i] != NULL; ++i) + if (strlen (mp->in[i]) > strlen (result)) + result = mp->in[i]; + if (STREQ (result, mp->is)) + exact = true; + } goto done; case CAT: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { must *lmp; must *rmp; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* In. Everything in left, plus everything in right, plus concatenation of left's right and right's left. */ @@ -3977,6 +3991,7 @@ dfamust (struct dfa *d) lmp->is = icatalloc (lmp->is, rmp->is); else lmp->is[0] = '\0'; + freemust (rmp); } break; @@ -3985,6 +4000,7 @@ dfamust (struct dfa *d) goto done; default: + mp = allocmust (mp); resetmust (mp); if (CSET <= t) { @@ -4014,17 +4030,6 @@ dfamust (struct dfa *d) mp->in = enlist (mp->in, mp->is, 1); break; } -#ifdef DEBUG - fprintf (stderr, " node: %zd:", ri); - prtok (d->tokens[ri]); - fprintf (stderr, "\n in:"); - for (i = 0; mp->in[i]; ++i) - fprintf (stderr, " \"%s\"", mp->in[i]); - fprintf (stderr, "\n is: \"%s\"\n", mp->is); - fprintf (stderr, " left: \"%s\"\n", mp->left); - fprintf (stderr, " right: \"%s\"\n", mp->right); -#endif - ++mp; } done: if (*result) @@ -4035,16 +4040,8 @@ done: dm->next = d->musts; d->musts = dm; } - mp = musts; - for (i = 0; i <= d->tindex; ++i) - { - freelist (mp[i].in); - free (mp[i].in); - free (mp[i].left); - free (mp[i].right); - free (mp[i].is); - } - free (mp); + if (mp) + freemust (mp); } struct dfa * -- 1.9.2