From 57e7bdc5a7d9ecf3efef979112317894cf26d8bd Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sat, 12 Apr 2014 14:20:11 +0900 Subject: [PATCH] grep: waste of a lot of memory for a large pattern in dfamust * src/dfa.c (struct must): New member `prev'. It has the pointer to previous must. (allocmust): New function. (freemust): New function. (dfamust): Use it. --- src/dfa.c | 125 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 58 insertions(+), 67 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 2b6c5d6..9268faa 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -4012,13 +4012,30 @@ inboth (char **left, char **right) return both; } -typedef struct +typedef struct must must; + +struct must { char **in; char *left; char *right; char *is; -} must; + must *prev; +}; + +static must * +allocmust (must *prev) +{ + must *mp = xmalloc (sizeof *prev); + mp->in = xmalloc (sizeof *mp->in); + mp->left = xmalloc (2); + mp->right = xmalloc (2); + mp->is = xmalloc (2); + mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + mp->in[0] = NULL; + mp->prev = prev; + return mp; +} static void resetmust (must * mp) @@ -4028,42 +4045,30 @@ resetmust (must * mp) } static void +freemust (must *mp) +{ + freelist (mp->in); + free (mp->in); + free (mp->left); + free (mp->right); + free (mp->is); + free (mp); +} + +static void dfamust (struct dfa *d) { - must *musts; - must *mp; + must *mp = NULL; char *result; size_t ri; size_t i; bool exact; - static must must0; struct dfamust *dm; static char empty_string[] = ""; result = empty_string; exact = false; - MALLOC (musts, d->tindex + 1); - mp = musts; - for (i = 0; i <= d->tindex; ++i) - mp[i] = must0; - for (i = 0; i <= d->tindex; ++i) - { - mp[i].in = xmalloc (sizeof *mp[i].in); - mp[i].left = xmalloc (2); - mp[i].right = xmalloc (2); - mp[i].is = xmalloc (2); - mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0'; - mp[i].in[0] = NULL; - } -#ifdef DEBUG - fprintf (stderr, "dfamust:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf (stderr, " %zd:", i); - prtok (d->tokens[i]); - } - putc ('\n', stderr); -#endif + for (ri = 0; ri < d->tindex; ++ri) { token t = d->tokens[ri]; @@ -4073,11 +4078,6 @@ dfamust (struct dfa *d) case RPAREN: assert (!"neither LPAREN nor RPAREN may appear here"); - case STAR: - case QMARK: - assert (musts < mp); - --mp; - /* Fall through. */ case EMPTY: case BEGLINE: case ENDLINE: @@ -4088,19 +4088,24 @@ dfamust (struct dfa *d) case BACKREF: case ANYCHAR: case MBCSET: + mp = allocmust (mp); + /* Fall through. */ + case STAR: + case QMARK: + assert (mp != NULL); resetmust (mp); break; case OR: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { char **new; must *lmp; must *rmp; size_t j, ln, rn, n; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* Guaranteed to be. Unlikely, but ... */ if (!STREQ (lmp->is, rmp->is)) lmp->is[0] = '\0'; @@ -4127,32 +4132,35 @@ dfamust (struct dfa *d) freelist (lmp->in); free (lmp->in); lmp->in = new; + freemust (rmp); } break; case PLUS: - assert (musts < mp); - --mp; + assert (mp != NULL); mp->is[0] = '\0'; break; case END: - assert (mp == &musts[1]); - for (i = 0; musts[0].in[i] != NULL; ++i) - if (strlen (musts[0].in[i]) > strlen (result)) - result = musts[0].in[i]; - if (STREQ (result, musts[0].is)) - exact = true; + assert (!mp || !mp->prev); + if (mp) + { + for (i = 0; mp->in[i] != NULL; ++i) + if (strlen (mp->in[i]) > strlen (result)) + result = mp->in[i]; + if (STREQ (result, mp->is)) + exact = true; + } goto done; case CAT: - assert (&musts[2] <= mp); + assert (mp && mp->prev); { must *lmp; must *rmp; - rmp = --mp; - lmp = --mp; + rmp = mp; + lmp = mp = mp->prev; /* In. Everything in left, plus everything in right, plus concatenation of left's right and right's left. */ @@ -4192,6 +4200,7 @@ dfamust (struct dfa *d) } else lmp->is[0] = '\0'; + freemust (rmp); } break; @@ -4200,6 +4209,7 @@ dfamust (struct dfa *d) goto done; default: + mp = allocmust (mp); resetmust (mp); if (CSET <= t) { @@ -4231,17 +4241,6 @@ dfamust (struct dfa *d) goto done; break; } -#ifdef DEBUG - fprintf (stderr, " node: %zd:", ri); - prtok (d->tokens[ri]); - fprintf (stderr, "\n in:"); - for (i = 0; mp->in[i]; ++i) - fprintf (stderr, " \"%s\"", mp->in[i]); - fprintf (stderr, "\n is: \"%s\"\n", mp->is); - fprintf (stderr, " left: \"%s\"\n", mp->left); - fprintf (stderr, " right: \"%s\"\n", mp->right); -#endif - ++mp; } done: if (strlen (result)) @@ -4252,16 +4251,8 @@ done: dm->next = d->musts; d->musts = dm; } - mp = musts; - for (i = 0; i <= d->tindex; ++i) - { - freelist (mp[i].in); - free (mp[i].in); - free (mp[i].left); - free (mp[i].right); - free (mp[i].is); - } - free (mp); + if (mp) + freemust (mp); } struct dfa * -- 1.9.2