From 4432de50f8cff0485005794e2d12348de7cf7e11 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Fri, 6 Jun 2014 19:08:08 +0900 Subject: [PATCH] dfa: build struct dfamust on demand If we don't use KWset, struct dfamust doesn't have to build. Now it's built on demand. * src/dfa.c (struct dfa): Remove attribute `musts'. (dfacomp): Don't build dfamust here. (dfamustfree): New function to deallocate a struct dfamust. (dfamust): Advance it to the global function, and it returns struct dfamust. (dfamusts): Remove it. * src/dfa.h (dfamustfree): Add prototype for it. (dfamust): Add prototype for it. (dfamusts): Remove prototype for it. * src/dfasearch.c (kwsmusts): Use new interface of dfa. --- src/dfa.c | 59 +++++++++++++++++++++----------------------------------- src/dfa.h | 8 +++++--- src/dfasearch.c | 60 ++++++++++++++++++++++++++------------------------------- 3 files changed, 54 insertions(+), 73 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 522a027..cf0d9af 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -420,9 +420,6 @@ struct dfa newline is stored separately and handled as a special case. Newline is also used as a sentinel at the end of the buffer. */ - struct dfamust *musts; /* List of strings, at least one of which - is known to appear in any r.e. matching - the dfa. */ position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET on demand. */ int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or @@ -439,7 +436,6 @@ struct dfa #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr) -static void dfamust (struct dfa *dfa); static void regexp (void); static void @@ -3527,7 +3523,6 @@ dfassbuild (struct dfa *d) sup->fails = NULL; sup->success = NULL; sup->newlines = NULL; - sup->musts = NULL; sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses); memcpy (sup->charclasses, d->charclasses, @@ -3591,7 +3586,6 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) dfainit (d); dfambcache (d); dfaparse (s, len, d); - dfamust (d); dfassbuild (d); dfaoptimize (d); dfaanalyze (d, searchflag); @@ -3607,7 +3601,6 @@ void dfafree (struct dfa *d) { size_t i; - struct dfamust *dm, *ndm; free (d->charclasses); free (d->tokens); @@ -3643,13 +3636,6 @@ dfafree (struct dfa *d) free (d->success); } - for (dm = d->musts; dm; dm = ndm) - { - ndm = dm->next; - free (dm->must); - free (dm); - } - if (d->superset) dfafree (d->superset); } @@ -3670,9 +3656,7 @@ dfafree (struct dfa *d) sequences that must constitute the match ("is") When we get to the root of the tree, we use one of the longest of its - calculated "in" sequences as our answer. The sequence we find is returned in - d->must (where "d" is the single argument passed to "dfamust"); - the length of the sequence is returned in d->mustn. + calculated "in" sequences as our answer. The sequences calculated for the various types of node (in pseudo ANSI c) are shown below. "p" is the operand of unary operators (and the left-hand @@ -3896,8 +3880,8 @@ freemust (must *mp) free (mp); } -static void -dfamust (struct dfa *d) +struct dfamust * +dfamust (struct dfa const *d) { must *mp = NULL; char const *result = ""; @@ -4067,30 +4051,28 @@ dfamust (struct dfa *d) t = j; while (++j < NOTCHAR) if (tstbit (j, *ccl) - && ! (case_fold && !d->multibyte + && ! (case_fold && MB_CUR_MAX == 1 && toupper (j) == toupper (t))) break; if (j < NOTCHAR) break; } mp->is[0] = mp->left[0] = mp->right[0] - = case_fold && !d->multibyte ? toupper (t) : t; + = case_fold && MB_CUR_MAX == 1 ? toupper (t) : t; mp->is[1] = mp->left[1] = mp->right[1] = '\0'; mp->in = enlist (mp->in, mp->is, 1); break; } } done: - if (*result) - { - dm = xmalloc (sizeof *dm); - dm->exact = exact; - dm->begline = begline; - dm->endline = endline; - dm->must = xstrdup (result); - dm->next = d->musts; - d->musts = dm; - } + if (!*result) + return NULL; + + dm = xmalloc (sizeof *dm); + dm->exact = exact; + dm->begline = begline; + dm->endline = endline; + dm->must = xstrdup (result); while (mp) { @@ -4098,18 +4080,21 @@ done: freemust (mp); mp = prev; } + + return dm; } -struct dfa * -dfaalloc (void) +void +dfamustfree (struct dfamust *dm) { - return xmalloc (sizeof (struct dfa)); + free (dm->must); + free (dm); } -struct dfamust *_GL_ATTRIBUTE_PURE -dfamusts (struct dfa const *d) +struct dfa * +dfaalloc (void) { - return d->musts; + return xmalloc (sizeof (struct dfa)); } /* vim:set shiftwidth=2: */ diff --git a/src/dfa.h b/src/dfa.h index f30c3cb..edb6324 100644 --- a/src/dfa.h +++ b/src/dfa.h @@ -30,7 +30,6 @@ struct dfamust bool begline; bool endline; char *must; - struct dfamust *next; }; /* The dfa structure. It is completely opaque. */ @@ -43,8 +42,11 @@ struct dfa; calling dfafree() on it. */ extern struct dfa *dfaalloc (void); -/* Return the dfamusts associated with a dfa. */ -extern struct dfamust *dfamusts (struct dfa const *); +/* Build and return the struct dfamust from the given struct dfa. */ +extern struct dfamust *dfamust (struct dfa const *); + +/* Free the storage held by the components of a struct dfamust. */ +extern void dfamustfree (struct dfamust *); /* dfasyntax() takes three arguments; the first sets the syntax bits described earlier in this file, the second sets the case-folding flag, and the diff --git a/src/dfasearch.c b/src/dfasearch.c index 77b4e3e..afe5517 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -86,41 +86,35 @@ dfawarn (char const *mesg) static void kwsmusts (void) { - struct dfamust const *dm = dfamusts (dfa); - if (dm) + struct dfamust *dm = dfamust (dfa); + if (!dm) + return; + kwsinit (&kwset); + /* First, we compile in the substrings known to be exact + matches. The kwset matcher will return the index + of the matching string that it chooses. */ + if (dm->exact) { - kwsinit (&kwset); - /* First, we compile in the substrings known to be exact - matches. The kwset matcher will return the index - of the matching string that it chooses. */ - for (; dm; dm = dm->next) - { - if (!dm->exact) - continue; - ++kwset_exact_matches; - size_t old_len = strlen (dm->must); - size_t new_len = old_len + dm->begline + dm->endline; - char *must = xmalloc (new_len); - char *mp = must; - *mp = eolbyte; - mp += dm->begline; - begline |= dm->begline; - memcpy (mp, dm->must, old_len); - if (dm->endline) - mp[old_len] = eolbyte; - kwsincr (kwset, must, new_len); - free (must); - } - /* Now, we compile the substrings that will require - the use of the regexp matcher. */ - for (dm = dfamusts (dfa); dm; dm = dm->next) - { - if (dm->exact) - continue; - kwsincr (kwset, dm->must, strlen (dm->must)); - } - kwsprep (kwset); + ++kwset_exact_matches; + size_t old_len = strlen (dm->must); + size_t new_len = old_len + dm->begline + dm->endline; + char *must = xmalloc (new_len); + char *mp = must; + *mp = eolbyte; + mp += dm->begline; + begline |= dm->begline; + memcpy (mp, dm->must, old_len); + if (dm->endline) + mp[old_len] = eolbyte; + kwsincr (kwset, must, new_len); + free (must); } + /* Now, we compile the substrings that will require + the use of the regexp matcher. */ + else + kwsincr (kwset, dm->must, strlen (dm->must)); + kwsprep (kwset); + dfamustfree (dm); } void -- 2.0.0