From e5680a4c43b185df6b8c1b80423d663de6f6e37e Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Fri, 17 Apr 2020 10:12:01 +0900 Subject: [PATCH] dfa: build auxiliary indexes before remove epsilon closure When remove epsilon closure, so far searched nodes including epsilon closure in all nodes sequentially, but it is slow for some cases. Now build auxiliary indexes before search. Problem reported in: https://debbugs.gnu.org/cgi/bugreport.cgi?bug=40634 * lib/dfa.c (overwrap): New function. (epsclosure): build auxiliary indexes before remove epsilon closure. --- lib/dfa.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 45 insertions(+), 7 deletions(-) diff --git a/lib/dfa.c b/lib/dfa.c index 9939d22..958069e 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -2201,6 +2201,22 @@ replace (position_set *dst, idx_t del, position_set *add, } } +static bool +overwrap (position_set const *s, idx_t const *elems, idx_t nelem) +{ + idx_t i = 0, j = 0; + + while (i < s->nelem && j < nelem) + if (s->elems[i].index < elems[j]) + i++; + else if (s->elems[i].index > elems[j]) + j++; + else + return true; + + return false; +} + /* Find the index of the state corresponding to the given position set with the given preceding context, or create a new state if there is no such state. Context tells whether we got here on a newline or letter. */ @@ -2298,14 +2314,33 @@ static void epsclosure (struct dfa const *d) { position_set tmp; + idx_t *currs, *nexts; + idx_t ncurr = 0; + idx_t nnext = 0; + alloc_position_set (&tmp, d->nleaves); - for (idx_t i = 0; i < d->tindex; i++) - if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR + currs = xnmalloc (d->tindex, sizeof *currs); + nexts = xnmalloc (d->tindex, sizeof *nexts); + + for (idx_t i = 0; i < d->tindex; i++) { + if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR && d->tokens[i] != MBCSET && d->tokens[i] < CSET) + currs[ncurr++] = i; + } + + for (idx_t i = 0, j = 0; i < d->tindex; i++) + { + while (j < ncurr && currs[j] < i) + j++; + if (overwrap (&d->follows[i], currs, ncurr)) + nexts[nnext++] = i; + } + + for (idx_t i = 0; i < ncurr; i++) { unsigned int constraint; - switch (d->tokens[i]) + switch (d->tokens[currs[i]]) { case BEGLINE: constraint = BEGLINE_CONSTRAINT; @@ -2330,13 +2365,16 @@ epsclosure (struct dfa const *d) break; } - delete (i, &d->follows[i]); + delete (i, &d->follows[currs[i]]); - for (idx_t j = 0; j < d->tindex; j++) - if (i != j && d->follows[j].nelem > 0) - replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); + for (idx_t j = 0; j < nnext; j++) + replace (&d->follows[nexts[j]], currs[i], &d->follows[currs[i]], + constraint, &tmp); } + free (tmp.elems); + free (currs); + free (nexts); } /* Returns the set of contexts for which there is at least one -- 1.7.1