Index: dfa.c =================================================================== --- dfa.c (revision grep-2.14-faster-dfa) +++ dfa.c (revision grep-2.14) @@ -1992,6 +1992,133 @@ s->elems[i] = s->elems[i + 1]; } +/* Merge implementation of sorted sets, then number of sets smore than 2. + Priority queue aka 'heap' data structure used to select maximum of + many elements */ + +typedef struct { + position p; + size_t set_index; /* Remember which set this element was taken from */ +} heap_elem; + + +static void +heap_add (heap_elem *heap, size_t *heap_size, heap_elem e) +{ + size_t i = *heap_size; + while (i) + { + size_t parent = (i-1) / 2; + if (e.p.index < heap[parent].p.index) + break; + + heap[i] = heap[parent]; + i = parent; + } + heap[i] = e; + *heap_size += 1; +} + +static void +heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e) +{ + size_t i = 0; + size_t lchild = 2*i + 1; + size_t n = heap_size; + while (lchild < n) + { + size_t rchild = lchild + 1; + size_t nextchild = lchild; + if (rchild < n && heap[lchild].p.index < heap[rchild].p.index) + nextchild = rchild; + if (e.p.index >= heap[nextchild].p.index) + break; + heap[i] = heap[nextchild]; + i = nextchild; + lchild = 2*i + 1; + } + heap[i] = e; +} + + +static void +heap_remove_top (heap_elem *heap, size_t *heap_size) +{ + *heap_size -= 1; + if (*heap_size == 0) + return; + heap_remove_top_and_add (heap, *heap_size, heap[*heap_size]); +} + +static void +heap_merge_many_sets (position_set *sets, size_t n, position_set *result) +{ + size_t heap_size = 0; + position_set r; /* faster access to local variables */ + heap_elem *heap = XNMALLOC (n, heap_elem); + size_t *ids = XNMALLOC (n, size_t); + size_t sum_of_sizes = 0; + size_t i; + + for (i = 0; i < n; ++i) + { + sum_of_sizes += sets[i].nelem; + /* special value for "end of set", to reduce + dependent memory reads */ + ids[i] = ~(size_t)0; + if (sets[i].nelem) + { + heap_elem e = { sets[i].elems[0], i }; + heap_add (heap, &heap_size, e); + if (sets[i].nelem > 1) + ids[i] = 1; + } + } + + REALLOC_IF_NECESSARY(result->elems, result->alloc, sum_of_sizes); + r = *result; + r.nelem = 0; + + while (heap_size) + { + heap_elem e = heap[0]; + if (r.nelem && r.elems[r.nelem-1].index == e.p.index) + r.elems[r.nelem-1].constraint |= e.p.constraint; + else + r.elems[r.nelem++] = e.p; + i = e.set_index; + if (ids[i] != ~(size_t)0) + { + heap_elem e2 = { sets[i].elems[ids[i]], i }; + heap_remove_top_and_add (heap, heap_size, e2); + ++ids[i]; + if (ids[i] == sets[i].nelem) + ids[i] = ~(size_t)0; + } + else + { + heap_remove_top (heap, &heap_size); + } + } + *result = r; + free (ids); + free (heap); +} + +static void +merge_many_sets (position_set *sets, size_t n, position_set *result) +{ + /* optimized solutions for special/trivial cases */ + if (n == 0) + result->nelem = 0; + else if (n == 1) + copy (&sets[0], result); + else if (n == 2) + merge (&sets[0], &sets[1], result); + else + heap_merge_many_sets(sets, n, result); +} + /* Find the index of the state corresponding to the given position set with the given preceding context, or create a new state if there is no such state. Context tells whether we got here on a newline or letter. */ @@ -2482,13 +2609,16 @@ charclass leftovers; /* Stuff in the label that didn't match. */ int leftoversf; /* True if leftovers is nonempty. */ position_set follows; /* Union of the follows of some group. */ - position_set tmp; /* Temporary space for merging sets. */ + position_set follows_tmp; /* Temporary space for merging sets. */ int possible_contexts; /* Contexts that this group can match. */ int separate_contexts; /* Context that new state wants to know. */ state_num state; /* New state. */ state_num state_newline; /* New state on a newline transition. */ state_num state_letter; /* New state on a letter transition. */ int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */ + position_set *sets = 0; /* Temporary array of sets, to pass many sets to merge function */ + size_t sets_n = 0; /* Sets array size */ + size_t sets_alloc = 0; /* Sets array capacity */ size_t i, j, k; MALLOC (grps, NOTCHAR); @@ -2607,7 +2737,7 @@ } alloc_position_set (&follows, d->nleaves); - alloc_position_set (&tmp, d->nleaves); + alloc_position_set (&follows_tmp, d->nleaves); /* If we are a searching matcher, the default transition is to a state containing the positions of state 0, otherwise the default transition @@ -2637,13 +2767,12 @@ for (i = 0; i < ngrps; ++i) { - follows.nelem = 0; - - /* Find the union of the follows of the positions of the group. - This is a hideously inefficient loop. Fix it someday. */ + /* Find the union of the follows of the positions of the group. */ + sets_n = grps[i].nelem; + REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n); for (j = 0; j < grps[i].nelem; ++j) - for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k) - insert (d->follows[grps[i].elems[j]].elems[k], &follows); + sets[j] = d->follows[grps[i].elems[j]]; + merge_many_sets (sets, sets_n, &follows_tmp); if (d->mb_cur_max > 1) { @@ -2666,9 +2795,9 @@ , so we cannot add state[0]. */ next_isnt_1st_byte = 0; - for (j = 0; j < follows.nelem; ++j) + for (j = 0; j < follows_tmp.nelem; ++j) { - if (!(d->multibyte_prop[follows.elems[j].index] & 1)) + if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1)) { next_isnt_1st_byte = 1; break; @@ -2680,8 +2809,7 @@ of state 0 as well. */ if (d->searchflag && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) - for (j = 0; j < d->states[0].elems.nelem; ++j) - insert (d->states[0].elems.elems[j], &follows); + merge (&follows_tmp, &d->states[0].elems, &follows); /* Find out if the new state will want any context information. */ possible_contexts = charclass_context (labels[i]); @@ -2720,7 +2848,8 @@ for (i = 0; i < ngrps; ++i) free (grps[i].elems); free (follows.elems); - free (tmp.elems); + free (sets); + free (follows_tmp.elems); free (grps); free (labels); }