diff --git a/src/kwset.c b/src/kwset.c index f86ee03..3c53e81 100644 --- a/src/kwset.c +++ b/src/kwset.c @@ -55,6 +55,35 @@ #define U(c) (to_uchar (c)) +#if defined(__i386__) || defined(__x86_64__) +#define SHIFT(p, d) \ + switch (d) \ + { \ + default: \ + p += d; \ + break; \ + case 8: \ + ++p; \ + case 7: \ + ++p; \ + case 6: \ + ++p; \ + case 5: \ + ++p; \ + case 4: \ + ++p; \ + case 3: \ + ++p; \ + case 2: \ + ++p; \ + case 1: \ + ++p; \ + } +#else +#define SHIFT(p, d) \ + p += d; +#endif + /* Balanced tree of edges and labels leaving a given trie node. */ struct tree { @@ -464,80 +493,75 @@ kwsprep (kwset_t kwset) kwset->delta[i] = delta[U(trans[i])]; } -/* Use TRANS to transliterate C. A null TRANS does no transliteration. */ -static char -tr (char const *trans, char c) -{ - return trans ? trans[U(c)] : c; -} - -/* Delta2 portion of a Boyer-Moore search. *TP is the string text - pointer; it is updated in place. EP is the end of the string text, - and SP the end of the pattern. LEN is the pattern length; it must - be at least 2. TRANS, if nonnull, is the input translation table. - GC1 and GC2 are the last and second-from last bytes of the pattern, - transliterated by TRANS; the caller precomputes them for - efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP - when failing. KWSET->shift says how much to shift. */ -static inline bool -bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len, - char const *trans, char gc1, char gc2, - unsigned char const *d1, kwset_t kwset) -{ - char const *tp = *tpp; - int d = len, skip = 0; - - while (true) - { - int i = 2; - if (tr (trans, tp[-2]) == gc2) - { - while (++i <= d) - if (tr (trans, tp[-i]) != tr (trans, sp[-i])) - break; - if (i > d) - { - for (i = d + skip + 1; i <= len; ++i) - if (tr (trans, tp[-i]) != tr (trans, sp[-i])) - break; - if (i > len) - { - *tpp = tp - len; - return true; - } - } - } - - tp += d = kwset->shift[i - 2]; - if (tp > ep) - break; - if (tr (trans, tp[-1]) != gc1) - { - if (d1) - tp += d1[U(tp[-1])]; - break; - } - skip = i - 1; +#define BM_DELTA2_SEARCH \ + if (TRANS(tp[-2]) == gc2) \ + { \ + for (i = 3; i <= len; ++i) \ + if (TRANS(tp[-i]) != TRANS(sp[-i])) \ + break; \ + if (i > len) \ + return tp - len - text; \ + d = kwset->shift[i - 2]; SHIFT(tp, d); \ + if (tp > ep) \ + break; \ + if (TRANS(tp[-1]) != gc1) \ + { \ + d = d1[U(tp[-1])]; LAST_SHIFT; \ + continue; \ + } \ + skip = i - 1; \ + } \ + else \ + { \ + d = kwset->shift[0]; SHIFT(tp, d); \ + if (tp > ep) \ + break; \ + if (TRANS(tp[-1]) != gc1) \ + { \ + d = d1[U(tp[-1])]; LAST_SHIFT; \ + continue; \ + } \ + skip = 1; \ + } \ + while (true) \ + { \ + if (TRANS(tp[-2]) == gc2) \ + { \ + for (i = 3; i <= d; ++i) \ + if (TRANS(tp[-i]) != TRANS(sp[-i])) \ + break; \ + if (i > d) \ + { \ + for (i = d + skip + 1; i <= len; ++i) \ + if (TRANS(tp[-i]) != TRANS(sp[-i])) \ + break; \ + if (i > len) \ + return tp - len - text; \ + } \ + d = kwset->shift[i - 2]; SHIFT(tp, d); \ + if (tp > ep) \ + break; \ + if (TRANS(tp[-1]) != gc1) \ + { \ + d = d1[U(tp[-1])]; LAST_SHIFT; \ + break; \ + } \ + skip = i - 1; \ + } \ + else \ + { \ + d = kwset->shift[0]; SHIFT(tp, d); \ + if (tp > ep) \ + break; \ + if (TRANS(tp[-1]) != gc1) \ + { \ + d = d1[U(tp[-1])]; LAST_SHIFT; \ + break; \ + } \ + skip = 1; \ + } \ } - *tpp = tp; - return false; -} - -/* Return the address of the first byte in the buffer S that equals C. - S contains N bytes. If TRANS is nonnull, use it to transliterate - S's bytes before comparing them. */ -static char const * -memchr_trans (char const *s, char c, size_t n, char const *trans) -{ - if (! trans) - return memchr (s, c, n); - char const *slim = s + n; - for (; s < slim; s++) - if (trans[U(*s)] == c) - return s; - return NULL; -} /* Fast boyer-moore search. */ static size_t _GL_ATTRIBUTE_PURE @@ -545,7 +569,8 @@ bmexec (kwset_t kwset, char const *text, size_t size) { unsigned char const *d1; char const *ep, *sp, *tp; - int d; + int d, i, skip; + char gc1, gc2; int len = kwset->mind; char const *trans = kwset->trans; @@ -555,48 +580,94 @@ bmexec (kwset_t kwset, char const *text, size_t size) return -1; if (len == 1) { - tp = memchr_trans (text, kwset->target[0], size, trans); - return tp ? tp - text : -1; + if (trans) + { + for (tp = text; tp < text + size; tp++) + if (trans[U(*tp)] == kwset->target[0]) + return tp - text; + return -1; + } + else + { + tp = memchr (text, kwset->target[0], size); + return tp ? tp - text : -1; + } } d1 = kwset->delta; sp = kwset->target + len; tp = text + len; - char gc1 = tr (trans, sp[-1]); - char gc2 = tr (trans, sp[-2]); + if (trans) + { + gc1 = trans[U(sp[-1])]; + gc2 = trans[U(sp[-2])]; + } + else + { + gc1 = sp[-1]; + gc2 = sp[-2]; + } + skip = 0; /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ if (size > 12 * len) /* 11 is not a bug, the initial offset happens only once. */ - for (ep = text + size - 11 * len; tp <= ep; ) + for (ep = text + size - 11 * len;;) { - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d != 0) + while (tp <= ep) { - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d != 0) + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + if (d == 0) + goto found; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + if (d == 0) + goto found; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + if (d == 0) + goto found; + /* memchar() of glibc is faster than seeking by delta1 on + some platforms. When there is no chance to match for a + while, use it on them. */ +#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__)) + if (!trans) { - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d != 0) + tp = memchr (tp - 1, gc1, size + text - tp + 1); + if (tp) { - /* Typically memchr is faster than seeking by - delta1 when there is no chance to match for - a while. */ - tp--; - tp = memchr_trans (tp, gc1, text + size - tp, trans); - if (! tp) - return -1; - tp++; + ++tp; + goto found; } + else + return -1; + } + else +#endif + { + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); } } - if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset)) - return tp - text; + break; + found: +#undef LAST_SHIFT +#define LAST_SHIFT SHIFT(tp, d); + if (trans) + { +#undef TRANS +#define TRANS(ch) trans[U(ch)] + BM_DELTA2_SEARCH; + } + else + { +#undef TRANS +#define TRANS(ch) ch + BM_DELTA2_SEARCH; + } } /* Now we have only a few characters left to search. We @@ -605,11 +676,24 @@ bmexec (kwset_t kwset, char const *text, size_t size) d = d1[U(tp[-1])]; while (d <= ep - tp) { - d = d1[U((tp += d)[-1])]; + SHIFT(tp, d); + d = d1[U(tp[-1])]; if (d != 0) continue; - if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset)) - return tp - text; +#undef LAST_SHIFT +#define LAST_SHIFT + if (trans) + { +#undef TRANS +#define TRANS(ch) trans[U(ch)] + BM_DELTA2_SEARCH; + } + else + { +#undef TRANS +#define TRANS(ch) ch + BM_DELTA2_SEARCH; + } } return -1; @@ -659,17 +743,19 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) { if (qlim && end <= qlim) { - end += d - 1; - while ((d = delta[c = *end]) && end < qlim) + SHIFT(end, d); + while ((d = delta[c = end[-1]]) && end <= qlim) { - end += d; - end += delta[U(*end)]; - end += delta[U(*end)]; + SHIFT(end, d); + d = delta[U(end[-1])]; SHIFT(end, d); + d = delta[U(end[-1])]; SHIFT(end, d); } - ++end; } else - d = delta[c = (end += d)[-1]]; + { + SHIFT(end, d); + d = delta[c = end[-1]]; + } if (d) continue; beg = end - 1; @@ -718,7 +804,8 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) d = 1; while (lim - end >= d) { - if ((d = delta[c = (end += d)[-1]]) != 0) + SHIFT(end, d); + if ((d = delta[c = end[-1]]) != 0) continue; beg = end - 1; if (!(trie = next[c]))