From 70fd81f7cf7311589b143e0adaa2aad41e7ab0da Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Thu, 10 Apr 2014 20:52:54 +0900 Subject: [PATCH 3/3] grep: speed-up by replacing `incr' to `add' in x86 and x86-64 The increment instrument is more faster than the add instuction in x86 and x86-64 architecture. So prefer the add instrument to the increment instrument for them. * src/kwset.c (SHIFT): New macro. It uses the increment instrument instead of the add instrument for x86 and x86-64. (bmexec, cwexec): Use it. --- src/kwset.c | 92 +++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/src/kwset.c b/src/kwset.c index c95bc56..998aab7 100644 --- a/src/kwset.c +++ b/src/kwset.c @@ -55,6 +55,35 @@ #define U(c) (to_uchar (c)) +#if defined(__i386__) || defined(__x86_64__) +#define SHIFT(p, d) \ + switch (d) \ + { \ + default: \ + p += d; \ + break; \ + case 8: \ + ++p; \ + case 7: \ + ++p; \ + case 6: \ + ++p; \ + case 5: \ + ++p; \ + case 4: \ + ++p; \ + case 3: \ + ++p; \ + case 2: \ + ++p; \ + case 1: \ + ++p; \ + } +#else +#define SHIFT(p, d) \ + p += d; +#endif + /* Balanced tree of edges and labels leaving a given trie node. */ struct tree { @@ -517,18 +546,18 @@ bmexec (kwset_t kwset, char const *text, size_t size) { while (tp <= ep) { - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); if (d == 0) goto found; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); if (d == 0) goto found; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); if (d == 0) goto found; /* memchar() of glibc is faster than seeking by delta1 on @@ -549,8 +578,8 @@ bmexec (kwset_t kwset, char const *text, size_t size) else #endif { - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); + d = d1[U(tp[-1])]; SHIFT(tp, d); } } break; @@ -573,24 +602,24 @@ bmexec (kwset_t kwset, char const *text, size_t size) if (i > len) return tp - len - text; } - d = kwset->shift[i - 2]; tp += d; + d = kwset->shift[i - 2]; SHIFT(tp, d); if (tp > ep) break; if (trans[U(tp[-1])] != gc1) { - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); break; } skip = i - 1; } else { - d = kwset->shift[0]; tp += d; + d = kwset->shift[0]; SHIFT(tp, d); if (tp > ep) break; if (trans[U(tp[-1])] != gc1) { - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); break; } skip = 1; @@ -614,24 +643,24 @@ bmexec (kwset_t kwset, char const *text, size_t size) if (i > len) return tp - len - text; } - d = kwset->shift[i - 2]; tp += d; + d = kwset->shift[i - 2]; SHIFT(tp, d); if (tp > ep) break; if (tp[-1] != gc1) { - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); break; } skip = i - 1; } else { - d = kwset->shift[0]; tp += d; + d = kwset->shift[0]; SHIFT(tp, d); if (tp > ep) break; if (tp[-1] != gc1) { - d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])]; SHIFT(tp, d); break; } skip = 1; @@ -646,7 +675,8 @@ bmexec (kwset_t kwset, char const *text, size_t size) d = d1[U(tp[-1])]; while (d <= ep - tp) { - d = d1[U((tp += d)[-1])]; + SHIFT(tp, d); + d = d1[U(tp[-1])]; if (d != 0) continue; d = len; @@ -667,7 +697,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) if (i > len) return tp - len - text; } - d = kwset->shift[i - 2]; tp += d; + d = kwset->shift[i - 2]; SHIFT(tp, d); if (tp > ep) break; if (trans[U(tp[-1])] != gc1) @@ -679,7 +709,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) } else { - d = kwset->shift[0]; tp += d; + d = kwset->shift[0]; SHIFT(tp, d); if (tp > ep) break; if (trans[U(tp[-1])] != gc1) @@ -708,7 +738,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) if (i > len) return tp - len - text; } - d = kwset->shift[i - 2]; tp += d; + d = kwset->shift[i - 2]; SHIFT(tp, d); if (tp > ep) break; if (tp[-1] != gc1) @@ -720,7 +750,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) } else { - d = kwset->shift[0]; tp += d; + d = kwset->shift[0]; SHIFT(tp, d); if (tp > ep) break; if (tp[-1] != gc1) @@ -781,17 +811,18 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) { if (qlim && end <= qlim) { - end += d - 1; while ((d = delta[c = *end]) && end < qlim) { - end += d; - end += delta[U(*end)]; - end += delta[U(*end)]; + SHIFT(end, d); + d = delta[U(end[-1])]; SHIFT(end, d); + d = delta[U(end[-1])]; SHIFT(end, d); } - ++end; } else - d = delta[c = (end += d)[-1]]; + { + SHIFT(end, d); + d = delta[c = end[-1]]; + } if (d) continue; beg = end - 1; @@ -840,7 +871,8 @@ cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch) d = 1; while (lim - end >= d) { - if ((d = delta[c = (end += d)[-1]]) != 0) + SHIFT(end, d); + if ((d = delta[c = end[-1]]) != 0) continue; beg = end - 1; if (!(trie = next[c])) -- 1.9.2