[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.18-91-g14892aa
From: Paul Eggert
Subject: grep branch, master, updated. v2.18-91-g14892aa
Date: Wed, 23 Apr 2014 06:55:27 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".
The branch, master has been updated
via 14892aa6e0c21f49e5ec6d203253074ef15fedb0 (commit)
via 73893ffbada36599fb6ec2eb489b6a7decf0c248 (commit)
from c7ea5aea911b950b2398454ca89cce23cabd3a40 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=14892aa6e0c21f49e5ec6d203253074ef15fedb0
commit 14892aa6e0c21f49e5ec6d203253074ef15fedb0
Author: Paul Eggert <address@hidden>
Date: Tue Apr 22 23:34:22 2014 -0700
kwset: simplify and speed up Boyer-Moore unibyte -i in some cases
This improves the performance of, for example,
yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
in a unibyte locale.
* src/kwset.c (memchr_trans): New function.
(bmexec): Use it. Simplify the code and remove some of the
confusing gotos and breaks and labels. Do not treat glibc memchr
as a special case; if non-glibc memchr is slow, that is lower
priority and I suppose we can try to work around the problem in
gnulib.
diff --git a/src/kwset.c b/src/kwset.c
index 78fb0b2..f86ee03 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
return false;
}
+/* Return the address of the first byte in the buffer S that equals C.
+ S contains N bytes. If TRANS is nonnull, use it to transliterate
+ S's bytes before comparing them. */
+static char const *
+memchr_trans (char const *s, char c, size_t n, char const *trans)
+{
+ if (! trans)
+ return memchr (s, c, n);
+ char const *slim = s + n;
+ for (; s < slim; s++)
+ if (trans[U(*s)] == c)
+ return s;
+ return NULL;
+}
/* Fast boyer-moore search. */
static size_t _GL_ATTRIBUTE_PURE
@@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
return -1;
if (len == 1)
{
- if (trans)
- {
- for (tp = text; tp < text + size; tp++)
- if (trans[U(*tp)] == kwset->target[0])
- return tp - text;
- return -1;
- }
- else
- {
- tp = memchr (text, kwset->target[0], size);
- return tp ? tp - text : -1;
- }
+ tp = memchr_trans (text, kwset->target[0], size, trans);
+ return tp ? tp - text : -1;
}
d1 = kwset->delta;
@@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
if (size > 12 * len)
/* 11 is not a bug, the initial offset happens only once. */
- for (ep = text + size - 11 * len;;)
+ for (ep = text + size - 11 * len; tp <= ep; )
{
- while (tp <= ep)
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
{
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- /* memchar() of glibc is faster than seeking by delta1 on
- some platforms. When there is no chance to match for a
- while, use it on them. */
-#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
- if (!trans)
- {
- tp = memchr (tp - 1, gc1, size + text - tp + 1);
- if (tp)
- {
- ++tp;
- goto found;
- }
- else
- return -1;
- }
- else
-#endif
+ if (d != 0)
{
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
+ {
+ /* Typically memchr is faster than seeking by
+ delta1 when there is no chance to match for
+ a while. */
+ tp--;
+ tp = memchr_trans (tp, gc1, text + size - tp, trans);
+ if (! tp)
+ return -1;
+ tp++;
+ }
}
}
- break;
- found:
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
return tp - text;
}
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=73893ffbada36599fb6ec2eb489b6a7decf0c248
commit 73893ffbada36599fb6ec2eb489b6a7decf0c248
Author: Paul Eggert <address@hidden>
Date: Tue Apr 22 23:34:22 2014 -0700
kwset: simplify and speed up Boyer-Moore unibyte -i in some cases
This improves the performance of, for example,
yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
in a unibyte locale.
* src/kwset.c (memchr_trans): New function.
(bmexec): Use it. Simplify the code and remove some of the
confusing gotos and breaks and labels. Do not treat glibc memchr
as a special case; if non-glibc memchr is slow, that is lower
priority and I suppose we can try to work around the problem in
gnulib.
diff --git a/src/kwset.c b/src/kwset.c
index 78fb0b2..f86ee03 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
return false;
}
+/* Return the address of the first byte in the buffer S that equals C.
+ S contains N bytes. If TRANS is nonnull, use it to transliterate
+ S's bytes before comparing them. */
+static char const *
+memchr_trans (char const *s, char c, size_t n, char const *trans)
+{
+ if (! trans)
+ return memchr (s, c, n);
+ char const *slim = s + n;
+ for (; s < slim; s++)
+ if (trans[U(*s)] == c)
+ return s;
+ return NULL;
+}
/* Fast boyer-moore search. */
static size_t _GL_ATTRIBUTE_PURE
@@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
return -1;
if (len == 1)
{
- if (trans)
- {
- for (tp = text; tp < text + size; tp++)
- if (trans[U(*tp)] == kwset->target[0])
- return tp - text;
- return -1;
- }
- else
- {
- tp = memchr (text, kwset->target[0], size);
- return tp ? tp - text : -1;
- }
+ tp = memchr_trans (text, kwset->target[0], size, trans);
+ return tp ? tp - text : -1;
}
d1 = kwset->delta;
@@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
if (size > 12 * len)
/* 11 is not a bug, the initial offset happens only once. */
- for (ep = text + size - 11 * len;;)
+ for (ep = text + size - 11 * len; tp <= ep; )
{
- while (tp <= ep)
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
{
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
- d = d1[U(tp[-1])], tp += d;
- if (d == 0)
- goto found;
- /* memchar() of glibc is faster than seeking by delta1 on
- some platforms. When there is no chance to match for a
- while, use it on them. */
-#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
- if (!trans)
- {
- tp = memchr (tp - 1, gc1, size + text - tp + 1);
- if (tp)
- {
- ++tp;
- goto found;
- }
- else
- return -1;
- }
- else
-#endif
+ if (d != 0)
{
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
+ {
+ /* Typically memchr is faster than seeking by
+ delta1 when there is no chance to match for
+ a while. */
+ tp--;
+ tp = memchr_trans (tp, gc1, text + size - tp, trans);
+ if (! tp)
+ return -1;
+ tp++;
+ }
}
}
- break;
- found:
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
return tp - text;
}
-----------------------------------------------------------------------
Summary of changes:
src/kwset.c | 66 +++++++++++++++++++++++++++++++++-------------------------
1 files changed, 37 insertions(+), 29 deletions(-)
hooks/post-receive
--
grep
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- grep branch, master, updated. v2.18-91-g14892aa,
Paul Eggert <=