From e89a7e6d4be4669a8a73650c28bb1eb69399d703 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Fri, 23 Dec 2016 12:43:46 -0800
Subject: [PATCH 5/8] grep: speed up -wf in C locale

Problem reported by Norihiro Tanaka (Bug#22357#100).
This patch improves the performance on that benchmark on my
platform so that grep is now only about 2x slower than grep 2.26,
which means it is considerably faster than grep 2.25 and earlier.
* src/kwsearch.c (Fexecute): Use wordchars_size to boost
performance for this case.
* src/search.h, src/searchutils.c (wordchars_size): New function.
---
 src/kwsearch.c    | 6 ++++++
 src/search.h      | 1 +
 src/searchutils.c | 9 +++++++++
 3 files changed, 16 insertions(+)

diff --git a/src/kwsearch.c b/src/kwsearch.c
index b30dfd0..6005b60 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -150,6 +150,12 @@ Fexecute (char const *buf, size_t size, size_t *match_size,
               break;
             len = kwsmatch.size[0];
           }
+
+      /* No word match was found at BEG. Skip past word constituents,
+         since they cannot precede the next match and not skipping
+         them could make things much slower. */
+      beg += wordchars_size (beg, buf + size);
+      mb_start = beg;
     } /* for (beg in buf) */
 
   return -1;
diff --git a/src/search.h b/src/search.h
index 6fe1797..1def4d6 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,6 +48,7 @@ typedef signed char mb_len_map_t;
 /* searchutils.c */
 extern void wordinit (void);
 extern kwset_t kwsinit (bool);
+extern size_t wordchars_size (char const *, char const *);
 extern size_t wordchar_next (char const *, char const *);
 extern bool wordchar_prev (char const *, char const *, char const *);
 extern ptrdiff_t mb_goback (char const **, char const *, char const *);
diff --git a/src/searchutils.c b/src/searchutils.c
index e0a1db3..6f6ae0b 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -146,6 +146,15 @@ wordchars_count (char const *buf, char const *end, bool countall)
   return n;
 }
 
+/* Examine the start of BUF for the longest prefix containing just
+   word constituents. Return the total number of bytes in the prefix.
+   The buffer ends at END. */
+size_t
+wordchars_size (char const *buf, char const *end)
+{
+  return wordchars_count (buf, end, true);
+}
+
 /* If BUF starts with a word constituent, return the number of bytes
    used to represent it; otherwise, return zero. The buffer ends at END. */
 size_t
-- 
2.7.4