>From c4d6d577820f6d40cc0f34be56fc8d8795381fef Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 24 Oct 2014 13:27:29 -0700 Subject: [PATCH 1/2] grep: fix grep -P crash Reported by Shlomi Fish in: http://bugs.gnu.org/18806 Commit 9fa500407137f49f6edc3c6b4ee6c7096f0190c5 (2014-09-16) is a hack that I put in to speed up 'grep -P'. Unfortunately, not only is it violation of modularity, it's also a bug magnet, as we have found out with Bug#18738 and Bug#18806. Remove the optimization instead of applying more bandaids. Perhaps we can think of a better way of doing the optimization, or perhaps we can just live with a slower grep -P (as -P is inherently slower anyway...). * src/grep.c, src/grep.h (validated_boundary): Remove. All uses removed. * src/pcresearch.c (Pexecute): Do not worry about validated_boundary. --- src/grep.c | 3 --- src/grep.h | 4 ---- src/pcresearch.c | 37 ++++++++++++++----------------------- 3 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/grep.c b/src/grep.c index a0f2620..0a4ac27 100644 --- a/src/grep.c +++ b/src/grep.c @@ -352,7 +352,6 @@ bool match_words; bool match_lines; char eolbyte; enum textbin input_textbin; -char const *validated_boundary; static char const *matcher; @@ -1226,7 +1225,6 @@ grepbuf (char const *beg, char const *lim) intmax_t outleft0 = outleft; char const *p; char const *endp; - validated_boundary = beg; for (p = beg; p < lim; p = endp) { @@ -2516,7 +2514,6 @@ main (int argc, char **argv) /* We need one byte prior and one after. */ char eolbytes[3] = { 0, eolbyte, 0 }; size_t match_size; - validated_boundary = eolbytes + 1; skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0) == out_invert); diff --git a/src/grep.h b/src/grep.h index 86259fb..02052b4 100644 --- a/src/grep.h +++ b/src/grep.h @@ -47,8 +47,4 @@ enum textbin /* Input file type. */ extern enum textbin input_textbin; -/* Validation boundary. Earlier bytes have already been validated by - the PCRE matcher, which cares about this sort of thing. */ -extern char const *validated_boundary; - #endif diff --git a/src/pcresearch.c b/src/pcresearch.c index 1fd5bde..5451029 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -156,7 +156,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size, char const *line_start = buf; int e = PCRE_ERROR_NOMATCH; char const *line_end; - char const *validated = validated_boundary; /* If the input type is unknown, the caller is still testing the input, which means the current buffer cannot contain encoding @@ -210,34 +209,28 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int options = 0; if (!bol) options |= PCRE_NOTBOL; - if (multiline || p + search_bytes <= validated) + if (multiline) options |= PCRE_NO_UTF8_CHECK; - int valid_bytes = validated - p; - if (valid_bytes <= 0) + e = pcre_exec (cre, extra, p, search_bytes, 0, + options, sub, NSUB); + if (e != PCRE_ERROR_BADUTF8) { - e = pcre_exec (cre, extra, p, search_bytes, 0, - options, sub, NSUB); - if (e != PCRE_ERROR_BADUTF8) + if (0 < e && multiline && sub[1] - sub[0] != 0) { - validated = p + search_bytes; - if (0 < e && multiline && sub[1] - sub[0] != 0) + char const *nl = memchr (p + sub[0], eolbyte, + sub[1] - sub[0]); + if (nl) { - char const *nl = memchr (p + sub[0], eolbyte, - sub[1] - sub[0]); - if (nl) - { - /* This match crosses a line boundary; reject it. */ - p += sub[0]; - line_end = nl; - continue; - } + /* This match crosses a line boundary; reject it. */ + p += sub[0]; + line_end = nl; + continue; } - break; } - valid_bytes = sub[0]; - validated = p + valid_bytes; + break; } + int valid_bytes = sub[0]; /* Try to match the string before the encoding error. Again, handle the empty-match case specially, for speed. */ @@ -263,8 +256,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size, bol = true; } - validated_boundary = validated; - if (e <= 0) { switch (e) -- 1.9.3