From 1b22722ed769d7f53d3e0f6e21962d38b4a7f3df Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Fri, 11 Apr 2014 21:34:11 +0900 Subject: [PATCH] grep: no match for the empty string included in multiple patterns * src/dfasearch.c (GEAcompile): Fix it. * src/kwsearch.c (Fcompile): Fix it. --- src/dfasearch.c | 11 +++++++++++ src/kwsearch.c | 47 ++++++++++++++++++++++++----------------------- tests/Makefile.am | 1 + tests/empty-line | 17 +++++++++++++++++ 4 files changed, 53 insertions(+), 23 deletions(-) create mode 100755 tests/empty-line diff --git a/src/dfasearch.c b/src/dfasearch.c index 2ae0a4a..39ea442 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -152,6 +152,17 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) p = sep; } while (sep && total != 0); + if (sep) + { + patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); + patterns[pcount] = patterns0; + + if ((err = re_compile_pattern ("", 0, + &(patterns[pcount].regexbuf))) != NULL) + error (EXIT_TROUBLE, 0, "%s", err); + pcount++; + } + /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher diff --git a/src/kwsearch.c b/src/kwsearch.c index df94951..7fe8e48 100644 --- a/src/kwsearch.c +++ b/src/kwsearch.c @@ -32,42 +32,43 @@ static kwset_t kwset; void Fcompile (char const *pattern, size_t size) { - size_t psize = size; + char const *p, *sep; + size_t total = size; mb_len_map_t *map = NULL; char const *pat = (match_icase && MB_CUR_MAX > 1 - ? mbtoupper (pattern, &psize, &map) + ? 
mbtoupper (pattern, &total, &map) : pattern); kwsinit (&kwset); - char const *beg = pat; + p = pat; do { - char const *lim; - char const *end; - for (lim = beg;; ++lim) + size_t len; + sep = memchr (p, '\n', total); + if (sep) { - end = lim; - if (lim >= pat + psize) - break; - if (*lim == '\n') - { - lim++; - break; - } + len = sep - p; + sep++; + total -= (len + 1); #if HAVE_DOS_FILE_CONTENTS - if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n') - { - lim += 2; - break; - } + if (sep[-1] == '\r') + --len; #endif } + else + { + len = total; + total = 0; + } - kwsincr (kwset, beg, end - beg); - beg = lim; - } - while (beg < pat + psize); + kwsincr (kwset, p, len); + + p = sep; + } while (sep && total != 0); + + if (sep) + kwsincr (kwset, "", 0); kwsprep (kwset); } diff --git a/tests/Makefile.am b/tests/Makefile.am index 219e96a..49d6cba 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -54,6 +54,7 @@ TESTS = \ dfa-heap-overrun \ dfaexec-multibyte \ empty \ + empty-line \ epipe \ equiv-classes \ ere \ diff --git a/tests/empty-line b/tests/empty-line new file mode 100755 index 0000000..aeaa6ca --- /dev/null +++ b/tests/empty-line @@ -0,0 +1,17 @@ +#! /bin/sh +# This would fail for grep-2.18 + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +fail=0 +printf 'abc\n' >in || framework_failure_ + +printf 'foo\n\n' >pat || framework_failure_ +grep -F -f pat in >out || fail=1 +compare in out || fail=1 + +printf '\(\)\\1foo\n\n' >pat || framework_failure_ +grep -f pat in >out || fail=1 +compare in out || fail=1 + +Exit $fail -- 1.9.2