From 9a9b4c59babc60e15a79cf7db8167d42e68e44b9 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 2 Nov 2013 12:15:48 -0700 Subject: [PATCH] grep: fix regression with -P vs. invalid UTF-8 input * src/pcresearch.c (Pexecute): Don't abort upon unexpected PCRE-specific error code. Explicitly handle PCRE_ERROR_BADUTF8, and change the default to print a diagnostic including the unhandled integer PCRE error code and exit with status 2. * tests/pcre-invalid-utf8-input: New file. * tests/Makefile.am (TESTS): Add it. * NEWS (Bug fixes): Mention it. * THANKS: Update. Reported by Dave Reisner in http://bugs.gnu.org/15758. --- NEWS | 6 ++++++ THANKS | 1 + src/pcresearch.c | 13 ++++++++++++- tests/Makefile.am | 1 + tests/pcre-invalid-utf8-input | 25 +++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 tests/pcre-invalid-utf8-input diff --git a/NEWS b/NEWS index 078d13b..e974f29 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,12 @@ GNU grep NEWS -*- outline -*- For example, \s*, \s+, \s? and \s{3} would all malfunction in a multi-byte locale. [bug introduced in grep-2.15] + The fix to make grep -P work better with UTF-8 made it possible for + grep to evoke a larger set of PCRE errors, some of which could trigger + an abort. E.g., this would abort: + printf '\x82'|LC_ALL=en_US.UTF-8 grep -P y + Now grep handles arbitrary PCRE errors. [bug introduced in grep-2.15] + * Noteworthy changes in release 2.15 (2013-10-26) [stable] diff --git a/THANKS b/THANKS index 475c51e..02ef246 100644 --- a/THANKS +++ b/THANKS @@ -20,6 +20,7 @@ Christian Groessler Corinna Vinschen Dagobert Michelsen Daisuke GOTO +Dave Reisner David Clissold David J MacKenzie David O'Brien diff --git a/src/pcresearch.c b/src/pcresearch.c index ad5999d..9ba1227 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -185,9 +185,20 @@ Pexecute (char const *buf, size_t size, size_t *match_size, error (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit")); + case PCRE_ERROR_BADUTF8: + error (EXIT_TROUBLE, 0, + _("invalid UTF-8 byte sequence in input")); + default: - abort (); + /* For now, we lump all remaining PCRE failures into this basket. + If anyone cares to provide sample grep usage that can trigger + particular PCRE errors, we can add to the list (above) of more + detailed diagnostics. */ + error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e); } + + /* NOTREACHED */ + return -1; } else { diff --git a/tests/Makefile.am b/tests/Makefile.am index 970a9de..76b8c52 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -79,6 +79,7 @@ TESTS = \ options \ pcre \ pcre-abort \ + pcre-invalid-utf8-input \ pcre-utf8 \ pcre-z \ prefix-of-multibyte \ diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input new file mode 100755 index 0000000..52a5432 --- /dev/null +++ b/tests/pcre-invalid-utf8-input @@ -0,0 +1,25 @@ +#! /bin/sh +# Ensure that grep -P doesn't abort for invalid multi-byte input +# +# Copyright (C) 2013 Free Software Foundation, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +require_pcre_ +require_en_utf8_locale_ + +fail=0 + +printf '\202\n' > in || framework_failure_ +printf 'grep: invalid UTF-8 byte sequence in input\n' \ + > exp-err || framework_failure_ + +LC_ALL=en_US.UTF-8 grep -P anything in > out 2> err +test $? = 2 || fail=1 +compare /dev/null out || fail=1 +compare exp-err err || fail=1 + +Exit $fail -- 1.8.5.rc0.23.gaa27064