From 9311cf9c4f1e6a97c2e01e4a86f8f937c8010a01 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 31 Dec 2013 08:15:07 -0800 Subject: [PATCH] pcre: use PCRE_NO_UTF8_CHECK properly In order to obtain the behavior we want, i.e., to disable error-on-invalid-UTF-in-input, apply this PCRE option in pcre_exec, not when compiling. * src/pcresearch.c (Pexecute): Use PCRE_NO_UTF8_CHECK here, ... (Pcompile): ...rather than here. * tests/pcre-invalid-utf8-input: Adjust test case to test for this. --- src/pcresearch.c | 12 ++++++++---- tests/pcre-invalid-utf8-input | 11 ++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/pcresearch.c b/src/pcresearch.c index 664070d..4abd9c2 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -63,9 +63,9 @@ Pcompile (char const *pattern, size_t size) # if defined HAVE_LANGINFO_CODESET if (STREQ (nl_langinfo (CODESET), "UTF-8")) { - /* Enable PCRE's UTF-8 matching, but disable the check that would - make an invalid byte seqence *in the input* trigger a failure. */ - flags |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; + /* Enable PCRE's UTF-8 matching. Note also the use of + PCRE_NO_UTF8_CHECK when calling pcre_exec, below. */ + flags |= PCRE_UTF8; } # endif @@ -158,6 +158,10 @@ Pexecute (char const *buf, size_t size, size_t *match_size, e == PCRE_ERROR_NOMATCH && line_next < buf + size; start_ofs -= line_next - line_buf) { + /* Disable the check that would make an invalid byte + sequence *in the input* trigger a failure. */ + int options = PCRE_NO_UTF8_CHECK; + line_buf = line_next; line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf); if (line_end == NULL) @@ -172,7 +176,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size, error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); e = pcre_exec (cre, extra, line_buf, line_end - line_buf, - start_ofs < 0 ? 0 : start_ofs, 0, + start_ofs < 0 ? 
0 : start_ofs, options, sub, sizeof sub / sizeof *sub); } diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input index 52a5432..c70951f 100755 --- a/tests/pcre-invalid-utf8-input +++ b/tests/pcre-invalid-utf8-input @@ -13,13 +13,10 @@ require_en_utf8_locale_ fail=0 -printf '\202\n' > in || framework_failure_ -printf 'grep: invalid UTF-8 byte sequence in input\n' \ - > exp-err || framework_failure_ +printf 'j\202\nj\n' > in || framework_failure_ -LC_ALL=en_US.UTF-8 grep -P anything in > out 2> err -test $? = 2 || fail=1 -compare /dev/null out || fail=1 -compare exp-err err || fail=1 +LC_ALL=en_US.UTF-8 grep -P j in > out 2>&1 || fail=1 +compare in out || fail=1 +compare /dev/null err || fail=1 Exit $fail -- 1.8.5.rc2.6.gc6f1b92