From b6b43bea32b5e059929fa58ed8ad182fe5fa1ecd Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 12 Nov 2021 16:56:53 -0800 Subject: [PATCH 02/12] maint: minor rewording and reindenting --- NEWS | 4 ++++ TODO | 4 ++-- m4/pcre.m4 | 8 ++++---- src/pcresearch.c | 44 ++++++++++++++++++++++---------------------- tests/pcre-abort | 2 +- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/NEWS b/NEWS index 4a62fb7..2f63071 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,14 @@ GNU grep NEWS -*- outline -*- ** Changes in behavior + The -P option is now based on PCRE2 instead of the older PCRE, + thanks to code contributed by Carlo Arenas. + The egrep and fgrep commands, which have been deprecated since release 2.5.3 (2007), now warn that they are obsolescent and should be replaced by grep -E and grep -F. + * Noteworthy changes in release 3.7 (2021-08-14) [stable] ** Changes in behavior diff --git a/TODO b/TODO index 5211ac1..0b82eff 100644 --- a/TODO +++ b/TODO @@ -31,13 +31,13 @@ GNU grep originally did 32-bit arithmetic. Although it has moved to 64-bit on 64-bit platforms by using types like ptrdiff_t and size_t, this conversion has not been entirely systematic and should be checked. -Lazy dynamic linking of libpcre. See Debian’s 03-397262-dlopen-pcre.patch. +Lazy dynamic linking of the PCRE library. Check FreeBSD’s integration of zgrep (-Z) and bzgrep (-J) in one binary. Is there a possibility of doing even better by automatically checking the magic of binary files ourselves (0x1F 0x8B for gzip, 0x1F 0x9D for compress, and 0x42 0x5A 0x68 for bzip2)? Once what to do with -libpcre is decided, do the same for libz and libbz2. +the PCRE library is decided, do the same for libz and libbz2. 
=================== diff --git a/m4/pcre.m4 b/m4/pcre.m4 index a1c6c82..970a229 100644 --- a/m4/pcre.m4 +++ b/m4/pcre.m4 @@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE], [ AC_ARG_ENABLE([perl-regexp], AS_HELP_STRING([--disable-perl-regexp], - [disable perl-regexp (pcre2) support]), + [disable perl-regexp (PCRE) support]), [case $enableval in yes|no) test_pcre=$enableval;; *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);; @@ -42,16 +42,16 @@ AC_DEFUN([gl_FUNC_PCRE], if test "$pcre_cv_have_pcre2_compile" = yes; then use_pcre=yes elif test $test_pcre = maybe; then - AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.]) + AC_MSG_WARN([AC_PACKAGE_NAME will be built without PCRE support.]) else - AC_MSG_ERROR([pcre support not available]) + AC_MSG_ERROR([PCRE support not available]) fi fi if test $use_pcre = yes; then AC_DEFINE([HAVE_LIBPCRE], [1], [Define to 1 if you have the Perl Compatible Regular Expressions - library (-lpcre2).]) + library.]) else PCRE_CFLAGS= PCRE_LIBS= diff --git a/src/pcresearch.c b/src/pcresearch.c index 630678b..daa0c42 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -16,9 +16,6 @@ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -/* Written August 1992 by Mike Haertel. */ -/* Updated for PCRE2 by Carlo Arenas. */ - #include <config.h> #include "search.h" #include "die.h" @@ -26,24 +23,27 @@ #define PCRE2_CODE_UNIT_WIDTH 8 #include <pcre2.h> -/* Needed for backward compatibility for PCRE2 < 10.30 */ +/* For PCRE2 < 10.30. */ #ifndef PCRE2_CONFIG_DEPTHLIMIT -#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT -#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT -#define pcre2_set_depth_limit pcre2_set_recursion_limit +# define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT +# define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT +# define pcre2_set_depth_limit pcre2_set_recursion_limit #endif struct pcre_comp { - /* The JIT stack and its maximum size. 
*/ - pcre2_jit_stack *jit_stack; - PCRE2_SIZE jit_stack_size; - /* Compiled internal form of a Perl regular expression. */ pcre2_code *cre; + + /* Match context and data block. */ pcre2_match_context *mcontext; pcre2_match_data *data; - /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty + + /* The JIT stack and its maximum size. */ + pcre2_jit_stack *jit_stack; + PCRE2_SIZE jit_stack_size; + + /* Table, indexed by ! (flag & PCRE2_NOTBOL), of whether the empty string matches when that flag is used. */ int empty_match[2]; }; @@ -59,7 +59,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, { while (true) { - int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, + int e = pcre2_match (pc->cre, (PCRE2_SPTR) subject, search_bytes, search_offset, options, pc->data, pc->mcontext); if (e == PCRE2_ERROR_JIT_STACKLIMIT && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) @@ -118,7 +118,7 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) char *patlim = pattern + size; char *n = (char *)re; struct pcre_comp *pc = xcalloc (1, sizeof (*pc)); - pcre2_compile_context *ccontext = pcre2_compile_context_create(NULL); + pcre2_compile_context *ccontext = pcre2_compile_context_create (NULL); if (localeinfo.multibyte) { @@ -126,11 +126,11 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales")); flags |= PCRE2_UTF; #if 0 - /* do not match individual code units but only UTF-8 */ + /* Do not match individual code units but only UTF-8. */ flags |= PCRE2_NEVER_BACKSLASH_C; #endif #ifdef PCRE2_MATCH_INVALID_UTF - /* consider invalid UTF-8 as a barrier, instead of error */ + /* Consider invalid UTF-8 as a barrier, instead of error. 
*/ flags |= PCRE2_MATCH_INVALID_UTF; #endif } @@ -149,13 +149,13 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) n += size; if (match_words && !match_lines) { - strcpy (n, wsuffix); - n += strlen(wsuffix); + strcpy (n, wsuffix); + n += strlen (wsuffix); } if (match_lines) { - strcpy (n, xsuffix); - n += strlen(xsuffix); + strcpy (n, xsuffix); + n += strlen (xsuffix); } pcre2_set_character_tables (ccontext, pcre2_maketables (NULL)); @@ -204,8 +204,8 @@ Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size, do { - /* Search line by line. Although this code formerly used - PCRE_MULTILINE for performance, the performance wasn't always + /* Search line by line. Although this formerly used something like + PCRE2_MULTILINE for performance, the performance wasn't always better and the correctness issues were too puzzling. See Bug#22655. */ line_end = rawmemchr (p, eolbyte); diff --git a/tests/pcre-abort b/tests/pcre-abort index 51cee25..772a1d2 100755 --- a/tests/pcre-abort +++ b/tests/pcre-abort @@ -1,5 +1,5 @@ #! /bin/sh -# Show that grep handles PCRE's PCRE_ERROR_MATCHLIMIT. +# Show that grep handles PCRE2_ERROR_MATCHLIMIT. # In grep-2.8, it would abort. # # Copyright (C) 2011-2021 Free Software Foundation, Inc. -- 2.32.0