>From bc5ac38040eb49c751e39700651f2b98cd866228 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 30 Dec 2019 00:52:10 -0800 Subject: [PATCH 2/3] doc: spell "back-reference" more consistently --- NEWS | 12 ++++++------ README | 2 +- doc/grep.in.1 | 2 +- src/dfasearch.c | 12 ++++++------ tests/backref | 2 +- tests/pcre-wx-backref | 2 +- tests/tests | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 4f04ec3..e4083a1 100644 --- a/NEWS +++ b/NEWS @@ -22,7 +22,7 @@ GNU grep NEWS -*- outline -*- [Bug#37716 introduced in grep 3.2] A performance bug has been fixed when grep is given many patterns, - each with no backreference. + each with no back-reference. [Bug#33249 introduced in grep 2.5] A performance bug has been fixed for patterns like '01.2' that @@ -47,7 +47,7 @@ GNU grep NEWS -*- outline -*- the following would print nothing (it should print the input line): echo 123-x|LC_ALL=C grep '.\bx' Using a multibyte locale, using certain regexp constructs (some ranges, - backreferences), or forcing use of the PCRE matcher via --perl-regexp (-P) + back-references), or forcing use of the PCRE matcher via --perl-regexp (-P) would avoid the bug. [bug introduced in grep 3.2] @@ -240,7 +240,7 @@ GNU grep NEWS -*- outline -*- grep -z would match strings it should not. To trigger the bug, you'd have to use a regular expression including an anchor (^ or $) and a - feature like a range or a backreference, causing grep to forego its DFA + feature like a range or a back-reference, causing grep to forego its DFA matcher and resort to using re_search. With a multibyte locale, that matcher could mistakenly match a string containing a newline. For example, this command: @@ -473,7 +473,7 @@ GNU grep NEWS -*- outline -*- Previously it was unreliable, and sometimes crashed or looped. [bug introduced in grep-2.16] - grep -P now works with -w and -x and backreferences. Before, + grep -P now works with -w and -x and back-references. Before, echo aa|grep -Pw '(.)\1' would fail to match, yet echo aa|grep -Pw '(.)\2' would match. @@ -809,7 +809,7 @@ GNU grep NEWS -*- outline -*- X{0,0} is implemented correctly. It used to be a synonym of X{0,1}. [bug present since "the beginning"] - In multibyte locales, regular expressions including backreferences + In multibyte locales, regular expressions including back-references no longer exhibit quadratic complexity (i.e., they are orders of magnitude faster). [bug present since multi-byte character set support was introduced in 2.5.2] @@ -967,7 +967,7 @@ Version 2.5 - The new option --line-buffered fflush on everyline. There is a noticeable slow down when forcing line buffering. - - Back references are now local to the regex. + - Back-references are now local to the regex. grep -e '\(a\)\1' -e '\(b\)\1' The last backref \1 in the second expression refer to \(b\) diff --git a/README b/README index 0120973..2c15d9b 100644 --- a/README +++ b/README @@ -17,7 +17,7 @@ twice as fast as stock Unix egrep) hybridized with a Boyer-Moore-Gosper search for a fixed string that eliminates impossible text from being considered by the full regexp matcher without necessarily having to look at every character. The result is typically many times faster -than Unix grep or egrep. (Regular expressions containing backreferencing +than Unix grep or egrep. (Regular expressions containing back-references will run more slowly, however.) See the files AUTHORS and THANKS for a list of authors and other contributors. diff --git a/doc/grep.in.1 b/doc/grep.in.1 index a91b2a6..e095d5c 100644 --- a/doc/grep.in.1 +++ b/doc/grep.in.1 @@ -920,7 +920,7 @@ Repetition takes precedence over concatenation, which in turn takes precedence over alternation. A whole expression may be enclosed in parentheses to override these precedence rules and form a subexpression. -.SS "Back References and Subexpressions" +.SS "Back-references and Subexpressions" The back-reference .BI \e n\c \&, where diff --git a/src/dfasearch.c b/src/dfasearch.c index b1a242a..0160d71 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -103,8 +103,8 @@ kwsmusts (struct dfa_comp *dc) dfamustfree (dm); } -/* Return true if KEYS, of length LEN, might contain a backreference. - Return false if KEYS cannot contain a backreference. +/* Return true if KEYS, of length LEN, might contain a back-reference. + Return false if KEYS cannot contain a back-reference. BS_SAFE is true of encodings where a backslash cannot appear as the last byte of a multibyte character. */ static bool _GL_ATTRIBUTE_PURE @@ -116,13 +116,13 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe) multibyte character. */ int second_backslash = bs_safe ? '\\' : CHAR_MAX + 1; - /* This code can return true even if KEYS lacks a backreference, for + /* This code can return true even if KEYS lacks a back-reference, for patterns like [\2], or for encodings where '\' appears as the last byte of a multibyte character. However, false alarms should be rare and do not affect correctness. */ /* Do not look for a backslash in the pattern's last byte, since it - can't be part of a backreference and this streamlines the code. */ + can't be part of a back-reference and this streamlines the code. */ len--; if (0 <= len) @@ -204,7 +204,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) char const *prev = pattern; - /* Buffer containing backreference-free patterns. */ + /* Buffer containing back-reference-free patterns. */ char *buf = NULL; ptrdiff_t buflen = 0; size_t bufalloc = 0; @@ -449,7 +449,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, end = memchr (next_beg, eol, buflim - next_beg); end = end ? end + 1 : buflim; - /* Successful, no backreferences encountered! */ + /* Successful, no back-references encountered! */ if (!backref) goto success; ptr = beg; diff --git a/tests/backref b/tests/backref index eaa16cf..384b7ca 100755 --- a/tests/backref +++ b/tests/backref @@ -1,5 +1,5 @@ #! /bin/sh -# Test for backreferences and other things. +# Test for back-references and other things. # # Copyright (C) 2001, 2006, 2009-2019 Free Software Foundation, Inc. # diff --git a/tests/pcre-wx-backref b/tests/pcre-wx-backref index 63536f9..b63009a 100755 --- a/tests/pcre-wx-backref +++ b/tests/pcre-wx-backref @@ -1,5 +1,5 @@ #! /bin/sh -# Before grep-2.19, grep -P and -w/-x would not with a backreference. +# Before grep-2.19, grep -P and -w/-x would not work with a back-reference. # # Copyright (C) 2014-2019 Free Software Foundation, Inc. # diff --git a/tests/tests b/tests/tests index af9ae8a..f95c28a 100644 --- a/tests/tests +++ b/tests/tests @@ -152,7 +152,7 @@ a\\$ & a$ a\\$ & a\$ a\\$ & a\ a\ -# back references, ugh +# back-references, ugh ##a\(b\)\2c bC ESUBREG ##a\(b\1\)c bC ESUBREG a\(b*\)c\1d b abbcbbd abbcbbd bb @@ -166,7 +166,7 @@ a\(\([bc]\)\2\)*d b abbcbd a\(\(b\)*\2\)*d b abbbd abbbd # here is a case that no NFA implementation does right \(ab*\)[ab]*\1 b ababaaa ababaaa a -# check out normal matching in the presence of back refs +# check out normal matching in the presence of back-references \(a\)\1bcd b aabcd aabcd \(a\)\1bc*d b aabcd aabcd \(a\)\1bc*d b aabd aabd -- 2.17.1