>From d752bd5c9ab74d3f0114960345b99f1ab6dd8746 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 21 Aug 2018 14:25:57 -0600 Subject: [PATCH] maint: remove REG_PERL code Perl-regexp syntax (PCRE) in GNU Sed is shelved indefinitely. See https://bugs.gnu.org/22801 , https://bugs.gnu.org/22647 . Remove all (unused) REG_PERL related code. * sed/sed.c, sed/sed.h, sed/regexp.c, sed/compile.c: Remove REG_PERL code. --- sed/compile.c | 67 ------------------------------------------------------ sed/regexp.c | 73 ----------------------------------------------------------- sed/sed.c | 30 ------------------------ sed/sed.h | 10 -------- 4 files changed, 180 deletions(-) diff --git a/sed/compile.c b/sed/compile.c index 2ae92a6..6776b65 100644 --- a/sed/compile.c +++ b/sed/compile.c @@ -548,10 +548,8 @@ match_slash (int slash, int regex) ch = inchar (); if (ch == EOF) break; -#ifndef REG_PERL else if (ch == 'n' && regex) ch = '\n'; -#endif else if (ch != '\n' && (ch != slash || (!regex && ch == '&'))) add1_buffer (b, '\\'); } @@ -595,24 +593,6 @@ mark_subst_opts (struct subst *cmd) flags |= REG_ICASE; break; -#ifdef REG_PERL - case 's': /* GNU extension */ - case 'S': /* GNU extension */ - if (posixicity == POSIXLY_BASIC) - bad_prog (_(UNKNOWN_S_OPT)); - if (extended_regexp_flags & REG_PERL) - flags |= REG_DOTALL; - break; - - case 'x': /* GNU extension */ - case 'X': /* GNU extension */ - if (posixicity == POSIXLY_BASIC) - bad_prog (_(UNKNOWN_S_OPT)); - if (extended_regexp_flags & REG_PERL) - flags |= REG_EXTENDED; - break; -#endif - case 'm': /* GNU extension */ case 'M': /* GNU extension */ if (posixicity == POSIXLY_BASIC) @@ -930,18 +910,6 @@ compile_address (struct addr *addr, int ch) flags |= REG_ICASE; break; -#ifdef REG_PERL - case 'S': /* GNU extension */ - if (extended_regexp_flags & REG_PERL) - flags |= REG_DOTALL; - break; - - case 'X': /* GNU extension */ - if (extended_regexp_flags & REG_PERL) - flags |= REG_EXTENDED; - break; -#endif - case 'M': /* GNU 
extension */ flags |= REG_NEWLINE; break; @@ -1437,43 +1405,8 @@ normalize_text (char *buf, size_t len, enum text_types buftype) base = 16; goto convert; -#ifdef REG_PERL - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - if ((extended_regexp_flags & REG_PERL) - && p+1 < bufend - && p[1] >= '0' && p[1] <= '9') - { - base = 8; - goto convert; - } - else - { - /* we just pass the \ up one level for interpretation */ - if (buftype != TEXT_BUFFER) - *q++ = '\\'; - } - - continue; - - case 'o': /* octal byte */ - if (!(extended_regexp_flags & REG_PERL)) - { - base = 8; - goto convert; - } - else - { - /* we just pass the \ up one level for interpretation */ - if (buftype != TEXT_BUFFER) - *q++ = '\\'; - } - - continue; -#else case 'o': /* octal byte */ base = 8; -#endif convert: p = convert_number (&ch, p, bufend, base); diff --git a/sed/regexp.c b/sed/regexp.c index e575997..4ac06d6 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -57,20 +57,6 @@ dfawarn (char const *mesg) static void compile_regex_1 (struct regex *new_regex, int needed_sub) { -#ifdef REG_PERL - int errcode; - errcode = regncomp (&new_regex->pattern, new_regex->re, new_regex->sz, - (needed_sub ? 0 : REG_NOSUB) - | new_regex->flags - | extended_regexp_flags); - - if (errcode) - { - char errorbuf[200]; - regerror (errcode, NULL, errorbuf, 200); - bad_prog (gettext (errorbuf)); - } -#else const char *error; int syntax = ((extended_regexp_flags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED @@ -129,7 +115,6 @@ compile_regex_1 (struct regex *new_regex, int needed_sub) if (error) bad_prog (error); -#endif /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ if (needed_sub @@ -179,55 +164,13 @@ compile_regex (struct buffer *b, int flags, int needed_sub) new_regex->flags = flags; memcpy (new_regex->re, get_buffer (b), re_len); -#ifdef REG_PERL - new_regex->sz = re_len; -#else /* GNU regex does not process \t & co. 
*/ new_regex->sz = normalize_text (new_regex->re, re_len, TEXT_REGEX); -#endif compile_regex_1 (new_regex, needed_sub); return new_regex; } -#ifdef REG_PERL -static void -copy_regs (regs, pmatch, nregs) - struct re_registers *regs; - regmatch_t *pmatch; - int nregs; -{ - int i; - int need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code - uses. */ - - /* Have the register data arrays been allocated? */ - if (!regs->start) - { /* No. So allocate them with malloc. */ - regs->start = XCALLOC (need_regs, regoff_t); - regs->end = XCALLOC (need_regs, regoff_t); - regs->num_regs = need_regs; - } - else if (need_regs > regs->num_regs) - { /* Yes. We also need more elements than were already - allocated, so reallocate them. */ - regs->start = REALLOC (regs->start, need_regs, regoff_t); - regs->end = REALLOC (regs->end, need_regs, regoff_t); - regs->num_regs = need_regs; - } - - /* Copy the regs. */ - for (i = 0; i < nregs; ++i) - { - regs->start[i] = pmatch[i].rm_so; - regs->end[i] = pmatch[i].rm_eo; - } - for ( ; i < regs->num_regs; ++i) - regs->start[i] = regs->end[i] = -1; -} -#endif - int match_regex (struct regex *regex, char *buf, size_t buflen, size_t buf_start_offset, struct re_registers *regarray, @@ -235,11 +178,6 @@ match_regex (struct regex *regex, char *buf, size_t buflen, { int ret; static struct regex *regex_last; -#ifdef REG_PERL - regmatch_t rm[10], *regmatch = rm; - if (regsize > 10) - regmatch = alloca (sizeof (regmatch_t) * regsize); -#endif /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */ @@ -257,16 +195,6 @@ match_regex (struct regex *regex, char *buf, size_t buflen, if (buflen >= INT_MAX) panic (_("regex input buffer length larger than INT_MAX")); -#ifdef REG_PERL - regmatch[0].rm_so = (int)buf_start_offset; - regmatch[0].rm_eo = (int)buflen; - ret = regexec (&regex->pattern, buf, regsize, regmatch, REG_STARTEND); - - if (regsize) - copy_regs (regarray, regmatch, regsize); - - return (ret == 
0); -#else if (regex->pattern.no_sub && regsize) { /* Re-compiling an existing regex, free the previously allocated @@ -432,7 +360,6 @@ match_regex (struct regex *regex, char *buf, size_t buflen, regsize ? regarray : NULL); return (ret > -1); -#endif } diff --git a/sed/sed.c b/sed/sed.c index 0c5af66..6b8d081 100644 --- a/sed/sed.c +++ b/sed/sed.c @@ -109,10 +109,8 @@ static void contact (int errmsg) { FILE *out = errmsg ? stderr : stdout; -#ifndef REG_PERL fprintf (out, _("GNU sed home page: <https://www.gnu.org/software/sed/>.\n\ General help using GNU software: <https://www.gnu.org/gethelp/>.\n")); -#endif /* Only print the bug report address for `sed --help', otherwise we'll get reports for other people's bugs. */ @@ -125,12 +123,6 @@ usage (int status) { FILE *out = status ? stderr : stdout; -#ifdef REG_PERL -#define PERL_HELP _(" -R, --regexp-perl" \ - "\n use Perl 5's regular expressions" \ - " syntax in the script.\n") -#endif - fprintf (out, _("\ Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n\ \n"), program_name); @@ -161,9 +153,6 @@ Usage: %s [OPTION]... 
{script-only-if-no-other-script} [input-file]...\n\ fprintf (out, _(" -E, -r, --regexp-extended\n\ use extended regular expressions in the script\n\ (for portability use POSIX -E).\n")); -#ifdef REG_PERL - fprintf (out, PERL_HELP); -#endif fprintf (out, _(" -s, --separate\n\ consider files as separate rather than as a single,\n\ continuous long stream.\n")); @@ -191,20 +180,13 @@ specified, then the standard input is read.\n\ int main (int argc, char **argv) { -#ifdef REG_PERL -#define SHORTOPTS "bsnrzRuEe:f:l:i::V:" -#else #define SHORTOPTS "bsnrzuEe:f:l:i::V:" -#endif enum { SANDBOX_OPTION = CHAR_MAX+1 }; static const struct option longopts[] = { {"binary", 0, NULL, 'b'}, {"regexp-extended", 0, NULL, 'r'}, -#ifdef REG_PERL - {"regexp-perl", 0, NULL, 'R'}, -#endif {"expression", 1, NULL, 'e'}, {"file", 1, NULL, 'f'}, {"in-place", 2, NULL, 'i'}, @@ -320,21 +302,9 @@ main (int argc, char **argv) case 'E': case 'r': -#ifdef REG_PERL - if (extended_regexp_flags && (extended_regexp_flags!=REG_EXTENDED)) - usage (EXIT_BAD_USAGE); -#endif extended_regexp_flags = REG_EXTENDED; break; -#ifdef REG_PERL - case 'R': - if (extended_regexp_flags && (extended_regexp_flags!=REG_PERL))) - usage (EXIT_BAD_USAGE); - extended_regexp_flags = REG_PERL; - break; -#endif - case 's': separate_files = true; break; diff --git a/sed/sed.h b/sed/sed.h index 4f8cdee..be26511 100644 --- a/sed/sed.h +++ b/sed/sed.h @@ -130,16 +130,6 @@ struct subst { #endif }; -#ifdef REG_PERL -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; -#endif -- 2.11.0