[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
coreutils adjustment to recent regex.h API change
From: |
Paul Eggert |
Subject: |
coreutils adjustment to recent regex.h API change |
Date: |
Mon, 10 Apr 2006 17:59:08 -0700 |
The latest glibc regex.h changed the API slightly, by changing the
type of the fastmap member. While fixing this I noticed that there
are some storage-allocations that could be done better in coreutils
regexp code. I installed this:
2006-04-10 Paul Eggert <address@hidden>
Adjust to new regex.h API (with new fastmap type), and clean
up the regex storage allocation a bit.
* src/csplit.c (struct control): Put re_compiled member at the
end, since it's large. Change regexpr member from char * to bool;
all uses changed. Add new member fastmap.
(extract_regexp): regexp arg is now char const *, not char *.
Don't bother duplicating the regular expression; it's not needed.
Set fastmap from new fastmap member. Don't bother allocating
a buffer, as the regexp code does a better job than we do.
* src/expr.c (docolon): Allocate and use a fastmap.
Don't bother allocating a buffer.
* src/nl.c (body_fastmap, header_fastmap, footer_fastmap):
New vars.
(build_type_arg): New fastmap arg. All uses changed.
Don't bother allocating a buffer, but set a fastmap.
* src/ptx.c (context_regex_string, word_regex_string): Remove.
(context_regex, word_regex): New vars, replacing the above.
All uses changed.
(struct regex_data): New type.
(compile_regex): Renamed from alloc_and_compile_regex, since
we no longer allocate storage. Arg is now a struct regex_data *,
not a const char *. All uses changed. Don't allocate the fastmap;
instead, take it from the caller. Don't convert size_t to int,
to avoid arithmetic overflow problems. Don't bother freeing
storage afterwards; it's not worth the aggravation.
* src/tac.c (compiled_separator_fastmap): New var.
(main): Use it. Don't bother allocating a buffer.
Index: src/csplit.c
===================================================================
RCS file: /fetish/cu/src/csplit.c,v
retrieving revision 1.145
diff -p -u -r1.145 csplit.c
--- src/csplit.c 10 Sep 2005 13:56:45 -0000 1.145
+++ src/csplit.c 11 Apr 2006 00:40:17 -0000
@@ -61,14 +61,15 @@
/* A compiled pattern arg. */
struct control
{
- char *regexpr; /* Non-compiled regular expression. */
- struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
intmax_t offset; /* Offset from regexp to split at. */
uintmax_t lines_required; /* Number of lines required. */
uintmax_t repeat; /* Repeat count. */
int argnum; /* ARGV index. */
bool repeat_forever; /* True if `*' used as a repeat count. */
bool ignore; /* If true, produce no output (for regexp). */
+ bool regexpr; /* True if regular expression was used. */
+ struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
+ char fastmap[UCHAR_MAX + 1]; /* Fastmap for RE_COMPILED. */
};
/* Initial size of data area in buffers. */
@@ -1038,7 +1039,7 @@ new_control_record (void)
if (control_used == control_allocated)
controls = X2NREALLOC (controls, &control_allocated);
p = &controls[control_used++];
- p->regexpr = NULL;
+ p->regexpr = false;
p->repeat = 0;
p->repeat_forever = false;
p->lines_required = 0;
@@ -1097,11 +1098,11 @@ parse_repeat_count (int argnum, struct c
Unless IGNORE is true, mark these lines for output. */
static struct control *
-extract_regexp (int argnum, bool ignore, char *str)
+extract_regexp (int argnum, bool ignore, char const *str)
{
size_t len; /* Number of bytes in this regexp. */
char delim = *str;
- char *closing_delim;
+ char const *closing_delim;
struct control *p;
const char *err;
@@ -1115,13 +1116,12 @@ extract_regexp (int argnum, bool ignore,
p->argnum = argnum;
p->ignore = ignore;
- p->regexpr = xmalloc (len + 1);
- strncpy (p->regexpr, str + 1, len);
- p->re_compiled.allocated = len * 2;
- p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
- p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
+ p->regexpr = true;
+ p->re_compiled.buffer = NULL;
+ p->re_compiled.allocated = 0;
+ p->re_compiled.fastmap = p->fastmap;
p->re_compiled.translate = NULL;
- err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
+ err = re_compile_pattern (str + 1, len, &p->re_compiled);
if (err)
{
error (0, 0, _("%s: invalid regular expression: %s"), str, err);
Index: src/expr.c
===================================================================
RCS file: /fetish/cu/src/expr.c,v
retrieving revision 1.108
diff -p -u -r1.108 expr.c
--- src/expr.c 13 Sep 2005 22:30:56 -0000 1.108
+++ src/expr.c 11 Apr 2006 00:40:17 -0000
@@ -412,8 +412,8 @@ docolon (VALUE *sv, VALUE *pv)
VALUE *v IF_LINT (= NULL);
const char *errmsg;
struct re_pattern_buffer re_buffer;
+ char fastmap[UCHAR_MAX + 1];
struct re_registers re_regs;
- size_t len;
regoff_t matchlen;
tostring (sv);
@@ -427,14 +427,12 @@ of the basic regular expression is not p
quote (pv->u.s));
}
- len = strlen (pv->u.s);
- memset (&re_buffer, 0, sizeof (re_buffer));
- memset (&re_regs, 0, sizeof (re_regs));
- re_buffer.buffer = xnmalloc (len, 2);
- re_buffer.allocated = 2 * len;
+ re_buffer.buffer = NULL;
+ re_buffer.allocated = 0;
+ re_buffer.fastmap = fastmap;
re_buffer.translate = NULL;
re_syntax_options = RE_SYNTAX_POSIX_BASIC;
- errmsg = re_compile_pattern (pv->u.s, len, &re_buffer);
+ errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
if (errmsg)
error (EXPR_FAILURE, 0, "%s", errmsg);
@@ -442,7 +440,7 @@ of the basic regular expression is not p
if (0 <= matchlen)
{
/* Were \(...\) used? */
- if (re_buffer.re_nsub > 0)/* was (re_regs.start[1] >= 0) */
+ if (re_buffer.re_nsub > 0)
{
sv->u.s[re_regs.end[1]] = '\0';
v = str_value (sv->u.s + re_regs.start[1]);
Index: src/nl.c
===================================================================
RCS file: /fetish/cu/src/nl.c,v
retrieving revision 1.86
diff -p -u -r1.86 nl.c
--- src/nl.c 9 Sep 2005 21:09:48 -0000 1.86
+++ src/nl.c 11 Apr 2006 00:40:17 -0000
@@ -84,6 +84,11 @@ static struct re_pattern_buffer header_r
/* Regex for footer lines to number (-fp). */
static struct re_pattern_buffer footer_regex;
+/* Fastmaps for the above. */
+static char body_fastmap[UCHAR_MAX + 1];
+static char header_fastmap[UCHAR_MAX + 1];
+static char footer_fastmap[UCHAR_MAX + 1];
+
/* Pointer to current regex, if any. */
static struct re_pattern_buffer *current_regex = NULL;
@@ -230,11 +235,10 @@ FORMAT is one of:\n\
according to `optarg'. */
static bool
-build_type_arg (char **typep, struct re_pattern_buffer *regexp)
+build_type_arg (char **typep, struct re_pattern_buffer *regexp, char *fastmap)
{
const char *errmsg;
bool rval = true;
- size_t optlen;
switch (*optarg)
{
@@ -245,13 +249,11 @@ build_type_arg (char **typep, struct re_
break;
case 'p':
*typep = optarg++;
- optlen = strlen (optarg);
- regexp->allocated = optlen * 2;
- regexp->buffer = xnmalloc (optlen, 2);
+ regexp->buffer = NULL;
+ regexp->allocated = 0;
+ regexp->fastmap = fastmap;
regexp->translate = NULL;
- regexp->fastmap = xmalloc (256);
- regexp->fastmap_accurate = 0;
- errmsg = re_compile_pattern (optarg, optlen, regexp);
+ errmsg = re_compile_pattern (optarg, strlen (optarg), regexp);
if (errmsg)
error (EXIT_FAILURE, 0, "%s", errmsg);
break;
@@ -469,7 +471,7 @@ main (int argc, char **argv)
switch (c)
{
case 'h':
- if (! build_type_arg (&header_type, &header_regex))
+ if (! build_type_arg (&header_type, &header_regex, header_fastmap))
{
error (0, 0, _("invalid header numbering style: %s"),
quote (optarg));
@@ -477,7 +479,7 @@ main (int argc, char **argv)
}
break;
case 'b':
- if (! build_type_arg (&body_type, &body_regex))
+ if (! build_type_arg (&body_type, &body_regex, body_fastmap))
{
error (0, 0, _("invalid body numbering style: %s"),
quote (optarg));
@@ -485,7 +487,7 @@ main (int argc, char **argv)
}
break;
case 'f':
- if (! build_type_arg (&footer_type, &footer_regex))
+ if (! build_type_arg (&footer_type, &footer_regex, footer_fastmap))
{
error (0, 0, _("invalid footer numbering style: %s"),
quote (optarg));
Index: src/ptx.c
===================================================================
RCS file: /fetish/cu/src/ptx.c,v
retrieving revision 1.53
diff -p -u -r1.53 ptx.c
--- src/ptx.c 28 Mar 2006 09:47:28 -0000 1.53
+++ src/ptx.c 11 Apr 2006 00:40:17 -0000
@@ -95,14 +95,24 @@ static enum Format output_format = UNKNO
/* output format */
static bool ignore_case = false; /* fold lower to upper for sorting */
-static const char *context_regex_string = NULL;
- /* raw regex for end of context */
-static const char *word_regex_string = NULL;
- /* raw regex for a keyword */
static const char *break_file = NULL; /* name of the `Break characters' file */
static const char *only_file = NULL; /* name of the `Only words' file */
static const char *ignore_file = NULL; /* name of the `Ignore words' file */
+/* Options that use regular expressions. */
+struct regex_data
+{
+ /* The original regular expression, as a string. */
+ char const *string;
+
+ /* The compiled regular expression, and its fastmap. */
+ struct re_pattern_buffer pattern;
+ char fastmap[UCHAR_MAX + 1];
+};
+
+static struct regex_data context_regex; /* end of context */
+static struct regex_data word_regex; /* keyword */
+
/* A BLOCK delimit a region in memory of arbitrary size, like the copy of a
whole file. A WORD is something smaller, its length should fit in a
short integer. A WORD_TABLE may contain several WORDs. */
@@ -134,15 +144,9 @@ WORD_TABLE;
/* For each character, provide its folded equivalent. */
static unsigned char folded_chars[CHAR_SET_SIZE];
-/* Compiled regex for end of context. */
-static struct re_pattern_buffer *context_regex;
-
/* End of context pattern register indices. */
static struct re_registers context_regs;
-/* Compiled regex for a keyword. */
-static struct re_pattern_buffer *word_regex;
-
/* Keyword pattern register indices. */
static struct re_registers word_regs;
@@ -188,10 +192,10 @@ static BLOCK text_buffer; /* file to stu
cursor--
#define SKIP_SOMETHING(cursor, limit) \
- if (word_regex_string) \
+ if (word_regex.string) \
{ \
regoff_t count; \
- count = re_match (word_regex, cursor, limit - cursor, 0, NULL); \
+ count = re_match (&word_regex.pattern, cursor, limit - cursor, 0, NULL); \
if (count == -2) \
matcher_error (); \
cursor += count == -1 ? 1 : count; \
@@ -397,26 +401,23 @@ copy_unescaped_string (const char *strin
return result;
}
-/*-------------------------------------------------------------------.
-| Compile the regex represented by STRING, diagnose and abort if any |
-| error. Returns the compiled regex structure. |
-`-------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------.
+| Compile the regex represented by REGEX, diagnose and abort if any error. |
+`--------------------------------------------------------------------------*/
-static struct re_pattern_buffer *
-alloc_and_compile_regex (const char *string)
+static void
+compile_regex (struct regex_data *regex)
{
- struct re_pattern_buffer *pattern; /* newly allocated structure */
- const char *message; /* error message returned by regex.c */
-
- pattern = xmalloc (sizeof *pattern);
- memset (pattern, 0, sizeof *pattern);
+ struct re_pattern_buffer *pattern = &regex->pattern;
+ char const *string = regex->string;
+ char const *message;
pattern->buffer = NULL;
pattern->allocated = 0;
- pattern->translate = ignore_case ? (char *) folded_chars : NULL;
- pattern->fastmap = xmalloc ((size_t) CHAR_SET_SIZE);
+ pattern->fastmap = regex->fastmap;
+ pattern->translate = ignore_case ? folded_chars : NULL;
- message = re_compile_pattern (string, (int) strlen (string), pattern);
+ message = re_compile_pattern (string, strlen (string), pattern);
if (message)
error (EXIT_FAILURE, 0, _("%s (for regexp %s)"), message, quote (string));
@@ -425,13 +426,6 @@ alloc_and_compile_regex (const char *str
and it compiles the fastmap if this has not been done yet. */
re_compile_fastmap (pattern);
-
- /* Do not waste extra allocated space. */
-
- pattern->buffer = xrealloc (pattern->buffer, pattern->used);
- pattern->allocated = pattern->used;
-
- return pattern;
}
/*------------------------------------------------------------------------.
@@ -457,18 +451,18 @@ initialize_regex (void)
extensions are enabled, use end of sentence like in GNU emacs. If
disabled, use end of lines. */
- if (context_regex_string)
+ if (context_regex.string)
{
- if (!*context_regex_string)
- context_regex_string = NULL;
+ if (!*context_regex.string)
+ context_regex.string = NULL;
}
else if (gnu_extensions & !input_reference)
- context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*";
+ context_regex.string = "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*";
else
- context_regex_string = "\n";
+ context_regex.string = "\n";
- if (context_regex_string)
- context_regex = alloc_and_compile_regex (context_regex_string);
+ if (context_regex.string)
+ compile_regex (&context_regex);
/* If the user has already provided a non-empty regexp to describe
words, compile it. Else, unless this has already been done through
@@ -478,8 +472,8 @@ initialize_regex (void)
include almost everything, even punctuations; stop only on white
space. */
- if (word_regex_string && *word_regex_string)
- word_regex = alloc_and_compile_regex (word_regex_string);
+ if (word_regex.string)
+ compile_regex (&word_regex);
else if (!break_file)
{
if (gnu_extensions)
@@ -880,8 +874,9 @@ find_occurs_in_text (void)
sentence at the end of the buffer. */
next_context_start = text_buffer.end;
- if (context_regex_string)
- switch (re_search (context_regex, cursor, text_buffer.end - cursor,
+ if (context_regex.string)
+ switch (re_search (&context_regex.pattern, cursor,
+ text_buffer.end - cursor,
0, text_buffer.end - cursor, &context_regs))
{
case -2:
@@ -907,14 +902,15 @@ find_occurs_in_text (void)
while (1)
{
- if (word_regex)
+ if (word_regex.string)
/* If a word regexp has been compiled, use it to skip at the
beginning of the next word. If there is no such word, exit
the loop. */
{
- regoff_t r = re_search (word_regex, cursor, context_end - cursor,
+ regoff_t r = re_search (&word_regex.pattern, cursor,
+ context_end - cursor,
0, context_end - cursor, &word_regs);
if (r == -2)
matcher_error ();
@@ -2071,7 +2067,7 @@ main (int argc, char **argv)
break;
case 'S':
- context_regex_string = copy_unescaped_string (optarg);
+ context_regex.string = copy_unescaped_string (optarg);
break;
case 'T':
@@ -2079,7 +2075,9 @@ main (int argc, char **argv)
break;
case 'W':
- word_regex_string = copy_unescaped_string (optarg);
+ word_regex.string = copy_unescaped_string (optarg);
+ if (!*word_regex.string)
+ word_regex.string = NULL;
break;
case 10:
Index: src/tac.c
===================================================================
RCS file: /fetish/cu/src/tac.c,v
retrieving revision 1.126
diff -p -u -r1.126 tac.c
--- src/tac.c 24 Oct 2005 16:07:36 -0000 1.126
+++ src/tac.c 11 Apr 2006 00:40:17 -0000
@@ -110,6 +110,7 @@ static size_t G_buffer_size;
/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
+static char compiled_separator_fastmap[UCHAR_MAX + 1];
static struct option const longopts[] =
{
@@ -608,9 +609,9 @@ main (int argc, char **argv)
if (sentinel_length == 0)
{
- compiled_separator.allocated = 100;
- compiled_separator.buffer = xmalloc (compiled_separator.allocated);
- compiled_separator.fastmap = xmalloc (256);
+ compiled_separator.buffer = NULL;
+ compiled_separator.allocated = 0;
+ compiled_separator.fastmap = compiled_separator_fastmap;
compiled_separator.translate = NULL;
error_message = re_compile_pattern (separator, strlen (separator),
&compiled_separator);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- coreutils adjustment to recent regex.h API change,
Paul Eggert <=