bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils adjustment to recent regex.h API change


From: Paul Eggert
Subject: coreutils adjustment to recent regex.h API change
Date: Mon, 10 Apr 2006 17:59:08 -0700

The latest glibc regex.h changed the API slightly, by changing the
type of the fastmap member.  While fixing this I noticed that some
storage allocations could be done better in the coreutils regexp
code.  I installed this:

2006-04-10  Paul Eggert  <address@hidden>

        Adjust to new regex.h API (with new fastmap type), and clean
        up the regex storage allocation a bit.

        * src/csplit.c (struct control): Put re_compiled member at the
        end, since it's large.  Change regexpr member from char * to bool;
        all uses changed.  Add new member fastmap.
        (extract_regexp): regexp arg is now char const *, not char *.
        Don't bother duplicating the regular expression; it's not needed.
        Set fastmap from new fastmap member.  Don't bother allocating
        a buffer, as the regexp code does a better job than we do.
        * src/expr.c (docolon): Allocate and use a fastmap.
        Don't bother allocating a buffer.
        * src/nl.c (body_fastmap, header_fastmap, footer_fastmap):
        New vars.
        (build_type_arg): New fastmap arg.  All uses changed.
        Don't bother allocating a buffer, but set a fastmap.
        * src/ptx.c (context_regex_string, word_regex_string): Remove.
        (context_regex, word_regex): New vars, replacing the above.
        All uses changed.
        (struct regex_data): New type.
        (compile_regex): Renamed from alloc_and_compile_regex, since
        we no longer allocate storage.  Arg is now a struct regex_data *,
        not a const char *.  All uses changed.  Don't allocate the fastmap;
        instead, take it from the caller.  Don't convert size_t to int,
        to avoid arithmetic overflow problems.  Don't bother freeing
        storage afterwards; it's not worth the aggravation.
        * src/tac.c (compiled_separator_fastmap): New var.
        (main): Use it.  Don't bother allocating a buffer.

Index: src/csplit.c
===================================================================
RCS file: /fetish/cu/src/csplit.c,v
retrieving revision 1.145
diff -p -u -r1.145 csplit.c
--- src/csplit.c        10 Sep 2005 13:56:45 -0000      1.145
+++ src/csplit.c        11 Apr 2006 00:40:17 -0000
@@ -61,14 +61,15 @@
 /* A compiled pattern arg. */
 struct control
 {
-  char *regexpr;               /* Non-compiled regular expression. */
-  struct re_pattern_buffer re_compiled;        /* Compiled regular expression. */
   intmax_t offset;             /* Offset from regexp to split at. */
   uintmax_t lines_required;    /* Number of lines required. */
   uintmax_t repeat;            /* Repeat count. */
   int argnum;                  /* ARGV index. */
   bool repeat_forever;         /* True if `*' used as a repeat count. */
   bool ignore;                 /* If true, produce no output (for regexp). */
+  bool regexpr;                        /* True if regular expression was used. */
+  struct re_pattern_buffer re_compiled;        /* Compiled regular expression. */
+  char fastmap[UCHAR_MAX + 1]; /* Fastmap for RE_COMPILED.  */
 };
 
 /* Initial size of data area in buffers. */
@@ -1038,7 +1039,7 @@ new_control_record (void)
   if (control_used == control_allocated)
     controls = X2NREALLOC (controls, &control_allocated);
   p = &controls[control_used++];
-  p->regexpr = NULL;
+  p->regexpr = false;
   p->repeat = 0;
   p->repeat_forever = false;
   p->lines_required = 0;
@@ -1097,11 +1098,11 @@ parse_repeat_count (int argnum, struct c
    Unless IGNORE is true, mark these lines for output. */
 
 static struct control *
-extract_regexp (int argnum, bool ignore, char *str)
+extract_regexp (int argnum, bool ignore, char const *str)
 {
   size_t len;                  /* Number of bytes in this regexp. */
   char delim = *str;
-  char *closing_delim;
+  char const *closing_delim;
   struct control *p;
   const char *err;
 
@@ -1115,13 +1116,12 @@ extract_regexp (int argnum, bool ignore,
   p->argnum = argnum;
   p->ignore = ignore;
 
-  p->regexpr = xmalloc (len + 1);
-  strncpy (p->regexpr, str + 1, len);
-  p->re_compiled.allocated = len * 2;
-  p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
-  p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
+  p->regexpr = true;
+  p->re_compiled.buffer = NULL;
+  p->re_compiled.allocated = 0;
+  p->re_compiled.fastmap = p->fastmap;
   p->re_compiled.translate = NULL;
-  err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
+  err = re_compile_pattern (str + 1, len, &p->re_compiled);
   if (err)
     {
       error (0, 0, _("%s: invalid regular expression: %s"), str, err);
Index: src/expr.c
===================================================================
RCS file: /fetish/cu/src/expr.c,v
retrieving revision 1.108
diff -p -u -r1.108 expr.c
--- src/expr.c  13 Sep 2005 22:30:56 -0000      1.108
+++ src/expr.c  11 Apr 2006 00:40:17 -0000
@@ -412,8 +412,8 @@ docolon (VALUE *sv, VALUE *pv)
   VALUE *v IF_LINT (= NULL);
   const char *errmsg;
   struct re_pattern_buffer re_buffer;
+  char fastmap[UCHAR_MAX + 1];
   struct re_registers re_regs;
-  size_t len;
   regoff_t matchlen;
 
   tostring (sv);
@@ -427,14 +427,12 @@ of the basic regular expression is not p
             quote (pv->u.s));
     }
 
-  len = strlen (pv->u.s);
-  memset (&re_buffer, 0, sizeof (re_buffer));
-  memset (&re_regs, 0, sizeof (re_regs));
-  re_buffer.buffer = xnmalloc (len, 2);
-  re_buffer.allocated = 2 * len;
+  re_buffer.buffer = NULL;
+  re_buffer.allocated = 0;
+  re_buffer.fastmap = fastmap;
   re_buffer.translate = NULL;
   re_syntax_options = RE_SYNTAX_POSIX_BASIC;
-  errmsg = re_compile_pattern (pv->u.s, len, &re_buffer);
+  errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
   if (errmsg)
     error (EXPR_FAILURE, 0, "%s", errmsg);
 
@@ -442,7 +440,7 @@ of the basic regular expression is not p
   if (0 <= matchlen)
     {
       /* Were \(...\) used? */
-      if (re_buffer.re_nsub > 0)/* was (re_regs.start[1] >= 0) */
+      if (re_buffer.re_nsub > 0)
        {
          sv->u.s[re_regs.end[1]] = '\0';
          v = str_value (sv->u.s + re_regs.start[1]);
Index: src/nl.c
===================================================================
RCS file: /fetish/cu/src/nl.c,v
retrieving revision 1.86
diff -p -u -r1.86 nl.c
--- src/nl.c    9 Sep 2005 21:09:48 -0000       1.86
+++ src/nl.c    11 Apr 2006 00:40:17 -0000
@@ -84,6 +84,11 @@ static struct re_pattern_buffer header_r
 /* Regex for footer lines to number (-fp).  */
 static struct re_pattern_buffer footer_regex;
 
+/* Fastmaps for the above.  */
+static char body_fastmap[UCHAR_MAX + 1];
+static char header_fastmap[UCHAR_MAX + 1];
+static char footer_fastmap[UCHAR_MAX + 1];
+
 /* Pointer to current regex, if any.  */
 static struct re_pattern_buffer *current_regex = NULL;
 
@@ -230,11 +235,10 @@ FORMAT is one of:\n\
    according to `optarg'.  */
 
 static bool
-build_type_arg (char **typep, struct re_pattern_buffer *regexp)
+build_type_arg (char **typep, struct re_pattern_buffer *regexp, char *fastmap)
 {
   const char *errmsg;
   bool rval = true;
-  size_t optlen;
 
   switch (*optarg)
     {
@@ -245,13 +249,11 @@ build_type_arg (char **typep, struct re_
       break;
     case 'p':
       *typep = optarg++;
-      optlen = strlen (optarg);
-      regexp->allocated = optlen * 2;
-      regexp->buffer = xnmalloc (optlen, 2);
+      regexp->buffer = NULL;
+      regexp->allocated = 0;
+      regexp->fastmap = fastmap;
       regexp->translate = NULL;
-      regexp->fastmap = xmalloc (256);
-      regexp->fastmap_accurate = 0;
-      errmsg = re_compile_pattern (optarg, optlen, regexp);
+      errmsg = re_compile_pattern (optarg, strlen (optarg), regexp);
       if (errmsg)
        error (EXIT_FAILURE, 0, "%s", errmsg);
       break;
@@ -469,7 +471,7 @@ main (int argc, char **argv)
       switch (c)
        {
        case 'h':
-         if (! build_type_arg (&header_type, &header_regex))
+         if (! build_type_arg (&header_type, &header_regex, header_fastmap))
            {
              error (0, 0, _("invalid header numbering style: %s"),
                     quote (optarg));
@@ -477,7 +479,7 @@ main (int argc, char **argv)
            }
          break;
        case 'b':
-         if (! build_type_arg (&body_type, &body_regex))
+         if (! build_type_arg (&body_type, &body_regex, body_fastmap))
            {
              error (0, 0, _("invalid body numbering style: %s"),
                     quote (optarg));
@@ -485,7 +487,7 @@ main (int argc, char **argv)
            }
          break;
        case 'f':
-         if (! build_type_arg (&footer_type, &footer_regex))
+         if (! build_type_arg (&footer_type, &footer_regex, footer_fastmap))
            {
              error (0, 0, _("invalid footer numbering style: %s"),
                     quote (optarg));
Index: src/ptx.c
===================================================================
RCS file: /fetish/cu/src/ptx.c,v
retrieving revision 1.53
diff -p -u -r1.53 ptx.c
--- src/ptx.c   28 Mar 2006 09:47:28 -0000      1.53
+++ src/ptx.c   11 Apr 2006 00:40:17 -0000
@@ -95,14 +95,24 @@ static enum Format output_format = UNKNO
                                /* output format */
 
 static bool ignore_case = false;       /* fold lower to upper for sorting */
-static const char *context_regex_string = NULL;
-                               /* raw regex for end of context */
-static const char *word_regex_string = NULL;
-                               /* raw regex for a keyword */
 static const char *break_file = NULL;  /* name of the `Break characters' file */
 static const char *only_file = NULL;   /* name of the `Only words' file */
 static const char *ignore_file = NULL; /* name of the `Ignore words' file */
 
+/* Options that use regular expressions.  */
+struct regex_data
+{
+  /* The original regular expression, as a string.  */
+  char const *string;
+
+  /* The compiled regular expression, and its fastmap.  */
+  struct re_pattern_buffer pattern;
+  char fastmap[UCHAR_MAX + 1];
+};
+
+static struct regex_data context_regex;        /* end of context */
+static struct regex_data word_regex;   /* keyword */
+
 /* A BLOCK delimit a region in memory of arbitrary size, like the copy of a
    whole file.  A WORD is something smaller, its length should fit in a
    short integer.  A WORD_TABLE may contain several WORDs.  */
@@ -134,15 +144,9 @@ WORD_TABLE;
 /* For each character, provide its folded equivalent.  */
 static unsigned char folded_chars[CHAR_SET_SIZE];
 
-/* Compiled regex for end of context.  */
-static struct re_pattern_buffer *context_regex;
-
 /* End of context pattern register indices.  */
 static struct re_registers context_regs;
 
-/* Compiled regex for a keyword.  */
-static struct re_pattern_buffer *word_regex;
-
 /* Keyword pattern register indices.  */
 static struct re_registers word_regs;
 
@@ -188,10 +192,10 @@ static BLOCK text_buffer; /* file to stu
     cursor--
 
 #define SKIP_SOMETHING(cursor, limit) \
-  if (word_regex_string)                                               \
+  if (word_regex.string)                                               \
     {                                                                  \
       regoff_t count;                                                  \
-      count = re_match (word_regex, cursor, limit - cursor, 0, NULL);  \
+      count = re_match (&word_regex.pattern, cursor, limit - cursor, 0, NULL); \
       if (count == -2)                                                 \
         matcher_error ();                                              \
       cursor += count == -1 ? 1 : count;                               \
@@ -397,26 +401,23 @@ copy_unescaped_string (const char *strin
   return result;
 }
 
-/*-------------------------------------------------------------------.
-| Compile the regex represented by STRING, diagnose and abort if any |
-| error.  Returns the compiled regex structure.                             |
-`-------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------.
+| Compile the regex represented by REGEX, diagnose and abort if any error.  |
+`--------------------------------------------------------------------------*/
 
-static struct re_pattern_buffer *
-alloc_and_compile_regex (const char *string)
+static void
+compile_regex (struct regex_data *regex)
 {
-  struct re_pattern_buffer *pattern; /* newly allocated structure */
-  const char *message;         /* error message returned by regex.c */
-
-  pattern = xmalloc (sizeof *pattern);
-  memset (pattern, 0, sizeof *pattern);
+  struct re_pattern_buffer *pattern = &regex->pattern;
+  char const *string = regex->string;
+  char const *message;
 
   pattern->buffer = NULL;
   pattern->allocated = 0;
-  pattern->translate = ignore_case ? (char *) folded_chars : NULL;
-  pattern->fastmap = xmalloc ((size_t) CHAR_SET_SIZE);
+  pattern->fastmap = regex->fastmap;
+  pattern->translate = ignore_case ? folded_chars : NULL;
 
-  message = re_compile_pattern (string, (int) strlen (string), pattern);
+  message = re_compile_pattern (string, strlen (string), pattern);
   if (message)
     error (EXIT_FAILURE, 0, _("%s (for regexp %s)"), message, quote (string));
 
@@ -425,13 +426,6 @@ alloc_and_compile_regex (const char *str
      and it compiles the fastmap if this has not been done yet.  */
 
   re_compile_fastmap (pattern);
-
-  /* Do not waste extra allocated space.  */
-
-  pattern->buffer = xrealloc (pattern->buffer, pattern->used);
-  pattern->allocated = pattern->used;
-
-  return pattern;
 }
 
 /*------------------------------------------------------------------------.
@@ -457,18 +451,18 @@ initialize_regex (void)
      extensions are enabled, use end of sentence like in GNU emacs.  If
      disabled, use end of lines.  */
 
-  if (context_regex_string)
+  if (context_regex.string)
     {
-      if (!*context_regex_string)
-       context_regex_string = NULL;
+      if (!*context_regex.string)
+       context_regex.string = NULL;
     }
   else if (gnu_extensions & !input_reference)
-    context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
+    context_regex.string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
   else
-    context_regex_string = "\n";
+    context_regex.string = "\n";
 
-  if (context_regex_string)
-    context_regex = alloc_and_compile_regex (context_regex_string);
+  if (context_regex.string)
+    compile_regex (&context_regex);
 
   /* If the user has already provided a non-empty regexp to describe
      words, compile it.  Else, unless this has already been done through
@@ -478,8 +472,8 @@ initialize_regex (void)
      include almost everything, even punctuations; stop only on white
      space.  */
 
-  if (word_regex_string && *word_regex_string)
-    word_regex = alloc_and_compile_regex (word_regex_string);
+  if (word_regex.string)
+    compile_regex (&word_regex);
   else if (!break_file)
     {
       if (gnu_extensions)
@@ -880,8 +874,9 @@ find_occurs_in_text (void)
         sentence at the end of the buffer.  */
 
       next_context_start = text_buffer.end;
-      if (context_regex_string)
-       switch (re_search (context_regex, cursor, text_buffer.end - cursor,
+      if (context_regex.string)
+       switch (re_search (&context_regex.pattern, cursor,
+                          text_buffer.end - cursor,
                           0, text_buffer.end - cursor, &context_regs))
          {
          case -2:
@@ -907,14 +902,15 @@ find_occurs_in_text (void)
 
       while (1)
        {
-         if (word_regex)
+         if (word_regex.string)
 
            /* If a word regexp has been compiled, use it to skip at the
               beginning of the next word.  If there is no such word, exit
               the loop.  */
 
            {
-             regoff_t r = re_search (word_regex, cursor, context_end - cursor,
+             regoff_t r = re_search (&word_regex.pattern, cursor,
+                                     context_end - cursor,
                                      0, context_end - cursor, &word_regs);
              if (r == -2)
                matcher_error ();
@@ -2071,7 +2067,7 @@ main (int argc, char **argv)
          break;
 
        case 'S':
-         context_regex_string = copy_unescaped_string (optarg);
+         context_regex.string = copy_unescaped_string (optarg);
          break;
 
        case 'T':
@@ -2079,7 +2075,9 @@ main (int argc, char **argv)
          break;
 
        case 'W':
-         word_regex_string = copy_unescaped_string (optarg);
+         word_regex.string = copy_unescaped_string (optarg);
+         if (!*word_regex.string)
+           word_regex.string = NULL;
          break;
 
        case 10:
Index: src/tac.c
===================================================================
RCS file: /fetish/cu/src/tac.c,v
retrieving revision 1.126
diff -p -u -r1.126 tac.c
--- src/tac.c   24 Oct 2005 16:07:36 -0000      1.126
+++ src/tac.c   11 Apr 2006 00:40:17 -0000
@@ -110,6 +110,7 @@ static size_t G_buffer_size;
 
 /* The compiled regular expression representing `separator'. */
 static struct re_pattern_buffer compiled_separator;
+static char compiled_separator_fastmap[UCHAR_MAX + 1];
 
 static struct option const longopts[] =
 {
@@ -608,9 +609,9 @@ main (int argc, char **argv)
 
   if (sentinel_length == 0)
     {
-      compiled_separator.allocated = 100;
-      compiled_separator.buffer = xmalloc (compiled_separator.allocated);
-      compiled_separator.fastmap = xmalloc (256);
+      compiled_separator.buffer = NULL;
+      compiled_separator.allocated = 0;
+      compiled_separator.fastmap = compiled_separator_fastmap;
       compiled_separator.translate = NULL;
       error_message = re_compile_pattern (separator, strlen (separator),
                                          &compiled_separator);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]