grep-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Grep-devel] [PATCH 2/4] dfasearch: thread safety


From: Zev Weiss
Subject: [Grep-devel] [PATCH 2/4] dfasearch: thread safety
Date: Sun, 25 Dec 2016 02:57:07 -0600

* src/dfasearch.c (struct dfa_comp): New struct to hold
previously-global variables.
(dfawarn): Remove static variable.
(kwsmusts): Operate on a dfa_comp parameter instead of global
variables.
(GEAcompile): Allocate and return a dfa_comp struct instead of setting
global variables.
(EGexecute): Operate on a dfa_comp parameter instead of global
variables.
* src/searchutils.c (kwsinit): Replace a static array with a
dynamically-allocated one.
---
 src/dfasearch.c   | 126 ++++++++++++++++++++++++++----------------------------
 src/searchutils.c |   7 ++-
 2 files changed, 64 insertions(+), 69 deletions(-)

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 25276fe..0abe7f6 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -26,24 +26,28 @@
 
 struct localeinfo localeinfo;
 
-/* KWset compiled pattern.  For Ecompile and Gcompile, we compile
-   a list of strings, at least one of which is known to occur in
-   any string matching the regexp. */
-static kwset_t kwset;
+struct dfa_comp
+{
+  /* KWset compiled pattern.  For Ecompile and Gcompile, we compile
+     a list of strings, at least one of which is known to occur in
+     any string matching the regexp. */
+  kwset_t kwset;
 
-/* DFA compiled regexp. */
-static struct dfa *dfa;
+  /* DFA compiled regexp. */
+  struct dfa *dfa;
 
-/* The Regex compiled patterns.  */
-static struct re_pattern_buffer *patterns;
-static size_t pcount;
+  /* Regex compiled regexps. */
+  struct re_pattern_buffer* patterns;
+  size_t pcount;
+  struct re_registers regs;
 
-/* Number of compiled fixed strings known to exactly match the regexp.
-   If kwsexec returns < kwset_exact_matches, then we don't need to
-   call the regexp matcher at all. */
-static size_t kwset_exact_matches;
+  /* Number of compiled fixed strings known to exactly match the regexp.
+     If kwsexec returns < kwset_exact_matches, then we don't need to
+     call the regexp matcher at all. */
+  size_t kwset_exact_matches;
 
-static bool begline;
+  bool begline;
+};
 
 void
 dfaerror (char const *mesg)
@@ -57,10 +61,7 @@ dfaerror (char const *mesg)
 void
 dfawarn (char const *mesg)
 {
-  static enum { DW_NONE = 0, DW_POSIX, DW_GNU } mode;
-  if (mode == DW_NONE)
-    mode = (getenv ("POSIXLY_CORRECT") ? DW_POSIX : DW_GNU);
-  if (mode == DW_GNU)
+  if (!getenv ("POSIXLY_CORRECT"))
     dfaerror (mesg);
 }
 
@@ -69,38 +70,38 @@ dfawarn (char const *mesg)
    to find those strings, and thus quickly filter out impossible
    matches. */
 static void
-kwsmusts (void)
+kwsmusts (struct dfa_comp *dc)
 {
-  struct dfamust *dm = dfamust (dfa);
+  struct dfamust *dm = dfamust (dc->dfa);
   if (!dm)
     return;
-  kwset = kwsinit (false);
+  dc->kwset = kwsinit (false);
   if (dm->exact)
     {
       /* Prepare a substring whose presence implies a match.
          The kwset matcher will return the index of the matching
          string that it chooses. */
-      ++kwset_exact_matches;
+      ++dc->kwset_exact_matches;
       size_t old_len = strlen (dm->must);
       size_t new_len = old_len + dm->begline + dm->endline;
       char *must = xmalloc (new_len);
       char *mp = must;
       *mp = eolbyte;
       mp += dm->begline;
-      begline |= dm->begline;
+      dc->begline |= dm->begline;
       memcpy (mp, dm->must, old_len);
       if (dm->endline)
         mp[old_len] = eolbyte;
-      kwsincr (kwset, must, new_len);
+      kwsincr (dc->kwset, must, new_len);
       free (must);
     }
   else
     {
       /* Otherwise, filtering with this substring should help reduce the
          search space, but we'll still have to use the regexp matcher.  */
-      kwsincr (kwset, dm->must, strlen (dm->must));
+      kwsincr (dc->kwset, dm->must, strlen (dm->must));
     }
-  kwsprep (kwset);
+  kwsprep (dc->kwset);
   dfamustfree (dm);
 }
 
@@ -108,14 +109,15 @@ void *
 GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
 {
   char *motif;
+  struct dfa_comp *dc = xcalloc (1, sizeof (*dc));
 
-  dfa = dfaalloc ();
+  dc->dfa = dfaalloc ();
 
   if (match_icase)
     syntax_bits |= RE_ICASE;
   re_set_syntax (syntax_bits);
   int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
-  dfasyntax (dfa, &localeinfo, syntax_bits, dfaopts);
+  dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
 
   /* For GNU regex, pass the patterns separately to detect errors like
      "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
@@ -138,9 +140,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t 
syntax_bits)
       else
         len = patlim - p;
 
-      if (palloc <= pcount)
-        patterns = x2nrealloc (patterns, &palloc, sizeof *patterns);
-      struct re_pattern_buffer *pat = &patterns[pcount];
+      if (palloc <= dc->pcount)
+        dc->patterns = x2nrealloc (dc->patterns, &palloc, sizeof 
*dc->patterns);
+      struct re_pattern_buffer *pat = &dc->patterns[dc->pcount];
       pat->buffer = NULL;
       pat->allocated = 0;
 
@@ -155,14 +157,15 @@ GEAcompile (char const *pattern, size_t size, 
reg_syntax_t syntax_bits)
           /* With patterns specified only on the command line, emit the bare
              diagnostic.  Otherwise, include a filename:lineno: prefix.  */
           size_t lineno;
-          char const *pat_filename = pattern_file_name (pcount + 1, &lineno);
+          char const *pat_filename = pattern_file_name (dc->pcount + 1,
+                                                        &lineno);
           if (*pat_filename == '\0')
             error (0, 0, "%s", err);
           else
             error (0, 0, "%s:%zu: %s", pat_filename, lineno, err);
           compilation_failed = true;
         }
-      pcount++;
+      dc->pcount++;
       p = sep;
     }
   while (p);
@@ -201,12 +204,12 @@ GEAcompile (char const *pattern, size_t size, 
reg_syntax_t syntax_bits)
   else
     motif = NULL;
 
-  dfacomp (pattern, size, dfa, 1);
-  kwsmusts ();
+  dfacomp (pattern, size, dc->dfa, 1);
+  kwsmusts (dc);
 
   free (motif);
 
-  return NULL;
+  return dc;
 }
 
 size_t
@@ -219,8 +222,9 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t 
*match_size,
   size_t len, best_len;
   struct kwsmatch kwsm;
   size_t i;
-  struct dfa *superset = dfasuperset (dfa);
-  bool dfafast = dfaisfast (dfa);
+  struct dfa_comp *dc = vdc;
+  struct dfa *superset = dfasuperset (dc->dfa);
+  bool dfafast = dfaisfast (dc->dfa);
 
   mb_start = buf;
   buflim = buf + size;
@@ -237,13 +241,13 @@ EGexecute (void *vdc, char const *buf, size_t size, 
size_t *match_size,
           bool backref = false;
 
           /* Try matching with KWset, if it's defined.  */
-          if (kwset)
+          if (dc->kwset)
             {
               char const *prev_beg;
 
               /* Find a possible match using the KWset matcher.  */
-              size_t offset = kwsexec (kwset, beg - begline,
-                                       buflim - beg + begline, &kwsm, true);
+              size_t offset = kwsexec (dc->kwset, beg - dc->begline,
+                                       buflim - beg + dc->begline, &kwsm, 
true);
               if (offset == (size_t) -1)
                 goto failure;
               match = beg + offset;
@@ -260,7 +264,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t 
*match_size,
                  PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the
                  greater of the latter two values; this temporarily prefers
                  the DFA to KWset.  */
-              exact_kwset_match = kwsm.index < kwset_exact_matches;
+              exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
               end = ((exact_kwset_match || !dfafast
                       || MAX (16, match - beg) < (match - prev_beg) >> 2)
                      ? match
@@ -310,7 +314,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t 
*match_size,
             }
 
           /* Try matching with DFA.  */
-          next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref);
+          next_beg = dfaexec (dc->dfa, dfa_beg, (char *) end, 0, &count,
+                              &backref);
 
           /* If there's no match, or if we've matched the sentinel,
              we're done.  */
@@ -346,26 +351,17 @@ EGexecute (void *vdc, char const *buf, size_t size, 
size_t *match_size,
       /* Run the possible match through Regex.  */
       best_match = end;
       best_len = 0;
-      for (i = 0; i < pcount; i++)
+      for (i = 0; i < dc->pcount; i++)
         {
-          /* This is static because of a BRAIN-DEAD address@hidden library
-             interface in regex.c, as later calls reuse the
-             dynamically allocated storage that REGS members point at
-             and the API provides no way to free this storage.
-             If grep is ever made multithreaded, REGS would have to be
-             per-thread or the library API changed or the library
-             encapsulation violated.  */
-          static struct re_registers regs;
-
-          patterns[i].not_eol = 0;
-          patterns[i].newline_anchor = eolbyte == '\n';
-          start = re_search (&patterns[i], beg, end - beg - 1,
-                             ptr - beg, end - ptr - 1, &regs);
+          dc->patterns[i].not_eol = 0;
+          dc->patterns[i].newline_anchor = eolbyte == '\n';
+          start = re_search (&dc->patterns[i], beg, end - beg - 1,
+                             ptr - beg, end - ptr - 1, &dc->regs);
           if (start < -1)
             xalloc_die ();
           else if (0 <= start)
             {
-              len = regs.end[0] - start;
+              len = dc->regs.end[0] - start;
               match = beg + start;
               if (match > best_match)
                 continue;
@@ -396,10 +392,10 @@ EGexecute (void *vdc, char const *buf, size_t size, 
size_t *match_size,
                       {
                         /* Try a shorter length anchored at the same place. */
                         --len;
-                        patterns[i].not_eol = 1;
-                        shorter_len = re_match (&patterns[i], beg,
+                        dc->patterns[i].not_eol = 1;
+                        shorter_len = re_match (&dc->patterns[i], beg,
                                                 match + len - ptr, match - beg,
-                                                &regs);
+                                                &dc->regs);
                         if (shorter_len < -1)
                           xalloc_die ();
                       }
@@ -411,17 +407,17 @@ EGexecute (void *vdc, char const *buf, size_t size, 
size_t *match_size,
                         if (match == end - 1)
                           break;
                         match++;
-                        patterns[i].not_eol = 0;
-                        start = re_search (&patterns[i], beg, end - beg - 1,
+                        dc->patterns[i].not_eol = 0;
+                        start = re_search (&dc->patterns[i], beg, end - beg - 
1,
                                            match - beg, end - match - 1,
-                                           &regs);
+                                           &dc->regs);
                         if (start < 0)
                           {
                             if (start < -1)
                               xalloc_die ();
                             break;
                           }
-                        len = regs.end[0] - start;
+                        len = dc->regs.end[0] - start;
                         match = beg + start;
                       }
                   } /* while (match <= best_match) */
diff --git a/src/searchutils.c b/src/searchutils.c
index 1552ed7..a007955 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -43,11 +43,11 @@ wordinit (void)
 kwset_t
 kwsinit (bool mb_trans)
 {
-  static char trans[NCHAR];
-  char *transptr = NULL;
+  char *trans = NULL;
 
   if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
     {
+      trans = xmalloc (NCHAR);
       if (MB_CUR_MAX == 1)
         for (int i = 0; i < NCHAR; i++)
           trans[i] = toupper (i);
@@ -66,10 +66,9 @@ kwsinit (bool mb_trans)
             else
               trans[i] = i;
           }
-      transptr = trans;
     }
 
-  return kwsalloc (transptr, false);
+  return kwsalloc (trans, false);
 }
 
 /* In the buffer *MB_START, return the number of bytes needed to go
-- 
2.11.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]