[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Grep-devel] [PATCH 2/4] dfasearch: thread safety
From: |
Zev Weiss |
Subject: |
[Grep-devel] [PATCH 2/4] dfasearch: thread safety |
Date: |
Sun, 25 Dec 2016 02:57:07 -0600 |
* src/dfasearch.c (struct dfa_comp): New struct to hold
previously-global variables.
(dfawarn): Remove static variable.
(kwsmusts): Operate on a dfa_comp parameter instead of global
variables.
(GEAcompile): Allocate and return a dfa_comp struct instead of setting
global variables.
(EGexecute): Operate on a dfa_comp parameter instead of global
variables.
* src/searchutils.c (kwsinit): Replace a static array with a
dynamically-allocated one.
---
src/dfasearch.c | 126 ++++++++++++++++++++++++++----------------------------
src/searchutils.c | 7 ++-
2 files changed, 64 insertions(+), 69 deletions(-)
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 25276fe..0abe7f6 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -26,24 +26,28 @@
struct localeinfo localeinfo;
-/* KWset compiled pattern. For Ecompile and Gcompile, we compile
- a list of strings, at least one of which is known to occur in
- any string matching the regexp. */
-static kwset_t kwset;
+struct dfa_comp
+{
+ /* KWset compiled pattern. For Ecompile and Gcompile, we compile
+ a list of strings, at least one of which is known to occur in
+ any string matching the regexp. */
+ kwset_t kwset;
-/* DFA compiled regexp. */
-static struct dfa *dfa;
+ /* DFA compiled regexp. */
+ struct dfa *dfa;
-/* The Regex compiled patterns. */
-static struct re_pattern_buffer *patterns;
-static size_t pcount;
+ /* Regex compiled regexps. */
+ struct re_pattern_buffer* patterns;
+ size_t pcount;
+ struct re_registers regs;
-/* Number of compiled fixed strings known to exactly match the regexp.
- If kwsexec returns < kwset_exact_matches, then we don't need to
- call the regexp matcher at all. */
-static size_t kwset_exact_matches;
+ /* Number of compiled fixed strings known to exactly match the regexp.
+ If kwsexec returns < kwset_exact_matches, then we don't need to
+ call the regexp matcher at all. */
+ size_t kwset_exact_matches;
-static bool begline;
+ bool begline;
+};
void
dfaerror (char const *mesg)
@@ -57,10 +61,7 @@ dfaerror (char const *mesg)
void
dfawarn (char const *mesg)
{
- static enum { DW_NONE = 0, DW_POSIX, DW_GNU } mode;
- if (mode == DW_NONE)
- mode = (getenv ("POSIXLY_CORRECT") ? DW_POSIX : DW_GNU);
- if (mode == DW_GNU)
+ if (!getenv ("POSIXLY_CORRECT"))
dfaerror (mesg);
}
@@ -69,38 +70,38 @@ dfawarn (char const *mesg)
to find those strings, and thus quickly filter out impossible
matches. */
static void
-kwsmusts (void)
+kwsmusts (struct dfa_comp *dc)
{
- struct dfamust *dm = dfamust (dfa);
+ struct dfamust *dm = dfamust (dc->dfa);
if (!dm)
return;
- kwset = kwsinit (false);
+ dc->kwset = kwsinit (false);
if (dm->exact)
{
/* Prepare a substring whose presence implies a match.
The kwset matcher will return the index of the matching
string that it chooses. */
- ++kwset_exact_matches;
+ ++dc->kwset_exact_matches;
size_t old_len = strlen (dm->must);
size_t new_len = old_len + dm->begline + dm->endline;
char *must = xmalloc (new_len);
char *mp = must;
*mp = eolbyte;
mp += dm->begline;
- begline |= dm->begline;
+ dc->begline |= dm->begline;
memcpy (mp, dm->must, old_len);
if (dm->endline)
mp[old_len] = eolbyte;
- kwsincr (kwset, must, new_len);
+ kwsincr (dc->kwset, must, new_len);
free (must);
}
else
{
/* Otherwise, filtering with this substring should help reduce the
search space, but we'll still have to use the regexp matcher. */
- kwsincr (kwset, dm->must, strlen (dm->must));
+ kwsincr (dc->kwset, dm->must, strlen (dm->must));
}
- kwsprep (kwset);
+ kwsprep (dc->kwset);
dfamustfree (dm);
}
@@ -108,14 +109,15 @@ void *
GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
{
char *motif;
+ struct dfa_comp *dc = xcalloc (1, sizeof (*dc));
- dfa = dfaalloc ();
+ dc->dfa = dfaalloc ();
if (match_icase)
syntax_bits |= RE_ICASE;
re_set_syntax (syntax_bits);
int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
- dfasyntax (dfa, &localeinfo, syntax_bits, dfaopts);
+ dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
/* For GNU regex, pass the patterns separately to detect errors like
"[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
@@ -138,9 +140,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t
syntax_bits)
else
len = patlim - p;
- if (palloc <= pcount)
- patterns = x2nrealloc (patterns, &palloc, sizeof *patterns);
- struct re_pattern_buffer *pat = &patterns[pcount];
+ if (palloc <= dc->pcount)
+ dc->patterns = x2nrealloc (dc->patterns, &palloc, sizeof
*dc->patterns);
+ struct re_pattern_buffer *pat = &dc->patterns[dc->pcount];
pat->buffer = NULL;
pat->allocated = 0;
@@ -155,14 +157,15 @@ GEAcompile (char const *pattern, size_t size,
reg_syntax_t syntax_bits)
/* With patterns specified only on the command line, emit the bare
diagnostic. Otherwise, include a filename:lineno: prefix. */
size_t lineno;
- char const *pat_filename = pattern_file_name (pcount + 1, &lineno);
+ char const *pat_filename = pattern_file_name (dc->pcount + 1,
+ &lineno);
if (*pat_filename == '\0')
error (0, 0, "%s", err);
else
error (0, 0, "%s:%zu: %s", pat_filename, lineno, err);
compilation_failed = true;
}
- pcount++;
+ dc->pcount++;
p = sep;
}
while (p);
@@ -201,12 +204,12 @@ GEAcompile (char const *pattern, size_t size,
reg_syntax_t syntax_bits)
else
motif = NULL;
- dfacomp (pattern, size, dfa, 1);
- kwsmusts ();
+ dfacomp (pattern, size, dc->dfa, 1);
+ kwsmusts (dc);
free (motif);
- return NULL;
+ return dc;
}
size_t
@@ -219,8 +222,9 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t
*match_size,
size_t len, best_len;
struct kwsmatch kwsm;
size_t i;
- struct dfa *superset = dfasuperset (dfa);
- bool dfafast = dfaisfast (dfa);
+ struct dfa_comp *dc = vdc;
+ struct dfa *superset = dfasuperset (dc->dfa);
+ bool dfafast = dfaisfast (dc->dfa);
mb_start = buf;
buflim = buf + size;
@@ -237,13 +241,13 @@ EGexecute (void *vdc, char const *buf, size_t size,
size_t *match_size,
bool backref = false;
/* Try matching with KWset, if it's defined. */
- if (kwset)
+ if (dc->kwset)
{
char const *prev_beg;
/* Find a possible match using the KWset matcher. */
- size_t offset = kwsexec (kwset, beg - begline,
- buflim - beg + begline, &kwsm, true);
+ size_t offset = kwsexec (dc->kwset, beg - dc->begline,
+ buflim - beg + dc->begline, &kwsm,
true);
if (offset == (size_t) -1)
goto failure;
match = beg + offset;
@@ -260,7 +264,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t
*match_size,
PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the
greater of the latter two values; this temporarily prefers
the DFA to KWset. */
- exact_kwset_match = kwsm.index < kwset_exact_matches;
+ exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
end = ((exact_kwset_match || !dfafast
|| MAX (16, match - beg) < (match - prev_beg) >> 2)
? match
@@ -310,7 +314,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t
*match_size,
}
/* Try matching with DFA. */
- next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref);
+ next_beg = dfaexec (dc->dfa, dfa_beg, (char *) end, 0, &count,
+ &backref);
/* If there's no match, or if we've matched the sentinel,
we're done. */
@@ -346,26 +351,17 @@ EGexecute (void *vdc, char const *buf, size_t size,
size_t *match_size,
/* Run the possible match through Regex. */
best_match = end;
best_len = 0;
- for (i = 0; i < pcount; i++)
+ for (i = 0; i < dc->pcount; i++)
{
- /* This is static because of a BRAIN-DEAD address@hidden library
- interface in regex.c, as later calls reuse the
- dynamically allocated storage that REGS members point at
- and the API provides no way to free this storage.
- If grep is ever made multithreaded, REGS would have to be
- per-thread or the library API changed or the library
- encapsulation violated. */
- static struct re_registers regs;
-
- patterns[i].not_eol = 0;
- patterns[i].newline_anchor = eolbyte == '\n';
- start = re_search (&patterns[i], beg, end - beg - 1,
- ptr - beg, end - ptr - 1, ®s);
+ dc->patterns[i].not_eol = 0;
+ dc->patterns[i].newline_anchor = eolbyte == '\n';
+ start = re_search (&dc->patterns[i], beg, end - beg - 1,
+ ptr - beg, end - ptr - 1, &dc->regs);
if (start < -1)
xalloc_die ();
else if (0 <= start)
{
- len = regs.end[0] - start;
+ len = dc->regs.end[0] - start;
match = beg + start;
if (match > best_match)
continue;
@@ -396,10 +392,10 @@ EGexecute (void *vdc, char const *buf, size_t size,
size_t *match_size,
{
/* Try a shorter length anchored at the same place. */
--len;
- patterns[i].not_eol = 1;
- shorter_len = re_match (&patterns[i], beg,
+ dc->patterns[i].not_eol = 1;
+ shorter_len = re_match (&dc->patterns[i], beg,
match + len - ptr, match - beg,
- ®s);
+ &dc->regs);
if (shorter_len < -1)
xalloc_die ();
}
@@ -411,17 +407,17 @@ EGexecute (void *vdc, char const *buf, size_t size,
size_t *match_size,
if (match == end - 1)
break;
match++;
- patterns[i].not_eol = 0;
- start = re_search (&patterns[i], beg, end - beg - 1,
+ dc->patterns[i].not_eol = 0;
+ start = re_search (&dc->patterns[i], beg, end - beg -
1,
match - beg, end - match - 1,
- ®s);
+ &dc->regs);
if (start < 0)
{
if (start < -1)
xalloc_die ();
break;
}
- len = regs.end[0] - start;
+ len = dc->regs.end[0] - start;
match = beg + start;
}
} /* while (match <= best_match) */
diff --git a/src/searchutils.c b/src/searchutils.c
index 1552ed7..a007955 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -43,11 +43,11 @@ wordinit (void)
kwset_t
kwsinit (bool mb_trans)
{
- static char trans[NCHAR];
- char *transptr = NULL;
+ char *trans = NULL;
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
{
+ trans = xmalloc (NCHAR);
if (MB_CUR_MAX == 1)
for (int i = 0; i < NCHAR; i++)
trans[i] = toupper (i);
@@ -66,10 +66,9 @@ kwsinit (bool mb_trans)
else
trans[i] = i;
}
- transptr = trans;
}
- return kwsalloc (transptr, false);
+ return kwsalloc (trans, false);
}
/* In the buffer *MB_START, return the number of bytes needed to go
--
2.11.0
- [Grep-devel] [PATCH 0/4] search backend thread safety, Zev Weiss, 2016/12/25
- [Grep-devel] [PATCH 1/4] grep: prepare search backends for thread-safety, Zev Weiss, 2016/12/25
- [Grep-devel] [PATCH 3/4] kwsearch: thread safety, Zev Weiss, 2016/12/25
- [Grep-devel] [PATCH 2/4] dfasearch: thread safety,
Zev Weiss <=
- [Grep-devel] [PATCH 4/4] pcresearch: thread safety, Zev Weiss, 2016/12/25
- Re: [Grep-devel] [PATCH 0/4] search backend thread safety, Jim Meyering, 2016/12/25
- Re: [Grep-devel] [PATCH 0/4] search backend thread safety, Paul Eggert, 2016/12/25