bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]


From: Paolo Bonzini
Subject: [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
Date: Tue, 4 May 2010 18:11:07 +0200

There's no "multibyte pain" in these two classes, since POSIX
and ISO C99 mandate their contents.

Time for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words"
Before: 1.5s, after: 0.07s.  (sed manages only 0.5s).

* src/dfa.c (struct dfa_ctype): New type, declared separately from
the definition of prednames.  Rename member pred to func; add sb_only.
(prednames): Use it.  Mark "digit" and "xdigit" as sb_only.
(find_pred): Return const struct dfa_ctype *.
(parse_bracket_exp): Adjust to find_pred's new return type.  Do
not fill MBCSET for sb_only character types.
---
 src/dfa.c |   56 +++++++++++++++++++++++++++++---------------------------
 1 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 44efc02..107866a 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -713,26 +713,29 @@ typedef int predicate (int);
 /* The following list maps the names of the Posix named character classes
    to predicate functions that determine whether a given character is in
    the class.  The leading [ has already been eaten by the lexical analyzer. */
-static struct {
+struct dfa_ctype {
   const char *name;
-  predicate *pred;
-} const prednames[] = {
-  { "alpha", isalpha },
-  { "upper", isupper },
-  { "lower", islower },
-  { "digit", isdigit },
-  { "xdigit", isxdigit },
-  { "space", isspace },
-  { "punct", ispunct },
-  { "alnum", isalnum },
-  { "print", isprint },
-  { "graph", isgraph },
-  { "cntrl", iscntrl },
-  { "blank", isblank },
-  { NULL, NULL }
+  predicate *func;  /* Membership test for the class, e.g. isdigit.  */
+  bool sb_only;     /* True if the multibyte (wctype) path is skipped.  */
 };
 
-static predicate *
+static const struct dfa_ctype prednames[] = {
+  { "alpha", isalpha, false },
+  { "upper", isupper, false },
+  { "lower", islower, false },
+  { "digit", isdigit, true },
+  { "xdigit", isxdigit, true },
+  { "space", isspace, false },
+  { "punct", ispunct, false },
+  { "alnum", isalnum, false },
+  { "print", isprint, false },
+  { "graph", isgraph, false },
+  { "cntrl", iscntrl, false },
+  { "blank", isblank, false },
+  { NULL, NULL, false }
+};
+
+static const struct dfa_ctype *
 find_pred (const char *str)
 {
   unsigned int i;
@@ -740,7 +743,7 @@ find_pred (const char *str)
     if (STREQ (str, prednames[i].name))
       break;
 
-  return prednames[i].pred;
+  return prednames[i].name ? &prednames[i] : NULL;
 }
 
 /* Multibyte character handling sub-routine for lex.
@@ -837,8 +840,12 @@ parse_bracket_exp (void)
                                      || STREQ  (str, "lower"))
                                        ? "alpha"
                                        : str);
+                  const struct dfa_ctype *pred = find_pred (class);
+                  if (!pred)
+                    dfaerror(_("invalid character class"));
+
 #if MBS_SUPPORT
-                  if (MB_CUR_MAX > 1)
+                  if (MB_CUR_MAX > 1 && !pred->sb_only)
                     {
                       /* Store the character class as wctype_t.  */
                       wctype_t wt = wctype (class);
@@ -852,14 +859,9 @@ parse_bracket_exp (void)
                     }
 #endif
 
-                  {
-                    predicate *pred = find_pred (class);
-                    if (!pred)
-                      dfaerror(_("invalid character class"));
-                    for (c2 = 0; c2 < NOTCHAR; ++c2)
-                      if ((*pred)(c2))
-                        setbit_case_fold (c2, ccl);
-                  }
+                  for (c2 = 0; c2 < NOTCHAR; ++c2)
+                    if (pred->func(c2))
+                      setbit_case_fold (c2, ccl);
                 }
 
 #if MBS_SUPPORT
-- 
1.6.6.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]