bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

gawk->grep dfa synchronization


From: Jim Meyering
Subject: gawk->grep dfa synchronization
Date: Fri, 05 Mar 2010 10:19:25 +0100

Here are the first few gawk->grep dfa synchronization changes.
Only the second one induces a semantic change, and even that
is just an optimization:

      dfa.c: sync syntax from gawk
      dfa.c: add support for \s and \S
      maint: dfa-sync: use CALLOC rather than equiv. MALLOC+initialize-loop
      maint: dfa-sync: don't malloc zero
      maint: dfa: sync a comment and dead-to-grep code: no semantic change

As such, I'll probably give it a NEWS entry before pushing.

>From 11a68fea2d9ec9c54165ce1ecfa25d773477bb70 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Fri, 5 Mar 2010 09:22:52 +0100
Subject: [PATCH 1/5] dfa.c: sync syntax from gawk

* src/dfa.c (prednames): Add a "0" to final initializer.
---
 src/dfa.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 09c0c96..6a54d87 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -710,7 +710,7 @@ static struct {
   { ":graph:]", is_graph },
   { ":cntrl:]", is_cntrl },
   { ":blank:]", is_blank },
-  { 0 }
+  { 0, 0 }
 };

 /* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
--
1.7.0.1.300.gd855a


>From 5cc052e5da2d17cd65cd8a096293645250d89d0e Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Fri, 5 Mar 2010 09:24:34 +0100
Subject: [PATCH 2/5] dfa.c: add support for \s and \S

* src/dfa.c (lex): Sync from gawk's dfa.c.
---
 src/dfa.c |   15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 6a54d87..af321c3 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -990,6 +990,21 @@ lex (void)
          laststart = 0;
          return lasttok = CSET + charclass_index(ccl);

+#ifndef GAWK
+       case 's':
+       case 'S':
+         if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+           goto normal_char;
+         zeroset(ccl);
+         for (c2 = 0; c2 < NOTCHAR; ++c2)
+           if (ISSPACE(c2))
+             setbit(c2, ccl);
+         if (c == 'S')
+           notset(ccl);
+         laststart = 0;
+         return lasttok = CSET + charclass_index(ccl);
+#endif
+
        case 'w':
        case 'W':
          if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
--
1.7.0.1.300.gd855a


>From 814c65caa32548e08b3661fe38b8709f511fc768 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Fri, 5 Mar 2010 09:29:20 +0100
Subject: [PATCH 3/5] maint: dfa-sync: use CALLOC rather than equiv. MALLOC+initialize-loop

* src/dfa.c (dfaanalyze): Sync from gawk.  No semantic change.
---
 src/dfa.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index af321c3..d5fa9ce 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1743,9 +1743,7 @@ dfaanalyze (struct dfa *d, int searchflag)
   o_nlast = nlastpos;
   MALLOC(lastpos, position, d->nleaves);
   o_lastpos = lastpos, lastpos += d->nleaves;
-  MALLOC(nalloc, int, d->tindex);
-  for (i = 0; i < d->tindex; ++i)
-    nalloc[i] = 0;
+  CALLOC(nalloc, int, d->tindex);
   MALLOC(merged.elems, position, d->nleaves);

   CALLOC(d->follows, position_set, d->tindex);
--
1.7.0.1.300.gd855a


>From ef8a0827b12858613858c45699b2506e998a4a37 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Fri, 5 Mar 2010 09:41:22 +0100
Subject: [PATCH 4/5] maint: dfa-sync: don't malloc zero

* src/dfa.c (dfacomp): Skip case_fold logic when length is zero.
This is probably "no semantic change", but it does improve efficiency in
a degenerate case.
---
 src/dfa.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index d5fa9ce..7a4865c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2984,7 +2984,7 @@ dfainit (struct dfa *d)
 void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
-  if (case_fold)       /* dummy folding in service of dfamust() */
+  if (case_fold && len)        /* dummy folding in service of dfamust() */
     {
       char *lcopy;
       int i;
--
1.7.0.1.300.gd855a


>From a7836a557f31acf90c40e82dbe47f4ec0e743264 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Fri, 5 Mar 2010 09:59:11 +0100
Subject: [PATCH 5/5] maint: dfa: sync a comment and dead-to-grep code: no semantic change

* src/dfa.c: Sync a comment and some #ifdef GAWK code.
---
 src/dfa.c |   10 +++++++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 7a4865c..1da45fa 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -75,8 +75,9 @@
    host does not conform to Posix.  */
 #define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)

-#include <gettext.h>
-#define _(String) gettext(String)
+/* gettext.h ensures that we don't use gettext if ENABLE_NLS is not defined */
+#include "gettext.h"
+#define _(str) gettext (str)

 #include "mbsupport.h"  /* defines MBS_SUPPORT if appropriate */
 #ifdef MBS_SUPPORT
@@ -935,6 +936,9 @@ lex (void)
          if (c != '}')
            dfaerror(_("malformed repeat count"));
          laststart = 0;
+#ifdef GAWK
+         dfa->broken = (minrep == maxrep && minrep == 0);
+#endif
          return lasttok = REPMN;

        case '|':
@@ -1593,7 +1597,7 @@ static void
 epsclosure (position_set *s, struct dfa const *d)
 {
   int i, j;
-  char *visited;
+  char *visited;       /* array of booleans, enough to use char, not int */
   position p, old;

   CALLOC(visited, char, d->tindex);
--
1.7.0.1.300.gd855a




reply via email to

[Prev in Thread] Current Thread [Next in Thread]