bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 08/11] dfa: remove redundant line constraints


From: Paolo Bonzini
Subject: [PATCH 08/11] dfa: remove redundant line constraints
Date: Wed, 4 Jan 2012 11:59:49 +0100

For any constraint, bit 7 is always (bit 6 | bit 5), and bit 4 is always
(bit 6 & bit 5).  This is because we do not have "line boundary"
anchors like we have \b and \B for words.  Use this to save two bits,
which we will use to differentiate ^ and $ from \` and \' when using
-z.

* src/dfa.c (MATCHES_NEWLINE_CONTEXT): Only test bits 5 (now bit 4) and
6 (now bit 5).
(NO_CONSTRAINT, BEGLINE_CONSTRAINT, ENDLINE_CONSTRAINT, BEGWORD_CONSTRAINT,
ENDWORD_CONSTRAINT, LIMWORD_CONSTRAINT, NOTLIMWORD_CONSTRAINT): Adjust.
---
 src/dfa.c |   29 ++++++++++++++---------------
 1 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 05a4d5e..d2bfba8 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -115,10 +115,8 @@ static inline unsigned char to_uchar (char ch) { return ch; }
    is set indicates that the constraint succeeds in the corresponding
    context.
 
-   bit 7 - previous and current are newlines
-   bit 6 - previous was newline, current isn't
-   bit 5 - previous wasn't newline, current is
-   bit 4 - neither previous nor current is a newline
+   bit 5 - previous need not be newline
+   bit 4 - current need not be newline
    bit 3 - previous and current are word-constituents
    bit 2 - previous was word-constituent, current isn't
    bit 1 - previous wasn't word-constituent, current is
@@ -129,8 +127,9 @@ static inline unsigned char to_uchar (char ch) { return ch; }
    context values for the previous character, curr is the bitmask of
    possible context values for the lookahead character. */
 #define MATCHES_NEWLINE_CONTEXT(constraint, prev, curr) \
-  ((constraint) & \
-   1 << (((prev & ~CTX_NEWLINE) ? 0 : 2) + ((curr & ~CTX_NEWLINE) ? 0 : 1) + 4))
+  ((((constraint) & 0x30) | \
+    ((prev & ~CTX_NEWLINE) ? 0 : 0x20) | \
+    ((curr & ~CTX_NEWLINE) ? 0 : 0x10)) == 0x30)
 #define MATCHES_LETTER_CONTEXT(constraint, prev, curr) \
   ((constraint) & \
    1 << (((prev & ~CTX_LETTER) ? 0 : 2) + ((curr & ~CTX_LETTER) ? 0 : 1)))
@@ -140,7 +139,7 @@ static inline unsigned char to_uchar (char ch) { return ch; }
 
 /* The following macros give information about what a constraint depends on. */
 #define PREV_NEWLINE_DEPENDENT(constraint) \
-  (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
+  (((constraint) & 0x20) == 0)
 #define PREV_LETTER_DEPENDENT(constraint) \
   (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
 
@@ -148,13 +147,13 @@ static inline unsigned char to_uchar (char ch) { return ch; }
    work by applying that constraint to determine what may follow them,
    taking into account what has gone before.  The following values are
    the constraints corresponding to the special tokens previously defined. */
-#define NO_CONSTRAINT 0xff
-#define BEGLINE_CONSTRAINT 0xcf
-#define ENDLINE_CONSTRAINT 0xaf
-#define BEGWORD_CONSTRAINT 0xf2
-#define ENDWORD_CONSTRAINT 0xf4
-#define LIMWORD_CONSTRAINT 0xf6
-#define NOTLIMWORD_CONSTRAINT 0xf9
+#define NO_CONSTRAINT 0x3f
+#define BEGLINE_CONSTRAINT 0x1f
+#define ENDLINE_CONSTRAINT 0x2f
+#define BEGWORD_CONSTRAINT 0x32
+#define ENDWORD_CONSTRAINT 0x34
+#define LIMWORD_CONSTRAINT 0x36
+#define NOTLIMWORD_CONSTRAINT 0x39
 
 /* The regexp is parsed into an array of tokens in postfix form.  Some tokens
    are operators and others are terminal symbols.  Most (but not all) of these
@@ -2492,7 +2491,7 @@ dfastate (int s, struct dfa *d, int trans[])
 
       /* Some characters may need to be eliminated from matches because
          they fail in the current context. */
-      if (pos.constraint != 0xFF)
+      if (pos.constraint != NO_CONSTRAINT)
         {
           if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
                                         d->states[s].context, CTX_NEWLINE))
-- 
1.7.7.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]