classpath-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[cp-patches] Patch: FYI: PR libgcj/20504


From: Tom Tromey
Subject: [cp-patches] Patch: FYI: PR libgcj/20504
Date: 16 May 2005 12:56:35 -0600
User-agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.3.50

I'm checking this in on the trunk, the 4.0 branch, and Classpath.

This fixes PR libgcj/20504 by adding support for the \Q...\E quoting
construct to gnu.regexp.  It is just the patch from the PR; Ziga's
paperwork has cleared.  I've checked his test cases into Mauve as well.

Tom

Index: ChangeLog
from  Ziga Mahkovec  <address@hidden>

        PR libgcj/20504
        * gnu/regexp/RE.java: Add support for quoting constructs (\Q...\E).

Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/regexp/RE.java,v
retrieving revision 1.3
diff -u -r1.3 RE.java
--- gnu/regexp/RE.java 20 Feb 2005 21:18:29 -0000 1.3
+++ gnu/regexp/RE.java 16 May 2005 18:56:19 -0000
@@ -331,10 +331,22 @@
     // Buffer a token so we can create a TokenRepeated, etc.
     REToken currentToken = null;
     char ch;
+    boolean quot = false;
 
     while (index < pLength) {
       // read the next character unit (including backslash escapes)
-      index = getCharUnit(pattern,index,unit);
+      index = getCharUnit(pattern,index,unit,quot);
+
+      if (unit.bk)
+        if (unit.ch == 'Q') {
+          quot = true;
+          continue;
+        } else if (unit.ch == 'E') {
+          quot = false;
+          continue;
+        }
+      if (quot)
+       unit.bk = false;
 
       // ALTERNATION OPERATOR
       //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
@@ -342,8 +354,8 @@
 
       // TODO: the '\n' literal here should be a test against REToken.newline,
       // which unfortunately may be more than a single character.
-      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
-            || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && 
!unit.bk) )
+      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk 
|| quot)))
+            || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && 
!(unit.bk || quot)) )
           && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
        // make everything up to here be a branch. create vector if nec.
        addToken(currentToken);
@@ -363,7 +375,7 @@
       // OPEN QUESTION: 
       //  what is proper interpretation of '{' at start of string?
 
-      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && 
(syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
+      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && 
(syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
        int newIndex = getMinMax(pattern,index,minMax,syntax);
         if (newIndex > index) {
           if (minMax.first > minMax.second)
@@ -388,7 +400,7 @@
       // LIST OPERATOR:
       //  [...] | [^...]
 
-      else if ((unit.ch == '[') && !unit.bk) {
+      else if ((unit.ch == '[') && !(unit.bk || quot)) {
        Vector options = new Vector();
        boolean negative = false;
        char lastChar = 0;
@@ -490,7 +502,7 @@
       // SUBEXPRESSIONS
       //  (...) | \(...\) depending on RE_NO_BK_PARENS
 
-      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
unit.bk)) {
+      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
(unit.bk || quot))) {
        boolean pure = false;
        boolean comment = false;
         boolean lookAhead = false;
@@ -537,13 +549,13 @@
        int nextIndex = index;
        int nested = 0;
 
-       while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
-               && !(nested == 0 && (unit.ch == ')') && 
(syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
+       while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
+               && !(nested == 0 && (unit.ch == ')') && 
(syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
          if ((endIndex = nextIndex) >= pLength)
            throw new 
REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
-         else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
unit.bk))
+         else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
(unit.bk || quot)))
            nested++;
-         else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
unit.bk))
+         else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ 
(unit.bk || quot)))
            nested--;
 
        // endIndex is now position at a ')','\)' 
@@ -572,14 +584,14 @@
       // UNMATCHED RIGHT PAREN
       // ) or \) throw exception if
       // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
-      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch 
== ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
+      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch 
== ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
        throw new 
REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
       }
 
       // START OF LINE OPERATOR
       //  ^
 
-      else if ((unit.ch == '^') && !unit.bk) {
+      else if ((unit.ch == '^') && !(unit.bk || quot)) {
        addToken(currentToken);
        currentToken = null;
        addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? 
syntax.getLineSeparator() : null));
@@ -588,7 +600,7 @@
       // END OF LINE OPERATOR
       //  $
 
-      else if ((unit.ch == '$') && !unit.bk) {
+      else if ((unit.ch == '$') && !(unit.bk || quot)) {
        addToken(currentToken);
        currentToken = null;
        addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? 
syntax.getLineSeparator() : null));
@@ -597,7 +609,7 @@
       // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
       //  .
 
-      else if ((unit.ch == '.') && !unit.bk) {
+      else if ((unit.ch == '.') && !(unit.bk || quot)) {
        addToken(currentToken);
        currentToken = new 
RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & 
REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
       }
@@ -605,7 +617,7 @@
       // ZERO-OR-MORE REPEAT OPERATOR
       //  *
 
-      else if ((unit.ch == '*') && !unit.bk) {
+      else if ((unit.ch == '*') && !(unit.bk || quot)) {
        if (currentToken == null)
           throw new 
REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
        if (currentToken instanceof RETokenRepeated)
@@ -621,7 +633,7 @@
       //  + | \+ depending on RE_BK_PLUS_QM
       //  not available if RE_LIMITED_OPS is set
 
-      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && 
(!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && 
(!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
        if (currentToken == null)
           throw new 
REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
        if (currentToken instanceof RETokenRepeated)
@@ -638,7 +650,7 @@
       //  not available if RE_LIMITED_OPS is set
       //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier
 
-      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && 
(!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && 
(!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
        if (currentToken == null) throw new 
REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
 
        // Check for stingy matching on RETokenRepeated
@@ -805,9 +817,9 @@
 
   }
 
-  private static int getCharUnit(char[] input, int index, CharUnit unit) 
throws REException {
+  private static int getCharUnit(char[] input, int index, CharUnit unit, 
boolean quot) throws REException {
     unit.ch = input[index++];
-    if (unit.bk = (unit.ch == '\\'))
+    if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || 
input[index] == 'E')))
       if (index < input.length)
        unit.ch = input[index++];
       else throw new 
REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
@@ -1281,7 +1293,7 @@
     
     // Read string of digits
     do {
-      index = getCharUnit(input,index,unit);
+      index = getCharUnit(input,index,unit,false);
       if (Character.isDigit(unit.ch))
         buf.append(unit.ch);
     } while ((index != input.length) && Character.isDigit(unit.ch));
@@ -1306,7 +1318,7 @@
     else if ((unit.ch == ',') && !unit.bk) {
       buf = new StringBuffer();
       // Read string of digits
-      while (((index = getCharUnit(input,index,unit)) != input.length) && 
Character.isDigit(unit.ch))
+      while (((index = getCharUnit(input,index,unit,false)) != input.length) 
&& Character.isDigit(unit.ch))
        buf.append(unit.ch);
 
       if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ 
unit.bk)))




reply via email to

[Prev in Thread] Current Thread [Next in Thread]