[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[cp-patches] Patch: FYI: PR libgcj/20504
From: |
Tom Tromey |
Subject: |
[cp-patches] Patch: FYI: PR libgcj/20504 |
Date: |
16 May 2005 12:56:35 -0600 |
User-agent: |
Gnus/5.09 (Gnus v5.9.0) Emacs/21.3.50 |
I'm checking this in on the trunk, the 4.0 branch, and Classpath.
This fixes PR 20504. It is just the patch from the PR; Ziga's
paperwork cleared. I've checked his test cases into Mauve as well.
Tom
Index: ChangeLog
from Ziga Mahkovec <address@hidden>
PR libgcj/20504
* gnu/regexp/RE.java: Add support for quoting constructs (\Q...\E).
Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/regexp/RE.java,v
retrieving revision 1.3
diff -u -r1.3 RE.java
--- gnu/regexp/RE.java 20 Feb 2005 21:18:29 -0000 1.3
+++ gnu/regexp/RE.java 16 May 2005 18:56:19 -0000
@@ -331,10 +331,22 @@
// Buffer a token so we can create a TokenRepeated, etc.
REToken currentToken = null;
char ch;
+ boolean quot = false;
while (index < pLength) {
// read the next character unit (including backslash escapes)
- index = getCharUnit(pattern,index,unit);
+ index = getCharUnit(pattern,index,unit,quot);
+
+ if (unit.bk)
+ if (unit.ch == 'Q') {
+ quot = true;
+ continue;
+ } else if (unit.ch == 'E') {
+ quot = false;
+ continue;
+ }
+ if (quot)
+ unit.bk = false;
// ALTERNATION OPERATOR
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
@@ -342,8 +354,8 @@
// TODO: the '\n' literal here should be a test against REToken.newline,
// which unfortunately may be more than a single character.
- if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
- || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
+ if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
+ || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
&& !syntax.get(RESyntax.RE_LIMITED_OPS)) {
// make everything up to here be a branch. create vector if nec.
addToken(currentToken);
@@ -363,7 +375,7 @@
// OPEN QUESTION:
// what is proper interpretation of '{' at start of string?
- else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
+ else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
int newIndex = getMinMax(pattern,index,minMax,syntax);
if (newIndex > index) {
if (minMax.first > minMax.second)
@@ -388,7 +400,7 @@
// LIST OPERATOR:
// [...] | [^...]
- else if ((unit.ch == '[') && !unit.bk) {
+ else if ((unit.ch == '[') && !(unit.bk || quot)) {
Vector options = new Vector();
boolean negative = false;
char lastChar = 0;
@@ -490,7 +502,7 @@
// SUBEXPRESSIONS
// (...) | \(...\) depending on RE_NO_BK_PARENS
- else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
+ else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
boolean pure = false;
boolean comment = false;
boolean lookAhead = false;
@@ -537,13 +549,13 @@
int nextIndex = index;
int nested = 0;
- while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
- && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
+ while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
+ && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
if ((endIndex = nextIndex) >= pLength)
throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
- else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested++;
- else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested--;
// endIndex is now position at a ')','\)'
@@ -572,14 +584,14 @@
// UNMATCHED RIGHT PAREN
// ) or \) throw exception if
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
- else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
+ else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
}
// START OF LINE OPERATOR
// ^
- else if ((unit.ch == '^') && !unit.bk) {
+ else if ((unit.ch == '^') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ?
syntax.getLineSeparator() : null));
@@ -588,7 +600,7 @@
// END OF LINE OPERATOR
// $
- else if ((unit.ch == '$') && !unit.bk) {
+ else if ((unit.ch == '$') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ?
syntax.getLineSeparator() : null));
@@ -597,7 +609,7 @@
// MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
// .
- else if ((unit.ch == '.') && !unit.bk) {
+ else if ((unit.ch == '.') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = new
RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags &
REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
}
@@ -605,7 +617,7 @@
// ZERO-OR-MORE REPEAT OPERATOR
// *
- else if ((unit.ch == '*') && !unit.bk) {
+ else if ((unit.ch == '*') && !(unit.bk || quot)) {
if (currentToken == null)
throw new
REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@@ -621,7 +633,7 @@
// + | \+ depending on RE_BK_PLUS_QM
// not available if RE_LIMITED_OPS is set
- else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@@ -638,7 +650,7 @@
// not available if RE_LIMITED_OPS is set
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
- else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
// Check for stingy matching on RETokenRepeated
@@ -805,9 +817,9 @@
}
- private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
+ private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
unit.ch = input[index++];
- if (unit.bk = (unit.ch == '\\'))
+ if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E')))
if (index < input.length)
unit.ch = input[index++];
else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
@@ -1281,7 +1293,7 @@
// Read string of digits
do {
- index = getCharUnit(input,index,unit);
+ index = getCharUnit(input,index,unit,false);
if (Character.isDigit(unit.ch))
buf.append(unit.ch);
} while ((index != input.length) && Character.isDigit(unit.ch));
@@ -1306,7 +1318,7 @@
else if ((unit.ch == ',') && !unit.bk) {
buf = new StringBuffer();
// Read string of digits
- while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
+ while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
buf.append(unit.ch);
if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [cp-patches] Patch: FYI: PR libgcj/20504,
Tom Tromey <=