[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/src/regex.c [emacs-unicode-2]
From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/src/regex.c [emacs-unicode-2]
Date: Mon, 28 Jun 2004 03:55:27 -0400
Index: emacs/src/regex.c
diff -c emacs/src/regex.c:1.186.4.2 emacs/src/regex.c:1.186.4.3
*** emacs/src/regex.c:1.186.4.2 Thu Apr 8 11:16:03 2004
--- emacs/src/regex.c Mon Jun 28 07:29:23 2004
***************
*** 2,8 ****
0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
! Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
--- 2,8 ----
0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
! Copyright (C) 1993,94,95,96,97,98,99,2000,04 Free Software Foundation,
Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
***************
*** 233,239 ****
/* Define the syntax stuff for \<, \>, etc. */
/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
! enum syntaxcode { Swhitespace = 0, Sword = 1 };
# ifdef SWITCH_ENUM_BUG
# define SWITCH_ENUM_CAST(x) ((int)(x))
--- 233,239 ----
/* Define the syntax stuff for \<, \>, etc. */
/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
! enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# ifdef SWITCH_ENUM_BUG
# define SWITCH_ENUM_CAST(x) ((int)(x))
***************
*** 423,429 ****
if (ISALNUM (c))
re_syntax_table[c] = Sword;
! re_syntax_table['_'] = Sword;
done = 1;
}
--- 423,429 ----
if (ISALNUM (c))
re_syntax_table[c] = Sword;
! re_syntax_table['_'] = Ssymbol;
done = 1;
}
***************
*** 680,685 ****
--- 680,688 ----
wordbound, /* Succeeds if at a word boundary. */
notwordbound, /* Succeeds if not at a word boundary. */
+ symbeg, /* Succeeds if at symbol beginning. */
+ symend, /* Succeeds if at symbol end. */
+
/* Matches any character whose syntax is specified. Followed by
a byte which contains a syntax code, e.g., Sword. */
syntaxspec,
***************
*** 1118,1123 ****
--- 1121,1135 ----
case wordend:
fprintf (stderr, "/wordend");
+ break;
+
+ case symbeg:
+ fprintf (stderr, "/symbeg");
+ break;
+
+ case symend:
+ fprintf (stderr, "/symend");
+ break;
case syntaxspec:
fprintf (stderr, "/syntaxspec");
***************
*** 2003,2043 ****
} \
} while (0)
! #if WIDE_CHAR_SUPPORT
! /* The GNU C library provides support for user-defined character classes
! and the functions from ISO C amendement 1. */
! # ifdef CHARCLASS_NAME_MAX
! # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
! # else
! /* This shouldn't happen but some implementation might still have this
! problem. Use a reasonable default value. */
! # define CHAR_CLASS_MAX_LENGTH 256
! # endif
! typedef wctype_t re_wctype_t;
! typedef wchar_t re_wchar_t;
! # define re_wctype wctype
! # define re_iswctype iswctype
! # define re_wctype_to_bit(cc) 0
! #else
! # define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
! # define btowc(c) c
!
! /* Character classes. */
! typedef enum { RECC_ERROR = 0,
! RECC_ALNUM, RECC_ALPHA, RECC_WORD,
! RECC_GRAPH, RECC_PRINT,
! RECC_LOWER, RECC_UPPER,
! RECC_PUNCT, RECC_CNTRL,
! RECC_DIGIT, RECC_XDIGIT,
! RECC_BLANK, RECC_SPACE,
! RECC_MULTIBYTE, RECC_NONASCII,
! RECC_ASCII, RECC_UNIBYTE
! } re_wctype_t;
!
! typedef int re_wchar_t;
/* Map a string to the char class it names (if any). */
! static re_wctype_t
re_wctype (str)
re_char *str;
{
--- 2015,2024 ----
} \
} while (0)
! #if ! WIDE_CHAR_SUPPORT
/* Map a string to the char class it names (if any). */
! re_wctype_t
re_wctype (str)
re_char *str;
{
***************
*** 2063,2069 ****
}
/* True iff CH is in the char class CC. */
! static boolean
re_iswctype (ch, cc)
int ch;
re_wctype_t cc;
--- 2044,2050 ----
}
/* True iff CH is in the char class CC. */
! boolean
re_iswctype (ch, cc)
int ch;
re_wctype_t cc;
***************
*** 3464,3469 ****
--- 3445,3463 ----
BUF_PUSH (wordend);
break;
+ case '_':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ PATFETCH (c);
+ if (c == '<')
+ BUF_PUSH (symbeg);
+ else if (c == '>')
+ BUF_PUSH (symend);
+ else
+ FREE_STACK_RETURN (REG_BADPAT);
+ break;
+
case 'b':
if (syntax & RE_NO_GNU_OPS)
goto normal_char;
***************
*** 3980,3985 ****
--- 3974,3981 ----
case notwordbound:
case wordbeg:
case wordend:
+ case symbeg:
+ case symend:
continue;
***************
*** 4768,4781 ****
break;
case wordend:
! case notsyntaxspec:
return ((re_opcode_t) *p1 == syntaxspec
! && p1[1] == (op2 == wordend ? Sword : p2[1]));
case wordbeg:
! case syntaxspec:
return ((re_opcode_t) *p1 == notsyntaxspec
! && p1[1] == (op2 == wordend ? Sword : p2[1]));
case wordbound:
return (((re_opcode_t) *p1 == notsyntaxspec
--- 4764,4783 ----
break;
case wordend:
! return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
! case symend:
return ((re_opcode_t) *p1 == syntaxspec
! && (p1[1] == Ssymbol || p1[1] == Sword));
! case notsyntaxspec:
! return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
case wordbeg:
! return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
! case symbeg:
return ((re_opcode_t) *p1 == notsyntaxspec
! && (p1[1] == Ssymbol || p1[1] == Sword));
! case syntaxspec:
! return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
case wordbound:
return (((re_opcode_t) *p1 == notsyntaxspec
***************
*** 5952,5957 ****
--- 5954,6045 ----
}
break;
+ case symbeg:
+ DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the end of string. */
+ if (AT_STRINGS_END (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+ #ifdef emacs
+ int offset = PTR_TO_OFFSET (d);
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+ #endif
+ PREFETCH ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+ s2 = SYNTAX (c2);
+
+ /* Case 2: S2 is neither Sword nor Ssymbol. */
+ if (s2 != Sword && s2 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the beginning of string ... */
+ if (!AT_STRINGS_BEG (d))
+ {
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ #ifdef emacs
+ UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+ #endif
+ s1 = SYNTAX (c1);
+
+ /* ... and S1 is Sword or Ssymbol. */
+ if (s1 == Sword || s1 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
+ case symend:
+ DEBUG_PRINT1 ("EXECUTING symend.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the beginning of string. */
+ if (AT_STRINGS_BEG (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+ #ifdef emacs
+ int offset = PTR_TO_OFFSET (d) - 1;
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+ #endif
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ s1 = SYNTAX (c1);
+
+ /* Case 2: S1 is neither Ssymbol nor Sword. */
+ if (s1 != Sword && s1 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the end of string ... */
+ if (!AT_STRINGS_END (d))
+ {
+ PREFETCH_NOLIMIT ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+ #ifdef emacs
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ #endif
+ s2 = SYNTAX (c2);
+
+ /* ... and S2 is Sword or Ssymbol. */
+ if (s2 == Sword || s2 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
case syntaxspec:
case notsyntaxspec:
not = (re_opcode_t) *(p - 1) == notsyntaxspec;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] Changes to emacs/src/regex.c [emacs-unicode-2],
Miles Bader <=