emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/src/regex.c [emacs-unicode-2]


From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/src/regex.c [emacs-unicode-2]
Date: Mon, 28 Jun 2004 03:55:27 -0400

Index: emacs/src/regex.c
diff -c emacs/src/regex.c:1.186.4.2 emacs/src/regex.c:1.186.4.3
*** emacs/src/regex.c:1.186.4.2 Thu Apr  8 11:16:03 2004
--- emacs/src/regex.c   Mon Jun 28 07:29:23 2004
***************
*** 2,8 ****
     0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
     internationalization features.)
  
!    Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
--- 2,8 ----
     0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
     internationalization features.)
  
!    Copyright (C) 1993,94,95,96,97,98,99,2000,04  Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
***************
*** 233,239 ****
  /* Define the syntax stuff for \<, \>, etc.  */
  
  /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
! enum syntaxcode { Swhitespace = 0, Sword = 1 };
  
  # ifdef SWITCH_ENUM_BUG
  #  define SWITCH_ENUM_CAST(x) ((int)(x))
--- 233,239 ----
  /* Define the syntax stuff for \<, \>, etc.  */
  
  /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
! enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
  
  # ifdef SWITCH_ENUM_BUG
  #  define SWITCH_ENUM_CAST(x) ((int)(x))
***************
*** 423,429 ****
       if (ISALNUM (c))
        re_syntax_table[c] = Sword;
  
!    re_syntax_table['_'] = Sword;
  
     done = 1;
  }
--- 423,429 ----
       if (ISALNUM (c))
        re_syntax_table[c] = Sword;
  
!    re_syntax_table['_'] = Ssymbol;
  
     done = 1;
  }
***************
*** 680,685 ****
--- 680,688 ----
    wordbound,  /* Succeeds if at a word boundary.  */
    notwordbound,       /* Succeeds if not at a word boundary.  */
  
+   symbeg,       /* Succeeds if at symbol beginning.  */
+   symend,       /* Succeeds if at symbol end.  */
+ 
        /* Matches any character whose syntax is specified.  Followed by
           a byte which contains a syntax code, e.g., Sword.  */
    syntaxspec,
***************
*** 1118,1123 ****
--- 1121,1135 ----
  
        case wordend:
          fprintf (stderr, "/wordend");
+         break;
+ 
+       case symbeg:
+         fprintf (stderr, "/symbeg");
+         break;
+ 
+       case symend:
+         fprintf (stderr, "/symend");
+         break;
  
        case syntaxspec:
          fprintf (stderr, "/syntaxspec");
***************
*** 2003,2043 ****
         }                                                              \
      } while (0)
  
! #if WIDE_CHAR_SUPPORT
! /* The GNU C library provides support for user-defined character classes
!    and the functions from ISO C amendement 1.  */
! # ifdef CHARCLASS_NAME_MAX
! #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
! # else
! /* This shouldn't happen but some implementation might still have this
!    problem.  Use a reasonable default value.  */
! #  define CHAR_CLASS_MAX_LENGTH 256
! # endif
! typedef wctype_t re_wctype_t;
! typedef wchar_t re_wchar_t;
! # define re_wctype wctype
! # define re_iswctype iswctype
! # define re_wctype_to_bit(cc) 0
! #else
! # define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
! # define btowc(c) c
! 
! /* Character classes.  */
! typedef enum { RECC_ERROR = 0,
!              RECC_ALNUM, RECC_ALPHA, RECC_WORD,
!              RECC_GRAPH, RECC_PRINT,
!              RECC_LOWER, RECC_UPPER,
!              RECC_PUNCT, RECC_CNTRL,
!              RECC_DIGIT, RECC_XDIGIT,
!              RECC_BLANK, RECC_SPACE,
!              RECC_MULTIBYTE, RECC_NONASCII,
!              RECC_ASCII, RECC_UNIBYTE
! } re_wctype_t;
! 
! typedef int re_wchar_t;
  
  /* Map a string to the char class it names (if any).  */
! static re_wctype_t
  re_wctype (str)
       re_char *str;
  {
--- 2015,2024 ----
         }                                                              \
      } while (0)
  
! #if ! WIDE_CHAR_SUPPORT
  
  /* Map a string to the char class it names (if any).  */
! re_wctype_t
  re_wctype (str)
       re_char *str;
  {
***************
*** 2063,2069 ****
  }
  
  /* True iff CH is in the char class CC.  */
! static boolean
  re_iswctype (ch, cc)
       int ch;
       re_wctype_t cc;
--- 2044,2050 ----
  }
  
  /* True iff CH is in the char class CC.  */
! boolean
  re_iswctype (ch, cc)
       int ch;
       re_wctype_t cc;
***************
*** 3464,3469 ****
--- 3445,3463 ----
              BUF_PUSH (wordend);
              break;
  
+           case '_':
+             if (syntax & RE_NO_GNU_OPS)
+               goto normal_char;
+               laststart = b;
+               PATFETCH (c);
+               if (c == '<')
+                 BUF_PUSH (symbeg);
+               else if (c == '>')
+                 BUF_PUSH (symend);
+               else
+                 FREE_STACK_RETURN (REG_BADPAT);
+               break;
+ 
            case 'b':
              if (syntax & RE_NO_GNU_OPS)
                goto normal_char;
***************
*** 3980,3985 ****
--- 3974,3981 ----
        case notwordbound:
        case wordbeg:
        case wordend:
+       case symbeg:
+       case symend:
          continue;
  
  
***************
*** 4768,4781 ****
        break;
  
      case wordend:
!     case notsyntaxspec:
        return ((re_opcode_t) *p1 == syntaxspec
!             && p1[1] == (op2 == wordend ? Sword : p2[1]));
  
      case wordbeg:
!     case syntaxspec:
        return ((re_opcode_t) *p1 == notsyntaxspec
!             && p1[1] == (op2 == wordend ? Sword : p2[1]));
  
      case wordbound:
        return (((re_opcode_t) *p1 == notsyntaxspec
--- 4764,4783 ----
        break;
  
      case wordend:
!       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
!     case symend:
        return ((re_opcode_t) *p1 == syntaxspec
!               && (p1[1] == Ssymbol || p1[1] == Sword));
!     case notsyntaxspec:
!       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
  
      case wordbeg:
!       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
!     case symbeg:
        return ((re_opcode_t) *p1 == notsyntaxspec
!               && (p1[1] == Ssymbol || p1[1] == Sword));
!     case syntaxspec:
!       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
  
      case wordbound:
        return (((re_opcode_t) *p1 == notsyntaxspec
***************
*** 5952,5957 ****
--- 5954,6045 ----
            }
          break;
  
+       case symbeg:
+         DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+ 
+         /* We FAIL in one of the following cases: */
+ 
+         /* Case 1: D is at the end of string.  */
+         if (AT_STRINGS_END (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             re_wchar_t c1, c2;
+             int s1, s2;
+ #ifdef emacs
+             int offset = PTR_TO_OFFSET (d);
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             UPDATE_SYNTAX_TABLE (charpos);
+ #endif
+             PREFETCH ();
+             c2 = RE_STRING_CHAR (d, dend - d);
+             s2 = SYNTAX (c2);
+       
+             /* Case 2: S2 is neither Sword nor Ssymbol. */
+             if (s2 != Sword && s2 != Ssymbol)
+               goto fail;
+ 
+             /* Case 3: D is not at the beginning of string ... */
+             if (!AT_STRINGS_BEG (d))
+               {
+                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ #ifdef emacs
+                 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+ #endif
+                 s1 = SYNTAX (c1);
+ 
+                 /* ... and S1 is Sword or Ssymbol.  */
+                 if (s1 == Sword || s1 == Ssymbol)
+                   goto fail;
+               }
+           }
+         break;
+ 
+       case symend:
+         DEBUG_PRINT1 ("EXECUTING symend.\n");
+ 
+         /* We FAIL in one of the following cases: */
+ 
+         /* Case 1: D is at the beginning of string.  */
+         if (AT_STRINGS_BEG (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             re_wchar_t c1, c2;
+             int s1, s2;
+ #ifdef emacs
+             int offset = PTR_TO_OFFSET (d) - 1;
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             UPDATE_SYNTAX_TABLE (charpos);
+ #endif
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             s1 = SYNTAX (c1);
+ 
+             /* Case 2: S1 is neither Ssymbol nor Sword.  */
+             if (s1 != Sword && s1 != Ssymbol)
+               goto fail;
+ 
+             /* Case 3: D is not at the end of string ... */
+             if (!AT_STRINGS_END (d))
+               {
+                 PREFETCH_NOLIMIT ();
+                 c2 = RE_STRING_CHAR (d, dend - d);
+ #ifdef emacs
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ #endif
+                 s2 = SYNTAX (c2);
+ 
+                 /* ... and S2 is Sword or Ssymbol.  */
+                 if (s2 == Sword || s2 == Ssymbol)
+                     goto fail;
+               }
+           }
+         break;
+ 
        case syntaxspec:
        case notsyntaxspec:
          not = (re_opcode_t) *(p - 1) == notsyntaxspec;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]