emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/src/regex.c


From: Kenichi Handa
Subject: [Emacs-diffs] Changes to emacs/src/regex.c
Date: Tue, 03 Sep 2002 00:09:40 -0400

Index: emacs/src/regex.c
diff -c emacs/src/regex.c:1.176 emacs/src/regex.c:1.177
*** emacs/src/regex.c:1.176     Sun Mar 24 19:45:48 2002
--- emacs/src/regex.c   Fri Aug 23 18:19:56 2002
***************
*** 19,25 ****
     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
     USA.        */
  
! /* TODO:
     - structure the opcode space into opcode+flag.
     - merge with glibc's regex.[ch].
     - replace (succeed_n + jump_n + set_number_at) with something that doesn't
--- 19,27 ----
     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
     USA.        */
  
! /* BUGS:
!    - (x?)*y\1z should match both xxxxyxz and xxxyz.
!    TODO:
     - structure the opcode space into opcode+flag.
     - merge with glibc's regex.[ch].
     - replace (succeed_n + jump_n + set_number_at) with something that doesn't
***************
*** 1682,1698 ****
  static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
                                    char *fastmap, const int multibyte));
  
- /* Fetch the next character in the uncompiled pattern---translating it
-    if necessary.  */
- #define PATFETCH(c)                                                   \
-   do {                                                                \
-     PATFETCH_RAW (c);                                                 \
-     c = TRANSLATE (c);                                                \
-   } while (0)
- 
  /* Fetch the next character in the uncompiled pattern, with no
     translation.  */
! #define PATFETCH_RAW(c)                                               \
    do {                                                                \
      int len;                                                          \
      if (p == pend) return REG_EEND;                                   \
--- 1684,1692 ----
  static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
                                    char *fastmap, const int multibyte));
  
  /* Fetch the next character in the uncompiled pattern, with no
     translation.  */
! #define PATFETCH(c)                                                   \
    do {                                                                \
      int len;                                                          \
      if (p == pend) return REG_EEND;                                   \
***************
*** 1914,1925 ****
  #define BIT_UPPER     0x10
  #define BIT_MULTIBYTE 0x20
  
! /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
! #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)  \
!   do {                                                                \
!     EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);                    \
!     (work_area).table[(work_area).used++] = (range_start);            \
!     (work_area).table[(work_area).used++] = (range_end);              \
    } while (0)
  
  /* Free allocated memory for WORK_AREA.        */
--- 1908,1920 ----
  #define BIT_UPPER     0x10
  #define BIT_MULTIBYTE 0x20
  
! /* Set a range START..END to WORK_AREA.
!    The range is passed through TRANSLATE, so START and END
!    should be untranslated.  */
! #define SET_RANGE_TABLE_WORK_AREA(work_area, start, end)      \
!   do {                                                                \
!     EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);            \
!     set_image_of_range (&work_area, start, end, translate);   \
    } while (0)
  
  /* Free allocated memory for WORK_AREA.        */
***************
*** 2077,2082 ****
--- 2072,2102 ----
  }
  #endif
  
+ 
+ 
+ /* We need to find the image of the range start..end when passed through
+    TRANSLATE.  This is not necessarily TRANSLATE(start)..TRANSLATE(end)
+    and is not even necessarily contiguous.
+    We approximate it with the smallest contiguous range that contains
+    all the chars we need.  */
+ static void
+ set_image_of_range (work_area, start, end, translate)
+      RE_TRANSLATE_TYPE translate;
+      struct range_table_work_area *work_area;
+      re_wchar_t start, end;
+ {
+   re_wchar_t cmin = TRANSLATE (start), cmax = TRANSLATE (end);
+   if (RE_TRANSLATE_P (translate))
+     for (; start <= end; start++)
+       {
+       re_wchar_t c = TRANSLATE (start);
+       cmin = MIN (cmin, c);
+       cmax = MAX (cmax, c);
+       }
+   work_area->table[work_area->used++] = (cmin);
+   work_area->table[work_area->used++] = (cmax);
+ }
+ 
  /* Explicit quit checking is only used on NTemacs.  */
  #if defined WINDOWSNT && defined emacs && defined QUIT
  extern int immediate_quit;
***************
*** 2525,2530 ****
--- 2545,2554 ----
  
                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
+               /* Don't translate yet.  The range TRANSLATE(X..Y) cannot
+                  always be determined from TRANSLATE(X) and TRANSLATE(Y).
+                  So the translation is done later in a loop.  Example:
+                  (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
                PATFETCH (c);
  
                /* \ might escape characters inside [...] and [^...].  */
***************
*** 2584,2590 ****
                       them).  */
                    if (c == ':' && *p == ']')
                      {
!                       int ch;
                        re_wctype_t cc;
  
                        cc = re_wctype (str);
--- 2608,2614 ----
                       them).  */
                    if (c == ':' && *p == ']')
                      {
!                       re_wchar_t ch;
                        re_wctype_t cc;
  
                        cc = re_wctype (str);
***************
*** 2653,2660 ****
                               starting at the smallest character in
                               the charset of C1 and ending at C1.  */
                            int charset = CHAR_CHARSET (c1);
!                           int c2 = MAKE_CHAR (charset, 0, 0);
!                           
                            SET_RANGE_TABLE_WORK_AREA (range_table_work,
                                                       c2, c1);
                            c1 = 0377;
--- 2677,2684 ----
                               starting at the smallest character in
                               the charset of C1 and ending at C1.  */
                            int charset = CHAR_CHARSET (c1);
!                           re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
! 
                            SET_RANGE_TABLE_WORK_AREA (range_table_work,
                                                       c2, c1);
                            c1 = 0377;
***************
*** 2672,2678 ****
                  /* ... into bitmap.  */
                  {
                    re_wchar_t this_char;
!                   int range_start = c, range_end = c1;
  
                    /* If the start is after the end, the range is empty.  */
                    if (range_start > range_end)
--- 2696,2702 ----
                  /* ... into bitmap.  */
                  {
                    re_wchar_t this_char;
!                   re_wchar_t range_start = c, range_end = c1;
  
                    /* If the start is after the end, the range is empty.  */
                    if (range_start > range_end)
***************
*** 2769,2775 ****
          /* Do not translate the character after the \, so that we can
             distinguish, e.g., \B from \b, even if we normally would
             translate, e.g., B to b.  */
!         PATFETCH_RAW (c);
  
          switch (c)
            {
--- 2793,2799 ----
          /* Do not translate the character after the \, so that we can
             distinguish, e.g., \B from \b, even if we normally would
             translate, e.g., B to b.  */
!         PATFETCH (c);
  
          switch (c)
            {
***************
*** 3129,3141 ****
  
            case 'c':
              laststart = b;
!             PATFETCH_RAW (c);
              BUF_PUSH_2 (categoryspec, c);
              break;
  
            case 'C':
              laststart = b;
!             PATFETCH_RAW (c);
              BUF_PUSH_2 (notcategoryspec, c);
              break;
  #endif /* emacs */
--- 3153,3165 ----
  
            case 'c':
              laststart = b;
!             PATFETCH (c);
              BUF_PUSH_2 (categoryspec, c);
              break;
  
            case 'C':
              laststart = b;
!             PATFETCH (c);
              BUF_PUSH_2 (notcategoryspec, c);
              break;
  #endif /* emacs */
***************
*** 3225,3231 ****
              /* You might think it would be useful for \ to mean
                 not to translate; but if we don't translate it
                 it will never match anything.  */
-             c = TRANSLATE (c);
              goto normal_char;
            }
          break;
--- 3249,3254 ----
***************
*** 3234,3240 ****
        default:
        /* Expects the character in `c'.  */
        normal_char:
!             /* If no exactn currently being built.  */
          if (!pending_exact
  
              /* If last exactn not at current position.  */
--- 3257,3263 ----
        default:
        /* Expects the character in `c'.  */
        normal_char:
!         /* If no exactn currently being built.  */
          if (!pending_exact
  
              /* If last exactn not at current position.  */
***************
*** 3265,3270 ****
--- 3288,3294 ----
          {
            int len;
  
+           c = TRANSLATE (c);
            if (multibyte)
              len = CHAR_STRING (c, b);
            else
***************
*** 4427,4433 ****
             they don't overlap.  The union of the two sets of excluded
             chars should cover all possible chars, which, as a matter of
             fact, is virtually impossible in multibyte buffers.  */
!         ;
        }
        break;
  
--- 4451,4457 ----
             they don't overlap.  The union of the two sets of excluded
             chars should cover all possible chars, which, as a matter of
             fact, is virtually impossible in multibyte buffers.  */
!         break;
        }
        break;
  




reply via email to

[Prev in Thread] Current Thread [Next in Thread]