emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] emacs/src ChangeLog search.c


From: Andreas Schwab
Subject: [Emacs-diffs] emacs/src ChangeLog search.c
Date: Thu, 16 Apr 2009 09:30:47 +0000

CVSROOT:        /sources/emacs
Module name:    emacs
Changes by:     Andreas Schwab <schwab> 09/04/16 09:30:47

Modified files:
        src            : ChangeLog search.c 

Log message:
        (boyer_moore): Use zero as marker value for a possible
        match instead of depending on overflow behavior.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/emacs/src/ChangeLog?cvsroot=emacs&r1=1.7489&r2=1.7490
http://cvs.savannah.gnu.org/viewcvs/emacs/src/search.c?cvsroot=emacs&r1=1.238&r2=1.239

Patches:
Index: ChangeLog
===================================================================
RCS file: /sources/emacs/emacs/src/ChangeLog,v
retrieving revision 1.7489
retrieving revision 1.7490
diff -u -b -r1.7489 -r1.7490
--- ChangeLog   16 Apr 2009 03:58:44 -0000      1.7489
+++ ChangeLog   16 Apr 2009 09:30:45 -0000      1.7490
@@ -1,3 +1,8 @@
+2009-04-16  Andreas Schwab  <address@hidden>
+
+       * search.c (boyer_moore): Use zero as marker value for a possible
+       match instead of depending on overflow behavior.
+
 2009-04-16  Chong Yidong  <address@hidden>
 
        * keyboard.c (adjust_point_for_property): Disable 2009-02-12

Index: search.c
===================================================================
RCS file: /sources/emacs/emacs/src/search.c,v
retrieving revision 1.238
retrieving revision 1.239
diff -u -b -r1.238 -r1.239
--- search.c    15 Apr 2009 17:06:34 -0000      1.238
+++ search.c    16 Apr 2009 09:30:47 -0000      1.239
@@ -1729,9 +1729,8 @@
 {
   int direction = ((n > 0) ? 1 : -1);
   register int dirlen;
-  int infinity, limit, stride_for_teases = 0;
-  register int *BM_tab;
-  int *BM_tab_base;
+  int limit, stride_for_teases = 0;
+  int BM_tab[0400];
   register unsigned char *cursor, *p_limit;
   register int i, j;
   unsigned char *pat, *pat_end;
@@ -1747,37 +1746,28 @@
   int translate_prev_byte3 = 0;
   int translate_prev_byte4 = 0;
 
-  BM_tab = (int *) alloca (0400 * sizeof (int));
-
-  /* The general approach is that we are going to maintain that we know */
-  /* the first (closest to the present position, in whatever direction */
-  /* we're searching) character that could possibly be the last */
-  /* (furthest from present position) character of a valid match.  We */
-  /* advance the state of our knowledge by looking at that character */
-  /* and seeing whether it indeed matches the last character of the */
-  /* pattern.  If it does, we take a closer look.  If it does not, we */
-  /* move our pointer (to putative last characters) as far as is */
-  /* logically possible.  This amount of movement, which I call a */
-  /* stride, will be the length of the pattern if the actual character */
-  /* appears nowhere in the pattern, otherwise it will be the distance */
-  /* from the last occurrence of that character to the end of the */
-  /* pattern. */
-  /* As a coding trick, an enormous stride is coded into the table for */
-  /* characters that match the last character.  This allows use of only */
-  /* a single test, a test for having gone past the end of the */
-  /* permissible match region, to test for both possible matches (when */
-  /* the stride goes past the end immediately) and failure to */
-  /* match (where you get nudged past the end one stride at a time). */
-
-  /* Here we make a "mickey mouse" BM table.  The stride of the search */
-  /* is determined only by the last character of the putative match. */
-  /* If that character does not match, we will stride the proper */
-  /* distance to propose a match that superimposes it on the last */
-  /* instance of a character that matches it (per trt), or misses */
-  /* it entirely if there is none. */
+  /* The general approach is that we are going to maintain that we know
+     the first (closest to the present position, in whatever direction
+     we're searching) character that could possibly be the last
+     (furthest from present position) character of a valid match.  We
+     advance the state of our knowledge by looking at that character
+     and seeing whether it indeed matches the last character of the
+     pattern.  If it does, we take a closer look.  If it does not, we
+     move our pointer (to putative last characters) as far as is
+     logically possible.  This amount of movement, which I call a
+     stride, will be the length of the pattern if the actual character
+     appears nowhere in the pattern, otherwise it will be the distance
+     from the last occurrence of that character to the end of the
+     pattern.  If the amount is zero we have a possible match.  */
+
+  /* Here we make a "mickey mouse" BM table.  The stride of the search
+     is determined only by the last character of the putative match.
+     If that character does not match, we will stride the proper
+     distance to propose a match that superimposes it on the last
+     instance of a character that matches it (per trt), or misses
+     it entirely if there is none. */
 
   dirlen = len_byte * direction;
-  infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
 
   /* Record position after the end of the pattern.  */
   pat_end = base_pat + len_byte;
@@ -1787,23 +1777,14 @@
   if (direction < 0)
     base_pat = pat_end - 1;
 
-  BM_tab_base = BM_tab;
-  BM_tab += 0400;
-  j = dirlen;          /* to get it in a register */
-  /* A character that does not appear in the pattern induces a */
-  /* stride equal to the pattern length. */
-  while (BM_tab_base != BM_tab)
-    {
-      *--BM_tab = j;
-      *--BM_tab = j;
-      *--BM_tab = j;
-      *--BM_tab = j;
-    }
+  /* A character that does not appear in the pattern induces a
+     stride equal to the pattern length.  */
+  for (i = 0; i < 0400; i++)
+    BM_tab[i] = dirlen;
 
   /* We use this for translation, instead of TRT itself.
      We fill this in to handle the characters that actually
      occur in the pattern.  Others don't matter anyway!  */
-  bzero (simple_translate, sizeof simple_translate);
   for (i = 0; i < 0400; i++)
     simple_translate[i] = i;
 
@@ -1828,12 +1809,10 @@
     }
 
   i = 0;
-  while (i != infinity)
+  while (i != dirlen)
     {
       unsigned char *ptr = base_pat + i;
       i += direction;
-      if (i == dirlen)
-       i = infinity;
       if (! NILP (trt))
        {
          /* If the byte currently looking at is the last of a
@@ -1861,7 +1840,7 @@
          else
            j = *ptr;
 
-         if (i == infinity)
+         if (i == dirlen)
            stride_for_teases = BM_tab[j];
 
          BM_tab[j] = dirlen - i;
@@ -1894,17 +1873,16 @@
        {
          j = *ptr;
 
-         if (i == infinity)
+         if (i == dirlen)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
        }
-      /* stride_for_teases tells how much to stride if we get a */
-      /* match on the far character but are subsequently */
-      /* disappointed, by recording what the stride would have been */
-      /* for that character if the last character had been */
-      /* different. */
+      /* stride_for_teases tells how much to stride if we get a
+        match on the far character but are subsequently
+        disappointed, by recording what the stride would have been
+        for that character if the last character had been
+        different.  */
     }
-  infinity = dirlen - infinity;
   pos_byte += dirlen - ((direction > 0) ? direction : 0);
   /* loop invariant - POS_BYTE points at where last char (first
      char if reverse) of pattern would align in a possible match.  */
@@ -1948,43 +1926,34 @@
 
          p_limit = BYTE_POS_ADDR (limit);
          p2 = (cursor = BYTE_POS_ADDR (pos_byte));
-         /* In this loop, pos + cursor - p2 is the surrogate for pos */
+         /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
          while (1)             /* use one cursor setting as long as i can */
            {
              if (direction > 0) /* worth duplicating */
                {
-                 /* Use signed comparison if appropriate
-                    to make cursor+infinity sure to be > p_limit.
-                    Assuming that the buffer lies in a range of addresses
-                    that are all "positive" (as ints) or all "negative",
-                    either kind of comparison will work as long
-                    as we don't step by infinity.  So pick the kind
-                    that works when we do step by infinity.  */
-                 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
-                   while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
-                     cursor += BM_tab[*cursor];
-                 else
-                   while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
+                 while (cursor <= p_limit)
+                   {
+                     if (BM_tab[*cursor] == 0)
+                       goto hit;
                      cursor += BM_tab[*cursor];
                }
+               }
              else
                {
-                 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
-                   while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
-                     cursor += BM_tab[*cursor];
-                 else
-                   while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
+                 while (cursor >= p_limit)
+                   {
+                     if (BM_tab[*cursor] == 0)
+                       goto hit;
                      cursor += BM_tab[*cursor];
                }
-/* If you are here, cursor is beyond the end of the searched region. */
-/* This can happen if you match on the far character of the pattern, */
-/* because the "stride" of that character is infinity, a number able */
-/* to throw you well beyond the end of the search.  It can also */
-/* happen if you fail to match within the permitted region and would */
-/* otherwise try a character beyond that region */
-             if ((cursor - p_limit) * direction <= len_byte)
-               break;  /* a small overrun is genuine */
-             cursor -= infinity; /* large overrun = hit */
+               }
+             /* If you are here, cursor is beyond the end of the
+                searched region.  You fail to match within the
+                permitted region and would otherwise try a character
+                beyond that region.  */
+             break;
+
+           hit:
              i = dirlen - direction;
              if (! NILP (trt))
                {
@@ -2056,8 +2025,8 @@
          pos_byte += cursor - p2;
        }
       else
-       /* Now we'll pick up a clump that has to be done the hard */
-       /* way because it covers a discontinuity */
+       /* Now we'll pick up a clump that has to be done the hard
+          way because it covers a discontinuity.  */
        {
          limit = ((direction > 0)
                   ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
@@ -2069,19 +2038,21 @@
             and still be valid for a possible match.  */
          while (1)
            {
-             /* This loop can be coded for space rather than */
-             /* speed because it will usually run only once. */
-             /* (the reach is at most len + 21, and typically */
-             /* does not exceed len) */
+             /* This loop can be coded for space rather than
+                speed because it will usually run only once.
+                (the reach is at most len + 21, and typically
+                does not exceed len).  */
              while ((limit - pos_byte) * direction >= 0)
-               pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
-             /* now run the same tests to distinguish going off the */
-             /* end, a match or a phony match. */
-             if ((pos_byte - limit) * direction <= len_byte)
+               {
+                 int ch = FETCH_BYTE (pos_byte);
+                 if (BM_tab[ch] == 0)
+                   goto hit2;
+                 pos_byte += BM_tab[ch];
+               }
                break;  /* ran off the end */
-             /* Found what might be a match.
-                Set POS_BYTE back to last (first if reverse) pos.  */
-             pos_byte -= infinity;
+
+           hit2:
+             /* Found what might be a match.  */
              i = dirlen - direction;
              while ((i -= direction) + direction != 0)
                {
@@ -2110,7 +2081,7 @@
              /* Above loop has moved POS_BYTE part or all the way
                 back to the first pos (last pos if reverse).
                 Set it once again at the last (first if reverse) char.  */
-             pos_byte += dirlen - i- direction;
+             pos_byte += dirlen - i - direction;
              if (i + direction == 0)
                {
                  int position, start, end;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]