bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#34525: replace-regexp missing some matches


From: Alan Mackenzie
Subject: bug#34525: replace-regexp missing some matches
Date: Fri, 1 Mar 2019 14:34:14 +0000
User-agent: Mutt/1.10.1 (2018-07-13)

Hello, Daniel.

On Wed, Feb 20, 2019 at 21:25:27 +0000, Daniel Lopez wrote:
> Hi Eli and Alan,

> Thanks for investigating.

[ .... ]

We now understand what went wrong.  There was a problem in the handling
of a position cache in the code for "intervals" (which support text
properties).  There was also a smaller problem with an off-by-1 in the
regex handling code.

I enclose below a patch to fix the bug, which should apply cleanly to
the Emacs 26.1 source tree.

The fix will NOT be incorporated into the upcoming 26.2 release, since
it is just too risky for the small amount of testing time we have left.
However, I think the patch will also apply cleanly to 26.2 when it gets
released.

Please feel free to try out the patch, and let me know of any
undesirable things you see with it.

Thanks again for taking the trouble to report this interesting bug.




diff --git a/src/intervals.c b/src/intervals.c
index e7595b23b3..36bbc122b0 100644
--- a/src/intervals.c
+++ b/src/intervals.c
@@ -713,11 +713,21 @@ previous_interval (register INTERVAL interval)
   return NULL;
 }
 
-/* Find the interval containing POS given some non-NULL INTERVAL
-   in the same tree.  Note that we need to update interval->position
-   if we go down the tree.
-   To speed up the process, we assume that the ->position of
-   I and all its parents is already uptodate.  */
+/* Set the ->position field of I's parent, based on I->position. */
+#define SET_PARENT_POSITION(i)                                  \
+  if (AM_LEFT_CHILD (i))                                        \
+    INTERVAL_PARENT (i)->position =                             \
+      i->position + TOTAL_LENGTH (i) - LEFT_TOTAL_LENGTH (i);   \
+  else                                                          \
+    INTERVAL_PARENT (i)->position =                             \
+      i->position - LEFT_TOTAL_LENGTH (i)                       \
+      - LENGTH (INTERVAL_PARENT (i))
+
+/* Find the interval containing POS, given some non-NULL INTERVAL in
+   the same tree.  Note that we update interval->position in each
+   interval we traverse, assuming it is already correctly set for the
+   argument I.  We don't assume that any other interval already has a
+   correctly set ->position.  */
 INTERVAL
 update_interval (register INTERVAL i, ptrdiff_t pos)
 {
@@ -738,7 +748,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos)
          else if (NULL_PARENT (i))
            error ("Point before start of properties");
          else
-             i = INTERVAL_PARENT (i);
+            {
+              SET_PARENT_POSITION (i);
+              i = INTERVAL_PARENT (i);
+            }
          continue;
        }
       else if (pos >= INTERVAL_LAST_POS (i))
@@ -753,7 +766,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos)
          else if (NULL_PARENT (i))
            error ("Point %"pD"d after end of properties", pos);
          else
-            i = INTERVAL_PARENT (i);
+            {
+              SET_PARENT_POSITION (i);
+              i = INTERVAL_PARENT (i);
+            }
          continue;
        }
       else
diff --git a/src/intervals.h b/src/intervals.h
index 311ef79466..873e2748ea 100644
--- a/src/intervals.h
+++ b/src/intervals.h
@@ -32,11 +32,15 @@ struct interval
 
   ptrdiff_t total_length;       /* Length of myself and both children.  */
   ptrdiff_t position;          /* Cache of interval's character position.  */
-                               /* This field is usually updated
-                                  simultaneously with an interval
-                                  traversal, there is no guarantee
-                                  that it is valid for a random
-                                  interval.  */
+                                /* This field is valid in the final
+                                   target interval returned by
+                                   find_interval, next_interval,
+                                   previous_interval and
+                                   update_interval.  It cannot be
+                                   depended upon in any intermediate
+                                   intervals traversed by these
+                                   functions, or any other
+                                   interval. */
   struct interval *left;       /* Intervals which precede me.  */
   struct interval *right;      /* Intervals which succeed me.  */
 
diff --git a/src/regex.c b/src/regex.c
index 09ed64a6e1..d5d22f7188 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -5898,8 +5898,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, 
const_re_char *string1,
                int s1, s2;
                int dummy;
 #ifdef emacs
-               ssize_t offset = PTR_TO_OFFSET (d - 1);
-               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+               ssize_t offset = PTR_TO_OFFSET (d);
+               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
                UPDATE_SYNTAX_TABLE_FAST (charpos);
 #endif
                GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -5985,8 +5985,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, 
const_re_char *string1,
              int s1, s2;
              int dummy;
 #ifdef emacs
-             ssize_t offset = PTR_TO_OFFSET (d) - 1;
-             ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             ssize_t offset = PTR_TO_OFFSET (d);
+             ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
              UPDATE_SYNTAX_TABLE_FAST (charpos);
 #endif
              GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -6002,7 +6002,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, 
const_re_char *string1,
                  PREFETCH_NOLIMIT ();
                  GET_CHAR_AFTER (c2, d, dummy);
 #ifdef emacs
-                 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
+                 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
 #endif
                  s2 = SYNTAX (c2);
 
@@ -6072,8 +6072,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, 
const_re_char *string1,
              re_wchar_t c1, c2;
              int s1, s2;
 #ifdef emacs
-             ssize_t offset = PTR_TO_OFFSET (d) - 1;
-             ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             ssize_t offset = PTR_TO_OFFSET (d);
+             ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
              UPDATE_SYNTAX_TABLE_FAST (charpos);
 #endif
              GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
diff --git a/src/syntax.c b/src/syntax.c
index 3cc32094a8..ca9e2bbca9 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -339,20 +339,6 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, 
bool init,
       invalidate = false;
       if (!i)
        return;
-      /* interval_of updates only ->position of the return value, so
-        update the parents manually to speed up update_interval.  */
-      while (!NULL_PARENT (i))
-       {
-         if (AM_RIGHT_CHILD (i))
-           INTERVAL_PARENT (i)->position = i->position
-             - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
-             - TOTAL_LENGTH (INTERVAL_PARENT (i))
-             + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
-         else
-           INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
-             + TOTAL_LENGTH (i);
-         i = INTERVAL_PARENT (i);
-       }
       i = gl_state.forward_i;
       gl_state.b_property = i->position - gl_state.offset;
       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;


> Daniel

-- 
Alan Mackenzie (Nuremberg, Germany).





reply via email to

[Prev in Thread] Current Thread [Next in Thread]