emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 5d2d28458d0: Fix regexp character class syntax property ghost matching bug


From: Mattias Engdegård
Subject: master 5d2d28458d0: Fix regexp character class syntax property ghost matching bug
Date: Sat, 22 Jul 2023 15:33:17 -0400 (EDT)

branch: master
commit 5d2d28458d0eb378a7e94363ef716e8648ef129a
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Fix regexp character class syntax property ghost matching bug
    
    The syntax-table-dependent regexp character classes [:space:],
    [:word:] and [:punct:] always use the buffer-local syntax table for
    performance reasons.  Fix a bug that could cause ghost (mis)matches
    from use of lingering state by constructs that do use syntax
    properties, such as `\sX`.
    
    * src/regex-emacs.c (BUFFER_SYNTAX): New macro.
    (ISPUNCT, ISSPACE, ISWORD): Use BUFFER_SYNTAX instead of SYNTAX.
    (regex_compile): Delete syntax table setup code that is no longer
    needed.
    * test/src/regex-emacs-tests.el (regex-emacs-syntax-properties):
    New regression test.
---
 src/regex-emacs.c             | 24 ++++++++++++------------
 test/src/regex-emacs-tests.el | 16 ++++++++++++++++
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 51fc2b0558d..7e75f0ac597 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -47,6 +47,9 @@
 /* Make syntax table lookup grant data in gl_state.  */
 #define SYNTAX(c) syntax_property (c, 1)
 
+/* Explicit syntax lookup using the buffer-local table.  */
+#define BUFFER_SYNTAX(c) syntax_property (c, 0)
+
 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
 #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
 #define RE_STRING_CHAR(p, multibyte) \
@@ -132,18 +135,22 @@
 
 #define ISLOWER(c) lowercasep (c)
 
+#define ISUPPER(c) uppercasep (c)
+
+/* The following predicates use the buffer-local syntax table and
+   ignore syntax properties, for consistency with the up-front
+   assumptions made at compile time.  */
+
 #define ISPUNCT(c) (IS_REAL_ASCII (c)                          \
                    ? ((c) > ' ' && (c) < 0177                  \
                       && !(((c) >= 'a' && (c) <= 'z')          \
                            || ((c) >= 'A' && (c) <= 'Z')       \
                            || ((c) >= '0' && (c) <= '9')))     \
-                   : SYNTAX (c) != Sword)
+                   : BUFFER_SYNTAX (c) != Sword)
 
-#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+#define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
 
-#define ISUPPER(c) uppercasep (c)
-
-#define ISWORD(c) (SYNTAX (c) == Sword)
+#define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
 
 /* Use alloca instead of malloc.  This is because using malloc in
    re_search* or re_match* could cause memory leaks when C-g is used
@@ -2048,13 +2055,6 @@ regex_compile (re_char *pattern, ptrdiff_t size,
                       is_xdigit, since they can only match ASCII characters.
                       We don't need to handle them for multibyte.  */
 
-                   /* Setup the gl_state object to its buffer-defined value.
-                      This hardcodes the buffer-global syntax-table for ASCII
-                      chars, while the other chars will obey syntax-table
-                      properties.  It's not ideal, but it's the way it's been
-                      done until now.  */
-                   SETUP_BUFFER_SYNTAX_TABLE ();
-
                    for (c = 0; c < 0x80; ++c)
                      if (re_iswctype (c, cc))
                        {
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index 08a93dbf30e..4e2c0f67a44 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -949,4 +949,20 @@ This evaluates the TESTS test cases from glibc."
     (should (equal (smatch "a\\=*b" "ab") 0))
     ))
 
+(ert-deftest regex-emacs-syntax-properties ()
+  ;; Verify absence of character class syntax property ghost matching bug.
+  (let ((re "\\s-[[:space:]]")
+        (s (concat "a"
+                (propertize "b" 'syntax-table '(0))  ; whitespace
+                "éz"))
+        (parse-sexp-lookup-properties t))
+    ;; Test matching in a string...
+    (should (equal (string-match re s) nil))
+    ;; ... and in a buffer.
+    (should (equal (with-temp-buffer
+                     (insert s)
+                     (goto-char (point-min))
+                     (re-search-forward re nil t))
+                   nil))))
+
 ;;; regex-emacs-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]