[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 5d2d28458d0: Fix regexp character class syntax property ghost matching bug
From: Mattias Engdegård
Subject: master 5d2d28458d0: Fix regexp character class syntax property ghost matching bug
Date: Sat, 22 Jul 2023 15:33:17 -0400 (EDT)
branch: master
commit 5d2d28458d0eb378a7e94363ef716e8648ef129a
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Fix regexp character class syntax property ghost matching bug
The syntax-table-dependent regexp character classes [:space:],
[:word:] and [:punct:] always use the buffer-local syntax table for
performance reasons. Fix a bug that could cause ghost (mis)matches
from use of lingering state by constructs that do use syntax
properties, such as `\sX`.
* src/regex-emacs.c (BUFFER_SYNTAX): New macro.
(ISPUNCT, ISSPACE, ISWORD): Use BUFFER_SYNTAX instead of SYNTAX.
(regex_compile): Delete syntax table setup code that is no longer
needed.
* test/src/regex-emacs-tests.el (regex-emacs-syntax-properties):
New regression test.
---
src/regex-emacs.c | 24 ++++++++++++------------
test/src/regex-emacs-tests.el | 16 ++++++++++++++++
2 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 51fc2b0558d..7e75f0ac597 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -47,6 +47,9 @@
/* Make syntax table lookup grant data in gl_state. */
#define SYNTAX(c) syntax_property (c, 1)
+/* Explicit syntax lookup using the buffer-local table. */
+#define BUFFER_SYNTAX(c) syntax_property (c, 0)
+
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
#define RE_STRING_CHAR(p, multibyte) \
@@ -132,18 +135,22 @@
#define ISLOWER(c) lowercasep (c)
+#define ISUPPER(c) uppercasep (c)
+
+/* The following predicates use the buffer-local syntax table and
+ ignore syntax properties, for consistency with the up-front
+ assumptions made at compile time. */
+
#define ISPUNCT(c) (IS_REAL_ASCII (c) \
? ((c) > ' ' && (c) < 0177 \
&& !(((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z') \
|| ((c) >= '0' && (c) <= '9'))) \
- : SYNTAX (c) != Sword)
+ : BUFFER_SYNTAX (c) != Sword)
-#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+#define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
-#define ISUPPER(c) uppercasep (c)
-
-#define ISWORD(c) (SYNTAX (c) == Sword)
+#define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
/* Use alloca instead of malloc. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used
@@ -2048,13 +2055,6 @@ regex_compile (re_char *pattern, ptrdiff_t size,
is_xdigit, since they can only match ASCII characters.
We don't need to handle them for multibyte. */
- /* Setup the gl_state object to its buffer-defined value.
- This hardcodes the buffer-global syntax-table for ASCII
- chars, while the other chars will obey syntax-table
- properties. It's not ideal, but it's the way it's been
- done until now. */
- SETUP_BUFFER_SYNTAX_TABLE ();
-
for (c = 0; c < 0x80; ++c)
if (re_iswctype (c, cc))
{
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index 08a93dbf30e..4e2c0f67a44 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -949,4 +949,20 @@ This evaluates the TESTS test cases from glibc."
(should (equal (smatch "a\\=*b" "ab") 0))
))
+(ert-deftest regex-emacs-syntax-properties ()
+ ;; Verify absence of character class syntax property ghost matching bug.
+ (let ((re "\\s-[[:space:]]")
+ (s (concat "a"
+ (propertize "b" 'syntax-table '(0)) ; whitespace
+ "éz"))
+ (parse-sexp-lookup-properties t))
+ ;; Test matching in a string...
+ (should (equal (string-match re s) nil))
+ ;; ... and in a buffer.
+ (should (equal (with-temp-buffer
+ (insert s)
+ (goto-char (point-min))
+ (re-search-forward re nil t))
+ nil))))
+
;;; regex-emacs-tests.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 5d2d28458d0: Fix regexp character class syntax property ghost matching bug,
Mattias Engdegård <=