emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] emacs-25 61a4b57: * lisp/character-fold.el: Add back multi


From: Artur Malabarba
Subject: [Emacs-diffs] emacs-25 61a4b57: * lisp/character-fold.el: Add back multi-char matching
Date: Tue, 01 Dec 2015 13:55:55 +0000

branch: emacs-25
commit 61a4b57f1d90f81fb491abfebd94a5a6389db62f
Author: Artur Malabarba <address@hidden>
Commit: Artur Malabarba <address@hidden>

    * lisp/character-fold.el: Add back multi-char matching
    
    (character-fold-to-regexp): Uncomment recently commented code
    and make the algorithm "dummer" by not checking every possible
    combination.  This will miss some possible matches, but it
    greatly reduces regexp size.
    
    * test/automated/character-fold-tests.el
    (character-fold--test-fold-to-regexp): Comment out test of
    functionality no longer supported.
---
 lisp/character-fold.el                 |   68 +++++++++++++++++--------------
 test/automated/character-fold-tests.el |    5 ++-
 2 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/lisp/character-fold.el b/lisp/character-fold.el
index 07537d8..fb28bae 100644
--- a/lisp/character-fold.el
+++ b/lisp/character-fold.el
@@ -180,43 +180,49 @@ from which to start."
                              (regexp-quote (string c))))
                  (alist nil))
              ;; Long string.  The regexp would probably be too long.
-             ;; (unless (> end 50)
-             ;;   (setq alist (aref multi-char-table c))
-             ;;   (when case-fold-search
-             ;;     (let ((other-c (aref lower-case-table c)))
-             ;;       (when (or (not other-c)
-             ;;                 (eq other-c c))
-             ;;         (setq other-c (aref upper-case-table c)))
-             ;;       (when other-c
-             ;;         (setq alist (append alist (aref multi-char-table 
other-c)))
-             ;;         (setq regexp (concat "\\(?:" regexp "\\|"
-             ;;                              (or (aref character-fold-table 
other-c)
-             ;;                                  (regexp-quote (string 
other-c)))
-             ;;                              "\\)"))))))
-             (push (let ((alist-out '("\\)")))
-                     (pcase-dolist (`(,suffix . ,out-regexp) alist)
-                       (let ((len-suf (length suffix)))
+             (unless (> end 50)
+               (setq alist (aref multi-char-table c))
+               (when case-fold-search
+                 (let ((other-c (aref lower-case-table c)))
+                   (when (or (not other-c)
+                             (eq other-c c))
+                     (setq other-c (aref upper-case-table c)))
+                   (when other-c
+                     (setq alist (append alist (aref multi-char-table 
other-c)))
+                     (setq regexp (concat "\\(?:" regexp "\\|"
+                                          (or (aref character-fold-table 
other-c)
+                                              (regexp-quote (string other-c)))
+                                          "\\)"))))))
+             (push (let ((matched-entries nil)
+                         (max-length 0))
+                     (dolist (entry alist)
+                       (let* ((suffix (car entry))
+                              (len-suf (length suffix)))
                          (when (eq (compare-strings suffix 0 nil
                                                     string (1+ i) (+ i 1 
len-suf)
                                                     nil)
                                    t)
-                           ;; FIXME: If N suffixes match, we "branch"
-                           ;; out into N+1 executions for the rest of
-                           ;; the string.  This involves redundant
-                           ;; work and makes a huge regexp.
-                           (push (concat "\\|" out-regexp
-                                         (character-fold-to-regexp
-                                          string nil (+ i 1 len-suf)))
-                                 alist-out))))
+                           (push (cons len-suf (cdr entry)) matched-entries)
+                           (setq max-length (max max-length len-suf)))))
                      ;; If no suffixes matched, just go on.
-                     (if (not (cdr alist-out))
+                     (if (not matched-entries)
                          regexp
-                       ;; Otherwise, add a branch for the
-                       ;; no-suffix case, and stop the loop here.
-                       (prog1 (apply #'concat "\\(?:" regexp
-                                     (character-fold-to-regexp string nil (1+ 
i))
-                                     alist-out)
-                         (setq i end))))
+;;; If N suffixes match, we "branch" out into N+1 executions for the
+;;; length of the longest match.  This means "fix" will match "fix" but
+;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
+;;; exponentially.  See 
https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+                       (let ((subs (substring string (1+ i) (+ i 1 
max-length))))
+                         ;; `i' is still going to inc by 1 below.
+                         (setq i (+ i max-length))
+                         (concat
+                          "\\(?:"
+                          (mapconcat (lambda (entry)
+                                       (let ((length (car entry))
+                                             (suffix-regexp (cdr entry)))
+                                         (concat suffix-regexp
+                                                 (character-fold-to-regexp 
subs nil length))))
+                                     `((0 . ,regexp) . ,matched-entries) "\\|")
+                          "\\)"))))
                    out))))
       (setq i (1+ i)))
     (when (> spaces 0)
diff --git a/test/automated/character-fold-tests.el 
b/test/automated/character-fold-tests.el
index cf19584..40735e5 100644
--- a/test/automated/character-fold-tests.el
+++ b/test/automated/character-fold-tests.el
@@ -93,7 +93,10 @@
     (aset multi ?1 '(("2" . "yy")))
     (character-fold--test-match-exactly "a1" "xx44" "99")
     (character-fold--test-match-exactly "a12" "77" "xx442" "992")
-    (character-fold--test-match-exactly "a12" "xxyy")))
+    ;; Support for this case is disabled.  See function definition or:
+    ;; https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+    ;; (character-fold--test-match-exactly "a12" "xxyy")
+    ))
 
 
 (provide 'character-fold-tests)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]