[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/xr 19d52a7 1/5: Add branch subsumption check
From: |
Mattias Engdegård |
Subject: |
[elpa] externals/xr 19d52a7 1/5: Add branch subsumption check |
Date: |
Sun, 26 May 2019 13:17:52 -0400 (EDT) |
branch: externals/xr
commit 19d52a7edbb5f22d471481cca8e6cca70a465641
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>
Add branch subsumption check
Look for alternatives containing a branch that matches a superset of
another branch, like ".\\|a". This is wasteful, or indicates a mistake.
---
xr-test.el | 32 +++++++++
xr.el | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 262 insertions(+), 2 deletions(-)
diff --git a/xr-test.el b/xr-test.el
index 99c85fa..a3219dd 100644
--- a/xr-test.el
+++ b/xr-test.el
@@ -378,6 +378,38 @@
'((2 . "Repetition of zero-width assertion")
(5 . "Repetition of zero-width assertion")
(13 . "Repetition of zero-width assertion"))))
+ (should (equal (xr-lint "a.cde*f?g\\|g\\|abcdefg")
+ '((14 . "Branch matches subset of a previous branch"))))
+ (should (equal (xr-lint "abcd\\|e\\|[aA].[^0-9z]d")
+ '((9 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint "\\(?:\\(a\\)\\|.\\)\\(?:a\\|\\(.\\)\\)")
+ '((21 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint ".\\|\n\\|\r")
+ '((6 . "Branch matches subset of a previous branch"))))
+ (should (equal (xr-lint "[^mM]\\|[^a-zA-Z]")
+ '((7 . "Branch matches subset of a previous branch"))))
+ (should (equal (xr-lint "[^mM]\\|[^A-LN-Z]")
+ nil))
+ (should (equal (xr-lint "[ab]\\|[^bcd]")
+ nil))
+ (should (equal (xr-lint "[ab]\\|[^cd]")
+ '((6 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint ".\\|[a\n]")
+ nil))
+ (should (equal (xr-lint ".\\|[[:space:]\r]")
+ '((3 . "Branch matches subset of a previous branch"))))
+ (should (equal (xr-lint "ab?c+\\|a?b*c*")
+ '((7 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint "\\(?:[aA]\\|b\\)\\|a")
+ '((15 . "Branch matches subset of a previous branch"))))
+ (should (equal (xr-lint "\\(?:a\\|b\\)\\|[abc]")
+ '((12 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint "\\(?:a\\|b\\)\\|\\(?:[abd]\\|[abc]\\)")
+ '((12 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint "ab\\|abc?")
+ '((4 . "Branch matches superset of a previous branch"))))
+ (should (equal (xr-lint "abc\\|abcd*e?")
+ '((5 . "Branch matches superset of a previous branch"))))
)
(ert-deftest xr-skip-set ()
diff --git a/xr.el b/xr.el
index 28545a9..99239fb 100644
--- a/xr.el
+++ b/xr.el
@@ -667,6 +667,224 @@ UPPER may be nil, meaning infinity."
(t
(cons 'seq item-seq))))))
+(defun xr--range-string-to-items (str)
+ "Convert a string of ranges to a list of pairs of their endpoints."
+ (let ((len (length str))
+ (ranges nil)
+ (i 0))
+ (while (< i len)
+ (push (cons (aref str i) (aref str (+ i 2)))
+ ranges)
+ (setq i (+ i 3)))
+ ranges))
+
+(defun xr--any-arg-to-items (arg)
+ "Convert an `any' argument to a list of characters, ranges (as pairs),
+and classes (symbols)."
+ ;; We know (since we built it) that x is either a symbol or
+ ;; a string, and that the string does not mix ranges and chars.
+ (cond ((symbolp arg) (list arg))
+ ((and (>= (length arg) 3)
+ (eq (aref arg 1) ?-))
+ (xr--range-string-to-items arg))
+ (t (string-to-list arg))))
+
+(defun xr--any-item-superset-p (a b)
+ "Whether A is a superset of B, both being `any' items: a character,
+a range (pair of chars), or a class (symbol)."
+ (cond
+ ((symbolp a)
+ (cond ((symbolp b) (eq a b))
+ ((eq b ?\n)
+ (memq a '(alnum alpha blank digit graph
+ lower multibyte nonascii print punct space
+ upper word xdigit)))))
+ ((consp a)
+ (or (and (characterp b)
+ (<= (car a) b (cdr a)))
+ (and (consp b)
+ (<= (car a) (car b) (cdr b) (cdr a)))))
+ (t
+ (and (characterp b) (eq a b)))))
+
+(defun xr--any-item-may-intersect-p (a b)
+ "Whether A intersects B, both being `any' items: a character,
+a range (pair of chars), or a class (symbol). If in doubt, return t."
+ (cond
+ ((symbolp a)
+ (cond ((eq b ?\n)
+ (not (memq a '(alnum alpha blank digit graph
+ lower multibyte nonascii print punct space
+ upper word xdigit))))
+ (t t)))
+ ((consp a)
+ (or (and (characterp b)
+ (<= (car a) b (cdr a)))
+ (and (consp b)
+ (<= (car a) (cdr b))
+ (<= (car b) (cdr a)))
+ (symbolp b)))
+ ;; Now a must be a character.
+ ((characterp b) (eq a b))
+ (t (xr--any-item-may-intersect-p b a))))
+
+(defun xr--char-superset-of-char-set-p (a-sets negated b-sets)
+ "Whether A-SETS, possibly NEGATED, is a superset of B-SETS.
+A-SETS and B-SETS are arguments to `any'."
+ (let ((a-items (mapcan #'xr--any-arg-to-items a-sets))
+ (b-items (mapcan #'xr--any-arg-to-items b-sets)))
+ (cl-every (lambda (b-item)
+ (if negated
+ (not (cl-some
+ (lambda (a-item)
+ (xr--any-item-may-intersect-p b-item a-item))
+ a-items))
+ (cl-some (lambda (a-item)
+ (xr--any-item-superset-p a-item b-item))
+ a-items)))
+ b-items)))
+
+(defun xr--char-superset-of-rx-p (sets negated rx)
+ "Whether SETS, possibly NEGATED, is a superset of RX."
+ (pcase rx
+ (`(any . ,b-sets)
+ (xr--char-superset-of-char-set-p sets negated b-sets))
+ (`(not (any . ,b-sets))
+ (and negated
+ (xr--char-superset-of-char-set-p b-sets nil sets)))
+ ((or 'ascii 'alnum 'alpha 'blank 'cntrl 'digit 'graph
+ 'lower 'multibyte 'nonascii 'print 'punct 'space
+ 'unibyte 'upper 'word 'xdigit)
+ (xr--char-superset-of-char-set-p sets negated `(any ,rx)))
+ ((pred stringp)
+ (and (= (length rx) 1)
+ (xr--char-superset-of-char-set-p sets negated (list rx))))))
+
+(defun xr--single-non-newline-char-p (rx)
+ "Whether RX only matches single characters none of which is newline."
+ (pcase rx
+ ((or 'nonl 'wordchar) t)
+ (`(category ,_) t)
+ (`(syntax ,s) (not (eq s ?>))) ; comment-end often matches newline
+ (_ (xr--char-superset-of-rx-p '("\n") t rx))))
+
+(defun xr--syntax-superset-of-rx-p (syntax negated rx)
+ "Whether SYNTAX, possibly NEGATED, is a superset of RX."
+ ;; Syntax tables vary, but we make a (quite conservative) guess.
+ (let* ((always-set
+ ;; Characters we think always will be in the syntax set.
+ '((whitespace " \t")
+ (word "A-Za-z0-9")
+ (open-parenthesis "([")
+ (close-parenthesis "])")))
+ (never-set
+ ;; Characters we think never will be in the syntax set.
+ '((whitespace "!-~")
+ (punctuation "A-Za-z0-9")
+ (open-parenthesis "\x00- A-Za-z0-9")
+ (close-parenthesis "\x00- A-Za-z0-9")))
+ (set (assq syntax (if negated never-set always-set))))
+ (and set
+ (xr--char-superset-of-rx-p (cdr set) nil rx))))
+
+(defun xr--string-to-chars (str)
+ (mapcar #'char-to-string (string-to-list str)))
+
+(defun xr--expand-strings (rx)
+ "If RX is a string or a seq of strings, convert them to seqs of
+single-character strings."
+ (cond ((consp rx)
+ (if (eq (car rx) 'seq)
+ (cons 'seq (mapcan (lambda (x)
+ (if (and (stringp x)
+ (> (length x) 1))
+ (xr--string-to-chars x)
+ (list x)))
+ (cdr rx)))
+ rx))
+ ((and (stringp rx)
+ (> (length rx) 1))
+ (cons 'seq (xr--string-to-chars rx)))
+ (t rx)))
+
+(defun xr--superset-seq-p (a b)
+ "Whether A matches all that B matches, both lists of expressions."
+ (while (and a b (xr--superset-p (car a) (car b)))
+ (setq a (cdr a))
+ (setq b (cdr b)))
+ (and (not b)
+ (or (not a)
+ (xr--matches-empty-p (cons 'seq a)))))
+
+(defun xr--make-seq (body)
+ (if (> (length body) 1)
+ (cons 'seq body)
+ (car body)))
+
+(defun xr--superset-p (a b)
+ "Whether A matches all that B matches."
+ (setq a (xr--expand-strings a))
+ (setq b (xr--expand-strings b))
+
+ (pcase b
+ (`(or . ,b-body)
+ (cl-every (lambda (b-expr) (xr--superset-p a b-expr)) b-body))
+ (_
+ (pcase a
+ (`(any . ,sets)
+ (xr--char-superset-of-rx-p sets nil b))
+ (`(not (any . ,sets))
+ (xr--char-superset-of-rx-p sets t b))
+ ('nonl (xr--single-non-newline-char-p b))
+
+ (`(seq . ,a-body)
+ (pcase b
+ (`(seq . ,b-body)
+ (xr--superset-seq-p a-body b-body))
+ (_
+ (xr--superset-seq-p a-body (list b)))))
+ (`(or . ,a-body)
+ (cl-some (lambda (a-expr) (xr--superset-p a-expr b)) a-body))
+
+ (`(zero-or-more . ,a-body)
+ (pcase b
+ (`(,(or 'opt 'zero-or-more 'one-or-more) . ,b-body)
+ (xr--superset-p (xr--make-seq a-body) (xr--make-seq b-body)))
+ (_ (xr--superset-p (xr--make-seq a-body) b))))
+ (`(one-or-more . ,a-body)
+ (pcase b
+ (`(one-or-more . ,b-body)
+ (xr--superset-p (xr--make-seq a-body) (xr--make-seq b-body)))
+ (_ (xr--superset-p (xr--make-seq a-body) b))))
+ (`(opt . ,a-body)
+ (pcase b
+ (`(opt . ,b-body)
+ (xr--superset-p (xr--make-seq a-body) (xr--make-seq b-body)))
+ (_ (xr--superset-p (xr--make-seq a-body) b))))
+ (`(repeat ,lo ,_ . ,a-body)
+ (if (<= lo 1)
+ (xr--superset-p (xr--make-seq a-body) b)
+ (equal a b)))
+
+ ;; We do not expand through groups on the subset (b) side to
+ ;; avoid false positives; "\\(a\\)\\|." should be without warning.
+ (`(group . ,body)
+ (xr--superset-p (xr--make-seq body) b))
+ (`(group-n ,_ . ,body)
+ (xr--superset-p (xr--make-seq body) b))
+
+ (`(syntax ,syn)
+ (or (equal a b) (xr--syntax-superset-of-rx-p syn nil b)))
+ (`(not (syntax ,syn))
+ (or (equal a b) (xr--syntax-superset-of-rx-p syn t b)))
+
+ ((or `(category ,_) `(not (category ,cat)))
+ (or (equal a b)
+ (and (stringp b)
+ (string-match-p (rx-to-string a) b))))
+
+ (_ (equal a b))))))
+
(defun xr--parse-alt (warnings)
(let ((alternatives nil)) ; reversed
(push (xr--parse-seq warnings) alternatives)
@@ -674,8 +892,18 @@ UPPER may be nil, meaning infinity."
(forward-char 2) ; skip \|
(let ((pos (point))
(seq (xr--parse-seq warnings)))
- (when (and warnings (member seq alternatives))
- (xr--report warnings pos "Duplicated alternative branch"))
+ (when warnings
+ (cond
+ ((member seq alternatives)
+ (xr--report warnings pos "Duplicated alternative branch"))
+ ((cl-some (lambda (branch) (xr--superset-p seq branch))
+ alternatives)
+ (xr--report warnings pos
+ "Branch matches superset of a previous branch"))
+ ((cl-some (lambda (branch) (xr--superset-p branch seq))
+ alternatives)
+ (xr--report warnings pos
+ "Branch matches subset of a previous branch"))))
(push seq alternatives)))
(if (cdr alternatives)
;; Simplify (or nonl "\n") to anything