[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/xr 2cfd98f 2/3: Add xr-lint, a regexp linting tool
From: |
Mattias Engdegård |
Subject: |
[elpa] externals/xr 2cfd98f 2/3: Add xr-lint, a regexp linting tool |
Date: |
Wed, 20 Feb 2019 06:30:43 -0500 (EST) |
branch: externals/xr
commit 2cfd98ff44d194901c0d3370a5e2dc6675f21745
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>
Add xr-lint, a regexp linting tool
Since we are already parsing the regexp, we can easily detect uses
of questionable or obsolete syntax; `xr-lint' does that.
Suggested by Stefan Monnier.
---
xr-test.el | 21 +++++++++++++
xr.el | 101 ++++++++++++++++++++++++++++++++++++++++++++-----------------
2 files changed, 94 insertions(+), 28 deletions(-)
diff --git a/xr-test.el b/xr-test.el
index c7fc6f7..f646c13 100644
--- a/xr-test.el
+++ b/xr-test.el
@@ -254,6 +254,27 @@
"(repeat 1 63 \"a\")\n"))
)
+;; Check the (OFFSET . MESSAGE) lists returned by `xr-lint'.
+;; The expected offsets below count from 0.
+(ert-deftest xr-lint ()
+ ;; This regexp is expected to produce no warnings at all.
+ (should (equal (xr-lint "^a*\\(b\\{3\\}\\|c\\)[^]\\a-d^-]$")
+ nil))
+ ;; `^' and `$' in the middle of a sequence are reported as literals.
+ (should (equal (xr-lint "a^b$c")
+ '((1 . "Unescaped literal `^'")
+ (3 . "Unescaped literal `$'"))))
+ ;; A `*' directly after `^' is reported as a literal.
+ (should (equal (xr-lint "^**$")
+ '((1 . "Unescaped literal `*'"))))
+ ;; Two consecutive backslashes inside a character alternative.
+ (should (equal (xr-lint "a[\\\\[]")
+ '((2 . "Escaped `\\' inside character alternative"))))
+ ;; `\{' at the start of the regexp, and `+' / `?' at the start of a
+ ;; group or alternative, are all reported, as is the trailing `\}'.
+ (should (equal (xr-lint "\\{\\(+\\|?\\)\\}")
+ '((0 . "Escaped non-special character `{'")
+ (4 . "Unescaped literal `+'")
+ (7 . "Unescaped literal `?'")
+ (10 . "Escaped non-special character `}'"))))
+ ;; `\w' and `\b' are not reported; `\}', `\a' and `\%' are.
+ (should (equal (xr-lint "\\}\\w\\a\\b\\%")
+ '((0 . "Escaped non-special character `}'")
+ (4 . "Escaped non-special character `a'")
+ (8 . "Escaped non-special character `%'"))))
+ )
+
(provide 'xr-test)
;;; xr-test.el ends here
diff --git a/xr.el b/xr.el
index 6e56172..a779f1b 100644
--- a/xr.el
+++ b/xr.el
@@ -30,8 +30,13 @@
;;
;; Please refer to `rx' for more information about the notation.
;;
-;; The exported functions are `xr', which simply returns the converted
-;; rx expression, and `xr-pp', which pretty-prints the rx expression.
+;; The exported functions are:
+;;
+;; `xr' - returns the converted rx expression
+;; `xr-pp' - pretty-prints the converted rx expression
+;; `xr-lint' - finds deprecated syntax in a regexp string
+;; `xr-pp-rx-to-str' - pretty-prints an rx expression to a string
+;;
;; Suggested use is from an interactive elisp buffer.
;;
;; Example (regexp found in compile.el):
@@ -66,7 +71,12 @@
(require 'rx)
-(defun xr--parse-char-alt (negated)
+;; Add the report MESSAGE at POSITION to WARNINGS.
+;; WARNINGS is either nil, in which case nothing is recorded, or a list
+;; whose car holds the reports collected so far, most recent first.
+;; The stored value is POSITION minus one; the callers visible in this
+;; file pass buffer positions, so this yields an offset counted from 0.
+(defun xr--report (warnings position message)
+ (when warnings
+ (push (cons (1- position) message) (car warnings))))
+
+(defun xr--parse-char-alt (negated warnings)
(let ((set nil))
(cond
;; Initial ]-x range
@@ -111,6 +121,11 @@
(t
(let* ((ch (following-char))
(ch-str (char-to-string ch)))
+ (when (and (eq ch ?\\)
+ (stringp (car set))
+ (string-match "\\\\\\'" (car set)))
+ (xr--report warnings (1- (point))
+ "Escaped `\\' inside character alternative"))
;; Merge with the previous string if neither contains "-".
(if (and (stringp (car set))
(not (eq ch ?-))
@@ -269,29 +284,40 @@
(list operand))))
(append operator body)))
-(defun xr--parse-seq ()
+(defun xr--parse-seq (warnings)
(let ((sequence nil)) ; reversed
(while (not (looking-at (rx (or "\\|" "\\)" eos))))
(cond
;; ^ - only special at beginning of sequence
- ((and (looking-at (rx "^")) (null sequence))
+ ((looking-at (rx "^"))
(forward-char 1)
- (push 'bol sequence))
+ (if (null sequence)
+ (push 'bol sequence)
+ (xr--report warnings (match-beginning 0) "Unescaped literal `^'")
+ (push "^" sequence)))
;; $ - only special at end of sequence
- ((looking-at (rx "$" (or "\\|" "\\)" eos)))
+ ((looking-at (rx "$"))
(forward-char 1)
- (push 'eol sequence))
+ (if (looking-at (rx (or "\\|" "\\)" eos)))
+ (push 'eol sequence)
+ (xr--report warnings (match-beginning 0) "Unescaped literal `$'")
+ (push "$" sequence)))
;; * ? + (and non-greedy variants)
;; - not special at beginning of sequence or after ^
- ((and (looking-at (rx (any "*?+") (opt "?")))
- sequence
- (not (and (eq (car sequence) 'bol) (eq (preceding-char) ?^))))
- (let ((operator (match-string 0)))
- (goto-char (match-end 0))
- (setq sequence (cons (xr--postfix operator (car sequence))
- (cdr sequence)))))
+ ((looking-at (rx (group (any "*?+")) (opt "?")))
+ (if (and sequence
+ (not (and (eq (car sequence) 'bol) (eq (preceding-char) ?^))))
+ (let ((operator (match-string 0)))
+ (goto-char (match-end 0))
+ (setq sequence (cons (xr--postfix operator (car sequence))
+ (cdr sequence))))
+ (let ((literal (match-string 1)))
+ (goto-char (match-end 1))
+ (xr--report warnings (match-beginning 0)
+ (format "Unescaped literal `%s'" literal))
+ (push literal sequence))))
;; \{..\} - not special at beginning of sequence or after ^
((and (looking-at (rx "\\{"))
@@ -325,7 +351,7 @@
((looking-at (rx "[" (opt (group "^"))))
(goto-char (match-end 0))
(let ((negated (match-string 1)))
- (push (xr--parse-char-alt negated) sequence)))
+ (push (xr--parse-char-alt negated warnings) sequence)))
;; group
((looking-at (rx "\\(" (opt (group "?")
@@ -338,7 +364,7 @@
(when (and question (not colon))
(error "Invalid \\(? syntax"))
(goto-char (match-end 0))
- (let* ((group (xr--parse-alt))
+ (let* ((group (xr--parse-alt warnings))
;; simplify - group has an implicit seq
(operand (if (and (listp group) (eq (car group) 'seq))
(cdr group)
@@ -407,9 +433,14 @@
;; Escaped character. Only \*+?.^$[ really need escaping, but we accept
;; any not otherwise handled character after the backslash since
;; such sequences are found in the wild.
- ((looking-at (rx "\\" (group anything)))
+ ;; Group 1 is the escaped character. Group 2 matches only when that
+ ;; character is not one of the specials listed above (including `[');
+ ;; such an escape is still accepted but is reported.
+ ((looking-at (rx "\\" (group (or (any "\\*+?.^$[")
+ (group anything)))))
(forward-char 2)
- (push (match-string 1) sequence))
+ (push (match-string 1) sequence)
+ (when (match-beginning 2)
+ (xr--report warnings (match-beginning 0)
+ (format "Escaped non-special character `%s'"
+ (match-string 2)))))
(t (error "Backslash at end of regexp"))))
@@ -421,12 +452,12 @@
(t
(cons 'seq item-seq))))))
-(defun xr--parse-alt ()
+(defun xr--parse-alt (warnings)
(let ((alternatives nil)) ; reversed
- (push (xr--parse-seq) alternatives)
+ (push (xr--parse-seq warnings) alternatives)
(while (not (looking-at (rx (or "\\)" eos))))
(forward-char 2) ; skip \|
- (push (xr--parse-seq) alternatives))
+ (push (xr--parse-seq warnings) alternatives))
(if (cdr alternatives)
;; Simplify (or nonl "\n") to anything
(if (or (equal alternatives '(nonl "\n"))
@@ -435,19 +466,33 @@
(cons 'or (reverse alternatives)))
(car alternatives))))
-;;;###autoload
-(defun xr (re-string)
- "Convert a regexp string to rx notation; the inverse of `rx'.
-Passing the returned value to `rx' (or `rx-to-string') yields a regexp string
-equivalent to RE-STRING."
+;; Parse the regexp RE-STRING and return the resulting rx expression.
+;; RE-STRING is inserted into a temporary buffer and parsed from its
+;; start; WARNINGS is handed unchanged to `xr--parse-alt'.
+;; Signal an error if a `\)' is left over once parsing has stopped.
+(defun xr--parse (re-string warnings)
(with-temp-buffer
(insert re-string)
(goto-char (point-min))
- (let ((rx (xr--parse-alt)))
+ (let ((rx (xr--parse-alt warnings)))
(when (looking-at (rx "\\)"))
(error "Unbalanced \\)"))
rx)))
+;;;###autoload
+(defun xr (re-string)
+ "Convert a regexp string to rx notation; the inverse of `rx'.
+Passing the returned value to `rx' (or `rx-to-string') yields a regexp string
+equivalent to RE-STRING."
+ ;; A nil WARNINGS argument makes `xr--report' record nothing.
+ (xr--parse re-string nil))
+
+;;;###autoload
+(defun xr-lint (re-string)
+ "Detect dubious practices in RE-STRING.
+This includes uses of tolerated but discouraged constructs.
+Outright regexp syntax violations are signalled as errors.
+Return a list of (OFFSET . COMMENT) where COMMENT applies at OFFSET
+in RE-STRING."
+ ;; WARNINGS is a one-element list used as a mutable cell: `xr--report'
+ ;; pushes each report onto its car, so the newest report comes first.
+ (let ((warnings (list nil)))
+ ;; The parse result itself is discarded; only the reports are kept.
+ (xr--parse re-string warnings)
+ ;; Reverse so that reports appear in the order they were made.
+ (reverse (car warnings))))
+
;; Print a rx expression to a string, unformatted.
(defun xr--rx-to-string (rx)
(cond