[Emacs-diffs] Changes to emacs/lisp/emacs-lisp/rx.el [emacs-unicode-2]

emacs-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/lisp/emacs-lisp/rx.el [emacs-unicode-2]

From:	Miles Bader
Subject:	[Emacs-diffs] Changes to emacs/lisp/emacs-lisp/rx.el [emacs-unicode-2]
Date:	Mon, 28 Jun 2004 04:53:04 -0400
Index: emacs/lisp/emacs-lisp/rx.el
diff -c emacs/lisp/emacs-lisp/rx.el:1.4.6.1 emacs/lisp/emacs-lisp/rx.el:1.4.6.2
*** emacs/lisp/emacs-lisp/rx.el:1.4.6.1 Fri Apr 16 12:50:14 2004
--- emacs/lisp/emacs-lisp/rx.el Mon Jun 28 07:29:46 2004
***************
*** 1,6 ****
  ;;; rx.el --- sexp notation for regular expressions
  
! ;; Copyright (C) 2001, 2003, 2004  Free Software Foundation, Inc.
  
  ;; Author: Gerd Moellmann <address@hidden>
  ;; Maintainer: FSF
--- 1,6 ----
  ;;; rx.el --- sexp notation for regular expressions
  
! ;; Copyright (C) 2001, 03, 2004  Free Software Foundation, Inc.
  
  ;; Author: Gerd Moellmann <address@hidden>
  ;; Maintainer: FSF
***************
*** 32,37 ****
--- 32,53 ----
  ;; from the bugs mentioned in the commentary section of Sregex, and
  ;; uses a nicer syntax (IMHO, of course :-).
  
+ ;; This significantly extended version of the original, is almost
+ ;; compatible with Sregex.  The only incompatibility I (fx) know of is
+ ;; that the `repeat' form can't have multiple regexp args.
+ 
+ ;; Now alternative forms are provided for a degree of compatibility
+ ;; with Shivers' attempted definitive SRE notation
+ ;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>.  SRE forms not
+ ;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
+ ;; ,<exp>, (word ...), word+, posix-string, and character class forms.
+ ;; Some forms are inconsistent with SRE, either for historical reasons
+ ;; or because of the implementation -- simple translation into Emacs
+ ;; regexp strings.  These include: any, word.  Also, case-sensitivity
+ ;; and greediness are controlled by variables external to the regexp,
+ ;; and you need to feed the forms to the `posix-' functions to get
+ ;; SRE's POSIX semantics.  There are probably more difficulties.
+ 
  ;; Rx translates a sexp notation for regular expressions into the
  ;; usual string notation.  The translation can be done at compile-time
  ;; by using the `rx' macro.  It can be done at run-time by calling
***************
*** 94,155 ****
  
  ;;; Code:
  
- 
  (defconst rx-constituents
    '((and              . (rx-and 1 nil))
      (or                       . (rx-or 1 nil))
      (not-newline      . ".")
      (anything         . ".\\|\n")
!     (any              . (rx-any 1 1 rx-check-any))
      (in                       . any)
      (not              . (rx-not 1 1 rx-check-not))
      (repeat           . (rx-repeat 2 3))
!     (submatch         . (rx-submatch 1 nil))
      (group            . submatch)
!     (zero-or-more     . (rx-kleene 1 1))
!     (one-or-more      . (rx-kleene 1 1))
!     (zero-or-one      . (rx-kleene 1 1))
!     (\?                       . zero-or-one)
      (\??              . zero-or-one)
!     (*                        . zero-or-more)
      (*?                       . zero-or-more)
      (0+                       . zero-or-more)
!     (+                        . one-or-more)
      (+?                       . one-or-more)
      (1+                       . one-or-more)
      (optional         . zero-or-one)
      (minimal-match    . (rx-greedy 1 1))
      (maximal-match    . (rx-greedy 1 1))
      (backref          . (rx-backref 1 1 rx-check-backref))
      (line-start               . "^")
      (line-end         . "$")
      (string-start     . "\\`")
      (string-end               . "\\'")
      (buffer-start     . "\\`")
      (buffer-end               . "\\'")
      (point            . "\\=")
      (word-start               . "\\<")
      (word-end         . "\\>")
      (word-boundary    . "\\b")
      (syntax           . (rx-syntax 1 1))
      (category         . (rx-category 1 1 rx-check-category))
      (eval             . (rx-eval 1 1))
      (regexp           . (rx-regexp 1 1 stringp))
      (digit            . "[[:digit:]]")
!     (control          . "[[:cntrl:]]")
!     (hex-digit                . "[[:xdigit:]]")
!     (blank            . "[[:blank:]]")
!     (graphic          . "[[:graph:]]")
!     (printing         . "[[:print:]]")
!     (alphanumeric     . "[[:alnum:]]")
      (letter           . "[[:alpha:]]")
!     (ascii            . "[[:ascii:]]")
      (nonascii         . "[[:nonascii:]]")
!     (lower            . "[[:lower:]]")
!     (punctuation      . "[[:punct:]]")
!     (space            . "[[:space:]]")
!     (upper            . "[[:upper:]]")
!     (word             . "[[:word:]]"))
    "Alist of sexp form regexp constituents.
  Each element of the alist has the form (SYMBOL . DEFN).
  SYMBOL is a valid constituent of sexp regular expressions.
--- 110,212 ----
  
  ;;; Code:
  
  (defconst rx-constituents
    '((and              . (rx-and 1 nil))
+     (seq              . and)          ; SRE
+     (:                        . and)          ; SRE
+     (sequence         . and)          ; sregex
      (or                       . (rx-or 1 nil))
+     (|                        . or)           ; SRE
      (not-newline      . ".")
+     (nonl             . not-newline)  ; SRE
      (anything         . ".\\|\n")
!     (any              . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
      (in                       . any)
+     (char             . any)          ; sregex
+     (not-char         . (rx-not-char 1 nil rx-check-any)) ; sregex
      (not              . (rx-not 1 1 rx-check-not))
+     ;; Partially consistent with sregex, whose `repeat' is like our
+     ;; `**'.  (`repeat' with optional max arg and multiple sexp forms
+     ;; is ambiguous.)
      (repeat           . (rx-repeat 2 3))
!     (=                        . (rx-= 2 nil))    ; SRE
!     (>=                       . (rx->= 2 nil))   ; SRE
!     (**                       . (rx-** 2 nil))   ; SRE
!     (submatch         . (rx-submatch 1 nil)) ; SRE
      (group            . submatch)
!     (zero-or-more     . (rx-kleene 1 nil))
!     (one-or-more      . (rx-kleene 1 nil))
!     (zero-or-one      . (rx-kleene 1 nil))
!     (\?                       . zero-or-one)  ; SRE
      (\??              . zero-or-one)
!     (*                        . zero-or-more) ; SRE
      (*?                       . zero-or-more)
      (0+                       . zero-or-more)
!     (+                        . one-or-more)  ; SRE
      (+?                       . one-or-more)
      (1+                       . one-or-more)
      (optional         . zero-or-one)
+     (opt              . zero-or-one)  ; sregex
      (minimal-match    . (rx-greedy 1 1))
      (maximal-match    . (rx-greedy 1 1))
      (backref          . (rx-backref 1 1 rx-check-backref))
      (line-start               . "^")
+     (bol              . line-start)   ; SRE
      (line-end         . "$")
+     (eol              . line-end)     ; SRE
      (string-start     . "\\`")
+     (bos              . string-start) ; SRE
+     (bot              . string-start) ; sregex
      (string-end               . "\\'")
+     (eos              . string-end)   ; SRE
+     (eot              . string-end)   ; sregex
      (buffer-start     . "\\`")
      (buffer-end               . "\\'")
      (point            . "\\=")
      (word-start               . "\\<")
+     (bow              . word-start)   ; SRE
      (word-end         . "\\>")
+     (eow              . word-end)     ; SRE
      (word-boundary    . "\\b")
+     (not-word-boundary        . "\\B")        ; sregex
      (syntax           . (rx-syntax 1 1))
+     (not-syntax               . (rx-not-syntax 1 1)) ; sregex
      (category         . (rx-category 1 1 rx-check-category))
      (eval             . (rx-eval 1 1))
      (regexp           . (rx-regexp 1 1 stringp))
      (digit            . "[[:digit:]]")
!     (numeric          . digit)        ; SRE
!     (num              . digit)        ; SRE
!     (control          . "[[:cntrl:]]") ; SRE
!     (cntrl            . control)       ; SRE
!     (hex-digit                . "[[:xdigit:]]") ; SRE
!     (hex              . hex-digit)      ; SRE
!     (xdigit           . hex-digit)      ; SRE
!     (blank            . "[[:blank:]]")  ; SRE
!     (graphic          . "[[:graph:]]")  ; SRE
!     (graph            . graphic)        ; SRE
!     (printing         . "[[:print:]]")  ; SRE
!     (print            . printing)       ; SRE
!     (alphanumeric     . "[[:alnum:]]")  ; SRE
!     (alnum            . alphanumeric)   ; SRE
      (letter           . "[[:alpha:]]")
!     (alphabetic               . letter)       ; SRE
!     (alpha            . letter)       ; SRE
!     (ascii            . "[[:ascii:]]") ; SRE
      (nonascii         . "[[:nonascii:]]")
!     (lower            . "[[:lower:]]") ; SRE
!     (lower-case               . lower)         ; SRE
!     (punctuation      . "[[:punct:]]") ; SRE
!     (punct            . punctuation)   ; SRE
!     (space            . "[[:space:]]") ; SRE
!     (whitespace               . space)         ; SRE
!     (white            . space)         ; SRE
!     (upper            . "[[:upper:]]") ; SRE
!     (upper-case               . upper)         ; SRE
!     (word             . "[[:word:]]")  ; inconsistent with SRE
!     (wordchar         . word)          ; sregex
!     (not-wordchar     . "[^[:word:]]") ; sregex (use \\W?)
!     )
    "Alist of sexp form regexp constituents.
  Each element of the alist has the form (SYMBOL . DEFN).
  SYMBOL is a valid constituent of sexp regular expressions.
***************
*** 252,257 ****
--- 309,316 ----
  
  (defun rx-check (form)
    "Check FORM according to its car's parsing info."
+   (unless (listp form)
+     (error "rx `%s' needs argument(s)" form))
    (let* ((rx (rx-info (car form)))
         (nargs (1- (length form)))
         (min-args (nth 1 rx))
***************
*** 297,349 ****
            "\\)")))
  
  
! (defun rx-quote-for-set (string)
!   "Transform STRING for use in a character set.
! If STRING contains a `]', move it to the front.
! If STRING starts with a '^', move it to the end."
!   (when (string-match "\\`\\(\\(?:.\\|\n\\)+\\)\\]\\(\\(?:.\\|\n\\)\\)*\\'"
!                     string)
!     (setq string (concat "]" (match-string 1 string)
!                        (match-string 2 string))))
!   (when (string-match "\\`^\\(\\(?:.\\|\n\\)+\\)\\'" string)
!     (setq string (concat (substring string 1) "^")))
!   string)
! 
  
  (defun rx-check-any (arg)
     "Check arg ARG for Rx `any'."
!    (cond ((integerp arg) t)
!        ((and (stringp arg) (zerop (length arg)))
!         (error "String arg for rx `any' must not be empty"))
!        ((stringp arg) t)
!        (t
!         (error "rx `any' requires string or character arg"))))
! 
  
  (defun rx-any (form)
!   "Parse and produce code from FORM, which is `(any STRING)'.
! STRING is optional.  If it is omitted, build a regexp that
! matches anything."
!   (rx-check form)
!   (let ((arg (cadr form)))
!     (cond ((integerp arg)
!          (char-to-string arg))
!         ((= (length arg) 1)
!          arg)
!         (t
!          (concat "[" (rx-quote-for-set (cadr form)) "]")))))
  
  
  (defun rx-check-not (arg)
    "Check arg ARG for Rx `not'."
!   (unless (or (memq form
!                   '(digit control hex-digit blank graphic printing
!                           alphanumeric letter ascii nonascii lower
!                           punctuation space upper word))
!             (and (consp form)
!                  (memq (car form) '(not any in syntax category:))))
!     (error "rx `not' syntax error: %s" form))
!     t)
  
  
  (defun rx-not (form)
--- 356,416 ----
            "\\)")))
  
  
! (defvar rx-bracket)                  ; dynamically bound in `rx-any'
  
  (defun rx-check-any (arg)
     "Check arg ARG for Rx `any'."
!    (if (integerp arg)
!        (setq arg (string arg)))
!    (when (stringp arg)
!      (if (zerop (length arg))
!        (error "String arg for Rx `any' must not be empty"))
!      ;; Quote ^ at start; don't bother to check whether this is first arg.
!      (if (eq ?^ (aref arg 0))
!        (setq arg (concat "\\" arg)))
!      ;; Remove ] and set flag for adding it to start of overall result.
!      (when (string-match "]" arg)
!        (setq arg (replace-regexp-in-string "]" "" arg)
!            rx-bracket "]")))
!    (when (symbolp arg)
!      (let ((translation (condition-case nil
!                           (rx-to-string arg 'no-group)
!                         (error nil))))
!        (unless translation (error "Invalid char class `%s' in Rx `any'" arg))
!        (setq arg (substring translation 1 -1)))) ; strip outer brackets
!    ;; sregex compatibility
!    (when (and (integerp (car-safe arg))
!             (integerp (cdr-safe arg)))
!      (setq arg (string (car arg) ?- (cdr arg))))
!    (unless (stringp arg)
!      (error "rx `any' requires string, character, char pair or char class 
args"))
!    arg)
  
  (defun rx-any (form)
!   "Parse and produce code from FORM, which is `(any ARG ...)'.
! ARG is optional."
!   (rx-check form)
!   (let* ((rx-bracket nil)
!        (args (mapcar #'rx-check-any (cdr form)))) ; side-effects `rx-bracket'
!     ;; If there was a ?- in the form, move it to the front to avoid
!     ;; accidental range.
!     (if (member "-" args)
!       (setq args (cons "-" (delete "-" args))))
!     (apply #'concat "[" rx-bracket (append args '("]")))))
  
  
  (defun rx-check-not (arg)
    "Check arg ARG for Rx `not'."
!   (unless (or (and (symbolp arg)
!                  (string-match "\\`\\[\\[:[-a-z]:]]\\'"
!                                (condition-case nil
!                                    (rx-to-string arg 'no-group)
!                                  (error ""))))
!             (eq arg 'word-boundary)
!             (and (consp arg)
!                  (memq (car arg) '(not any in syntax category))))
!     (error "rx `not' syntax error: %s" arg))
!   t)
  
  
  (defun rx-not (form)
***************
*** 355,378 ****
           (if (= (length result) 4)
               (substring result 2 3)
             (concat "[" (substring result 2))))
!         ((string-match "\\`\\[" result)
           (concat "[^" (substring result 1)))
!         ((string-match "\\`\\\\s." result)
!          (concat "\\S" (substring result 2)))
!         ((string-match "\\`\\\\S." result)
!          (concat "\\s" (substring result 2)))
!         ((string-match "\\`\\\\c." result)
!          (concat "\\C" (substring result 2)))
!         ((string-match "\\`\\\\C." result)
!          (concat "\\c" (substring result 2)))
!         ((string-match "\\`\\\\B" result)
!          (concat "\\b" (substring result 2)))
!         ((string-match "\\`\\\\b" result)
!          (concat "\\B" (substring result 2)))
          (t
           (concat "[^" result "]")))))
  
  
  (defun rx-repeat (form)
    "Parse and produce code from FORM.
  FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
--- 422,488 ----
           (if (= (length result) 4)
               (substring result 2 3)
             (concat "[" (substring result 2))))
!         ((eq ?\[ (aref result 0))
           (concat "[^" (substring result 1)))
!         ((string-match "\\`\\\\[scb]" result)
!          (concat (capitalize (substring result 0 2)) (substring result 2)))
          (t
           (concat "[^" result "]")))))
  
  
+ (defun rx-not-char (form)
+   "Parse and produce code from FORM.  FORM is `(not-char ...)'."
+   (rx-check form)
+   (rx-not `(not (in ,@(cdr form)))))
+ 
+ 
+ (defun rx-not-syntax (form)
+   "Parse and produce code from FORM.  FORM is `(not-syntax SYNTAX)'."
+   (rx-check form)
+   (rx-not `(not (syntax ,@(cdr form)))))
+ 
+ 
+ (defun rx-trans-forms (form &optional skip)
+   "If FORM's length is greater than two, transform it to length two.
+ A form (HEAD REST ...) becomes (HEAD (and REST ...)).
+ If SKIP is non-nil, allow that number of items after the head, i.e.
+ `(= N REST ...)' becomes `(= N (and REST ...))' if SKIP is 1."
+   (unless skip (setq skip 0))
+   (let ((tail (nthcdr (1+ skip) form)))
+     (if (= (length tail) 1)
+       form
+       (let ((form (copy-sequence form)))
+       (setcdr (nthcdr skip form) (list (cons 'and tail)))
+       form))))
+ 
+ 
+ (defun rx-= (form)
+   "Parse and produce code from FORM `(= N ...)'."
+   (rx-check form)
+   (setq form (rx-trans-forms form 1))
+   (unless (and (integerp (nth 1 form))
+              (> (nth 1 form) 0))
+     (error "rx `=' requires positive integer first arg"))
+   (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form)))
+ 
+ 
+ (defun rx->= (form)
+   "Parse and produce code from FORM `(>= N ...)'."
+   (rx-check form)
+   (setq form (rx-trans-forms form 1))
+   (unless (and (integerp (nth 1 form))
+              (> (nth 1 form) 0))
+     (error "rx `>=' requires positive integer first arg"))
+   (format "%s\\{%d,\\}" (rx-to-string (nth 2 form)) (nth 1 form)))
+ 
+ 
+ (defun rx-** (form)
+   "Parse and produce code from FORM `(** N M ...)'."
+   (rx-check form)
+   (setq form (cons 'repeat (cdr (rx-trans-forms form 2))))
+   (rx-to-string form))
+ 
+ 
  (defun rx-repeat (form)
    "Parse and produce code from FORM.
  FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
***************
*** 419,424 ****
--- 529,535 ----
  If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
  is non-nil."
    (rx-check form)
+   (setq form (rx-trans-forms form))
    (let ((suffix (cond ((memq (car form) '(* + ? )) "")
                      ((memq (car form) '(*? +? ??)) "?")
                      (rx-greedy-flag "")
***************
*** 468,476 ****
  (defun rx-syntax (form)
    "Parse and produce code from FORM, which is `(syntax SYMBOL)'."
    (rx-check form)
!   (let ((syntax (assq (cadr form) rx-syntax)))
      (unless syntax
!       (error "Unknown rx syntax `%s'" (cadr form)))
      (format "\\s%c" (cdr syntax))))
  
  
--- 579,593 ----
  (defun rx-syntax (form)
    "Parse and produce code from FORM, which is `(syntax SYMBOL)'."
    (rx-check form)
!   (let* ((sym (cadr form))
!        (syntax (assq sym rx-syntax)))
      (unless syntax
!       ;; Try sregex compatibility.
!       (let ((name (symbol-name sym)))
!       (if (= 1 (length name))
!           (setq syntax (rassq (aref name 0) rx-syntax))))
!       (unless syntax
!       (error "Unknown rx syntax `%s'" (cadr form))))
      (format "\\s%c" (cdr syntax))))
  
  
***************
*** 483,489 ****
  
  
  (defun rx-category (form)
!   "Parse and produce code from FORM, which is `(category SYMBOL ...)'."
    (rx-check form)
    (let ((char (if (integerp (cadr form))
                  (cadr form)
--- 600,606 ----
  
  
  (defun rx-category (form)
!   "Parse and produce code from FORM, which is `(category SYMBOL)'."
    (rx-check form)
    (let ((char (if (integerp (cadr form))
                  (cadr form)
***************
*** 543,550 ****
  
  
  ;;;###autoload
! (defmacro rx (regexp)
!   "Translate a regular expression REGEXP in sexp form to a regexp string.
  See also `rx-to-string' for how to do such a translation at run-time.
  
  The following are valid subforms of regular expressions in sexp
--- 660,668 ----
  
  
  ;;;###autoload
! (defmacro rx (&rest regexps)
!   "Translate regular expressions REGEXPS in sexp form to a regexp string.
! REGEXPS is a non-empty sequence of forms of the sort listed below.
  See also `rx-to-string' for how to do such a translation at run-time.
  
  The following are valid subforms of regular expressions in sexp
***************
*** 556,608 ****
  CHAR
       matches character CHAR literally.
  
! `not-newline'
       matches any character except a newline.
                        .
  `anything'
       matches any character
  
! `(any SET)'
!      matches any character in SET.  SET may be a character or string.
       Ranges of characters can be specified as `A-Z' in strings.
  
! '(in SET)'
!      like `any'.
  
! `(not (any SET))'
!      matches any character not in SET
  
! `line-start'
       matches the empty string, but only at the beginning of a line
       in the text being matched
  
! `line-end'
       is similar to `line-start' but matches only at the end of a line
  
! `string-start'
       matches the empty string, but only at the beginning of the
       string being matched against.
  
! `string-end'
       matches the empty string, but only at the end of the
       string being matched against.
  
  `buffer-start'
       matches the empty string, but only at the beginning of the
!      buffer being matched against.
  
  `buffer-end'
       matches the empty string, but only at the end of the
!      buffer being matched against.
  
  `point'
       matches the empty string, but only at point.
  
! `word-start'
       matches the empty string, but only at the beginning or end of a
       word.
  
! `word-end'
       matches the empty string, but only at the end of a word.
  
  `word-boundary'
--- 674,731 ----
  CHAR
       matches character CHAR literally.
  
! `not-newline', `nonl'
       matches any character except a newline.
                        .
  `anything'
       matches any character
  
! `(any SET ...)'
! `(in SET ...)'
! `(char SET ...)'
!      matches any character in SET ....  SET may be a character or string.
       Ranges of characters can be specified as `A-Z' in strings.
+      Ranges may also be specified as conses like `(?A . ?Z)'.
  
!      SET may also be the name of a character class: `digit',
!      `control', `hex-digit', `blank', `graph', `print', `alnum',
!      `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',
!      `word', or one of their synonyms.
  
! `(not (any SET ...))'
!      matches any character not in SET ...
  
! `line-start', `bol'
       matches the empty string, but only at the beginning of a line
       in the text being matched
  
! `line-end', `eol'
       is similar to `line-start' but matches only at the end of a line
  
! `string-start', `bos', `bot'
       matches the empty string, but only at the beginning of the
       string being matched against.
  
! `string-end', `eos', `eot'
       matches the empty string, but only at the end of the
       string being matched against.
  
  `buffer-start'
       matches the empty string, but only at the beginning of the
!      buffer being matched against.  Actually equivalent to `string-start'.
  
  `buffer-end'
       matches the empty string, but only at the end of the
!      buffer being matched against.  Actually equivalent to `string-end'.
  
  `point'
       matches the empty string, but only at point.
  
! `word-start', `bow'
       matches the empty string, but only at the beginning or end of a
       word.
  
! `word-end', `eow'
       matches the empty string, but only at the end of a word.
  
  `word-boundary'
***************
*** 610,643 ****
       word.
  
  `(not word-boundary)'
       matches the empty string, but not at the beginning or end of a
       word.
  
! `digit'
       matches 0 through 9.
  
! `control'
       matches ASCII control characters.
  
! `hex-digit'
       matches 0 through 9, a through f and A through F.
  
  `blank'
       matches space and tab only.
  
! `graphic'
       matches graphic characters--everything except ASCII control chars,
       space, and DEL.
  
! `printing'
       matches printing characters--everything except ASCII control chars
       and DEL.
  
! `alphanumeric'
       matches letters and digits.  (But at present, for multibyte characters,
       it matches anything that has word syntax.)
  
! `letter'
       matches letters.  (But at present, for multibyte characters,
       it matches anything that has word syntax.)
  
--- 733,767 ----
       word.
  
  `(not word-boundary)'
+ `not-word-boundary'
       matches the empty string, but not at the beginning or end of a
       word.
  
! `digit', `numeric', `num'
       matches 0 through 9.
  
! `control', `cntrl'
       matches ASCII control characters.
  
! `hex-digit', `hex', `xdigit'
       matches 0 through 9, a through f and A through F.
  
  `blank'
       matches space and tab only.
  
! `graphic', `graph'
       matches graphic characters--everything except ASCII control chars,
       space, and DEL.
  
! `printing', `print'
       matches printing characters--everything except ASCII control chars
       and DEL.
  
! `alphanumeric', `alnum'
       matches letters and digits.  (But at present, for multibyte characters,
       it matches anything that has word syntax.)
  
! `letter', `alphabetic', `alpha'
       matches letters.  (But at present, for multibyte characters,
       it matches anything that has word syntax.)
  
***************
*** 647,671 ****
  `nonascii'
       matches non-ASCII (multibyte) characters.
  
! `lower'
       matches anything lower-case.
  
! `upper'
       matches anything upper-case.
  
! `punctuation'
       matches punctuation.  (But at present, for multibyte characters,
       it matches anything that has non-word syntax.)
  
! `space'
       matches anything that has whitespace syntax.
  
! `word'
       matches anything that has word syntax.
  
  `(syntax SYNTAX)'
       matches a character with syntax SYNTAX.  SYNTAX must be one
!      of the following symbols.
  
       `whitespace'             (\\s- in string notation)
       `punctuation'            (\\s.)
--- 771,799 ----
  `nonascii'
       matches non-ASCII (multibyte) characters.
  
! `lower', `lower-case'
       matches anything lower-case.
  
! `upper', `upper-case'
       matches anything upper-case.
  
! `punctuation', `punct'
       matches punctuation.  (But at present, for multibyte characters,
       it matches anything that has non-word syntax.)
  
! `space', `whitespace', `white'
       matches anything that has whitespace syntax.
  
! `word', `wordchar'
       matches anything that has word syntax.
  
+ `not-wordchar'
+      matches anything that has non-word syntax.
+ 
  `(syntax SYNTAX)'
       matches a character with syntax SYNTAX.  SYNTAX must be one
!      of the following symbols, or a symbol corresponding to the syntax
!      character, e.g. `\\.' for `\\s.'.
  
       `whitespace'             (\\s- in string notation)
       `punctuation'            (\\s.)
***************
*** 684,690 ****
       `comment-delimiter'      (\\s!)
  
  `(not (syntax SYNTAX))'
!      matches a character that has not syntax SYNTAX.
  
  `(category CATEGORY)'
       matches a character with category CATEGORY.  CATEGORY must be
--- 812,818 ----
       `comment-delimiter'      (\\s!)
  
  `(not (syntax SYNTAX))'
!      matches a character that doesn't have syntax SYNTAX.
  
  `(category CATEGORY)'
       matches a character with category CATEGORY.  CATEGORY must be
***************
*** 710,716 ****
       `japanese-katakana-two-byte'     (\\cK)
       `korean-hangul-two-byte'         (\\cN)
       `cyrillic-two-byte'              (\\cY)
!      `combining-diacritic'              (\\c^)
       `ascii'                          (\\ca)
       `arabic'                         (\\cb)
       `chinese'                                (\\cc)
--- 838,844 ----
       `japanese-katakana-two-byte'     (\\cK)
       `korean-hangul-two-byte'         (\\cN)
       `cyrillic-two-byte'              (\\cY)
!      `combining-diacritic'            (\\c^)
       `ascii'                          (\\ca)
       `arabic'                         (\\cb)
       `chinese'                                (\\cc)
***************
*** 731,742 ****
       `can-break'                      (\\c|)
  
  `(not (category CATEGORY))'
!      matches a character that has not category CATEGORY.
  
  `(and SEXP1 SEXP2 ...)'
       matches what SEXP1 matches, followed by what SEXP2 matches, etc.
  
  `(submatch SEXP1 SEXP2 ...)'
       like `and', but makes the match accessible with `match-end',
       `match-beginning', and `match-string'.
  
--- 859,874 ----
       `can-break'                      (\\c|)
  
  `(not (category CATEGORY))'
!      matches a character that doesn't have category CATEGORY.
  
  `(and SEXP1 SEXP2 ...)'
+ `(: SEXP1 SEXP2 ...)'
+ `(seq SEXP1 SEXP2 ...)'
+ `(sequence SEXP1 SEXP2 ...)'
       matches what SEXP1 matches, followed by what SEXP2 matches, etc.
  
  `(submatch SEXP1 SEXP2 ...)'
+ `(group SEXP1 SEXP2 ...)'
       like `and', but makes the match accessible with `match-end',
       `match-beginning', and `match-string'.
  
***************
*** 744,749 ****
--- 876,882 ----
       another name for `submatch'.
  
  `(or SEXP1 SEXP2 ...)'
+ `(| SEXP1 SEXP2 ...)'
       matches anything that matches SEXP1 or SEXP2, etc.  If all
       args are strings, use `regexp-opt' to optimize the resulting
       regular expression.
***************
*** 757,803 ****
  `(maximal-match SEXP)'
       produce a greedy regexp for SEXP.  This is the default.
  
! `(zero-or-more SEXP)'
!      matches zero or more occurrences of what SEXP matches.
! 
! `(0+ SEXP)'
!      like `zero-or-more'.
! 
! `(* SEXP)'
!      like `zero-or-more', but always produces a greedy regexp.
! 
! `(*? SEXP)'
!      like `zero-or-more', but always produces a non-greedy regexp.
  
! `(one-or-more SEXP)'
!      matches one or more occurrences of A.
  
! `(1+ SEXP)'
!      like `one-or-more'.
! 
! `(+ SEXP)'
       like `one-or-more', but always produces a greedy regexp.
  
! `(+? SEXP)'
       like `one-or-more', but always produces a non-greedy regexp.
  
! `(zero-or-one SEXP)'
       matches zero or one occurrences of A.
  
! `(optional SEXP)'
!      like `zero-or-one'.
! 
! `(? SEXP)'
       like `zero-or-one', but always produces a greedy regexp.
  
! `(?? SEXP)'
       like `zero-or-one', but always produces a non-greedy regexp.
  
  `(repeat N SEXP)'
!      matches N occurrences of what SEXP matches.
  
  `(repeat N M SEXP)'
!      matches N to M occurrences of what SEXP matches.
  
  `(backref N)'
       matches what was matched previously by submatch N.
--- 890,944 ----
  `(maximal-match SEXP)'
       produce a greedy regexp for SEXP.  This is the default.
  
! Below, `SEXP ...' represents a sequence of regexp forms, treated as if
! enclosed in `(and ...)'.
  
! `(zero-or-more SEXP ...)'
! `(0+ SEXP ...)'
!      matches zero or more occurrences of what SEXP ... matches.
! 
! `(* SEXP ...)'
!      like `zero-or-more', but always produces a greedy regexp, independent
!      of `rx-greedy-flag'.
! 
! `(*? SEXP ...)'
!      like `zero-or-more', but always produces a non-greedy regexp,
!      independent of `rx-greedy-flag'.
! 
! `(one-or-more SEXP ...)'
! `(1+ SEXP ...)'
!      matches one or more occurrences of SEXP ...
  
! `(+ SEXP ...)'
       like `one-or-more', but always produces a greedy regexp.
  
! `(+? SEXP ...)'
       like `one-or-more', but always produces a non-greedy regexp.
  
! `(zero-or-one SEXP ...)'
! `(optional SEXP ...)'
! `(opt SEXP ...)'
       matches zero or one occurrences of A.
  
! `(? SEXP ...)'
       like `zero-or-one', but always produces a greedy regexp.
  
! `(?? SEXP ...)'
       like `zero-or-one', but always produces a non-greedy regexp.
  
  `(repeat N SEXP)'
! `(= N SEXP ...)'
!      matches N occurrences.
! 
! `(>= N SEXP ...)'
!      matches N or more occurrences.
  
  `(repeat N M SEXP)'
! `(** N M SEXP ...)'
!      matches N to M occurrences.
! 
! `(backref N)'
!     matches what was matched previously by submatch N.
  
  `(backref N)'
       matches what was matched previously by submatch N.
***************
*** 811,819 ****
  
  `(regexp REGEXP)'
       include REGEXP in string notation in the result."
! 
!   (rx-to-string regexp))
! 
  (provide 'rx)
  
  ;;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b
--- 952,972 ----
  
  `(regexp REGEXP)'
       include REGEXP in string notation in the result."
!   (cond ((null regexps)
!        (error "No regexp"))
!       ((cdr regexps)
!        (rx-to-string `(and ,@regexps) t))
!       (t
!        (rx-to-string (car regexps) t))))
! 
! ;; ;; sregex.el replacement
! 
! ;; ;;;###autoload (provide 'sregex)
! ;; ;;;###autoload (autoload 'sregex "rx")
! ;; (defalias 'sregex 'rx-to-string)
! ;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
! ;; (defalias 'sregexq 'rx)
! 
  (provide 'rx)
  
  ;;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b
[Prev in Thread]
Current Thread
[Next in Thread]
[Emacs-diffs] Changes to emacs/lisp/emacs-lisp/rx.el [emacs-unicode-2], Miles Bader <=
Prev by Date: [Emacs-diffs] Changes to emacs/lisp/emacs-lisp/re-builder.el [emacs-unicode-2]
Next by Date: [Emacs-diffs] Changes to emacs/lisp/faces.el [emacs-unicode-2]
Previous by thread: [Emacs-diffs] Changes to emacs/lisp/emacs-lisp/re-builder.el [emacs-unicode-2]
Next by thread: [Emacs-diffs] Changes to emacs/lisp/faces.el [emacs-unicode-2]
Index(es):
- Date
- Thread