emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master a122a02: Make [:graph:] act like [:print:] sans space


From: Paul Eggert
Subject: [Emacs-diffs] master a122a02: Make [:graph:] act like [:print:] sans space
Date: Wed, 15 Apr 2015 07:27:24 +0000

branch: master
commit a122a0276bddbda8ca84f9b94250a5a5f4e0582a
Author: Paul Eggert <address@hidden>
Commit: Paul Eggert <address@hidden>

    Make [:graph:] act like [:print:] sans space
    
    In POSIX [[:print:]] is equivalent to [ [:graph:]], so change
    [:graph:] so that it matches everything that [:print:] does,
    except for space.
    * doc/lispref/searching.texi (Char Classes):
    * etc/NEWS:
    * lisp/emacs-lisp/rx.el (rx):
    Document [:graph:] to be [:print:] sans ' '.
    * src/character.c, src/character.h (graphicp): New function.
    * src/regex.c (ISGRAPH) [emacs]: Use it.
    (BIT_GRAPH): New macro.
    (BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH.
    (re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]:
    Return BIT_GRAPH for RECC_GRAPH.
    (re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH,
    and ISPRINT if BIT_PRINT.
---
 doc/lispref/searching.texi |   14 +++++++-------
 etc/NEWS                   |   10 +++++-----
 lisp/emacs-lisp/rx.el      |    8 ++++----
 src/character.c            |    8 ++++++++
 src/character.h            |    1 +
 src/regex.c                |   12 ++++++++----
 6 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 238d814..10ea411 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character.
 This matches @samp{0} through @samp{9}.  Thus, @samp{[-+[:digit:]]}
 matches any digit, as well as @samp{+} and @samp{-}.
 @item [:graph:]
-This matches graphic characters---everything except @acronym{ASCII} control
-characters, space, and the delete character.
+This matches graphic characters---everything except space,
+@acronym{ASCII} and non-@acronym{ASCII} control characters,
+surrogates, and codepoints unassigned by Unicode, as indicated by the
+Unicode @samp{general-category} property (@pxref{Character
+Properties}).
 @item [:lower:]
 This matches any lower-case letter, as determined by the current case
 table (@pxref{Case Tables}).  If @code{case-fold-search} is
@@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text Representations}).
 @item [:nonascii:]
 This matches any non-@acronym{ASCII} character.
 @item [:print:]
-This matches printing characters---everything except @acronym{ASCII}
-and non-@acronym{ASCII} control characters (including the delete
-character), surrogates, and codepoints unassigned by Unicode, as
-indicated by the Unicode @samp{general-category} property
-(@pxref{Character Properties}).
+This matches any printing character---either space, or a graphic
+character matched by @samp{[:graph:]}.
 @item [:punct:]
 This matches any punctuation character.  (At present, for multibyte
 characters, it matches anything that has non-word syntax.)
diff --git a/etc/NEWS b/etc/NEWS
index 907787a..d97e80a 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file notification support.
 *** gulp.el
 
 +++
-** The character class [:print:] in regular expressions
-no longer matches any multibyte character.  Instead, Emacs now
+** The character classes [:graph:] and [:print:] in regular expressions
+no longer match every multibyte character.  Instead, Emacs now
 consults the Unicode character properties to determine which
-characters are printable.  In particular, surrogates and unassigned
-codepoints are now rejected by this class.  If you want the old
-behavior, use [:multibyte:] instead.
+characters are graphic or printable.  In particular, surrogates and
+unassigned codepoints are now rejected.  If you want the old behavior,
+use [:multibyte:] instead.
 
 
 * New Modes and Packages in Emacs 25.1
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a5a228e..ab9beb6 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -965,12 +965,12 @@ CHAR
      matches space and tab only.
 
 `graphic', `graph'
-     matches graphic characters--everything except ASCII control chars,
-     space, and DEL.
+     matches graphic characters--everything except space, ASCII
+     and non-ASCII control characters, surrogates, and codepoints
+     unassigned by Unicode.
 
 `printing', `print'
-     matches printing characters--everything except ASCII and non-ASCII
-     control characters, surrogates, and codepoints unassigned by Unicode.
+     matches space and graphic characters.
 
 `alphanumeric', `alnum'
      matches alphabetic characters and digits.  (For multibyte characters,
diff --git a/src/character.c b/src/character.c
index b357dd5..ea98cf6 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1022,6 +1022,14 @@ decimalnump (int c)
   return gen_cat == UNICODE_CATEGORY_Nd;
 }
 
+/* Return 'true' if C is a graphic character as defined by its
+   Unicode properties: printable per printablep, but not space.  */
+bool
+graphicp (int c)
+{
+  return c != ' ' && printablep (c);
+}
+
 /* Return 'true' if C is a printable character as defined by its
    Unicode properties.  */
 bool
diff --git a/src/character.h b/src/character.h
index 1a5d2c8..859d717 100644
--- a/src/character.h
+++ b/src/character.h
@@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object);
 
 extern bool alphabeticp (int);
 extern bool decimalnump (int);
+extern bool graphicp (int);
 extern bool printablep (int);
 
 /* Return a translation table of id number ID.  */
diff --git a/src/regex.c b/src/regex.c
index b9d09d0..4af70c6 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 
 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                            \
                    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)        \
-                   : 1)
+                    : graphicp (c))
 
 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                            \
                    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
@@ -1875,7 +1875,8 @@ struct range_table_work_area
 #define BIT_MULTIBYTE  0x20
 #define BIT_ALPHA      0x40
 #define BIT_ALNUM      0x80
-#define BIT_PRINT      0x100
+#define BIT_GRAPH      0x100
+#define BIT_PRINT      0x200
 
 
 /* Set the bit for character C in a list.  */
@@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc)
 {
   switch (cc)
     {
-    case RECC_NONASCII: case RECC_GRAPH:
+    case RECC_NONASCII:
     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
     case RECC_ALPHA: return BIT_ALPHA;
     case RECC_ALNUM: return BIT_ALNUM;
@@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc)
     case RECC_UPPER: return BIT_UPPER;
     case RECC_PUNCT: return BIT_PUNCT;
     case RECC_SPACE: return BIT_SPACE;
+    case RECC_GRAPH: return BIT_GRAPH;
     case RECC_PRINT: return BIT_PRINT;
     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
     case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
@@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
                    | (class_bits & BIT_UPPER && ISUPPER (c))
                    | (class_bits & BIT_WORD  && ISWORD  (c))
                    | (class_bits & BIT_ALPHA && ISALPHA (c))
-                   | (class_bits & BIT_ALNUM && ISALNUM (c)))
+                   | (class_bits & BIT_ALNUM && ISALNUM (c))
+                   | (class_bits & BIT_GRAPH && ISGRAPH (c))
+                   | (class_bits & BIT_PRINT && ISPRINT (c)))
                  not = !not;
                else
                  CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]