guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-6-150-gde


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-6-150-gdea901d
Date: Wed, 13 Jan 2010 05:13:05 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=dea901d66e46041f96d3d3a0f95bf0ab209387c9

The branch, master has been updated
       via  dea901d66e46041f96d3d3a0f95bf0ab209387c9 (commit)
      from  8470b3f45b48bf627642e8f41938492be4eacf2c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit dea901d66e46041f96d3d3a0f95bf0ab209387c9
Author: Michael Gran <address@hidden>
Date:   Tue Jan 12 21:02:41 2010 -0800

    Reader option for R6RS hex escapes
    
    This adds a reader option 'r6rs-hex-escapes that modifies the
    behavior of numeric escapes in characters and strings.  When enabled,
    variable-length character hex escapes (#\xNNN) are allowed and become
    the default output format for numerically-escaped characters.  Also,
    string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).
    
    * libguile/print.c (PRINT_CHAR_ESCAPE): new macro
      (iprin1): use new macro PRINT_CHAR_ESCAPE
    
    * libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define
    
    * libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
      (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
      (scm_read_string): parse R6RS hex string escapes
      (scm_read_character): parse R6RS hex character escapes
    
    * test-suite/tests/chars.test (with-read-options): new procedure
      (R6RS hex escapes): new tests
    
    * test-suite/tests/strings.test (with-read-options): new procedure
      (R6RS hex escapes): new tests

-----------------------------------------------------------------------

Summary of changes:
 libguile/print.c              |  106 ++++++++++++++++++++++++++++-------------
 libguile/private-options.h    |    8 ++-
 libguile/read.c               |   88 +++++++++++++++++++++++-----------
 test-suite/tests/chars.test   |   44 +++++++++++++++++
 test-suite/tests/strings.test |   96 +++++++++++++++++++++++++++++++++++--
 5 files changed, 274 insertions(+), 68 deletions(-)

diff --git a/libguile/print.c b/libguile/print.c
index aef575d..dcf28c7 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -409,6 +409,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, 
g_display);
 
 static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
 
+
+/* Print a character as an octal or hex escape.  */
+#define PRINT_CHAR_ESCAPE(i, port)              \
+  do                                            \
+    {                                           \
+      if (!SCM_R6RS_ESCAPES_P)                  \
+        scm_intprint (i, 8, port);              \
+      else                                      \
+        {                                       \
+          scm_puts ("x", port);                 \
+          scm_intprint (i, 16, port);           \
+        }                                       \
+    }                                           \
+  while (0)
+
+  
 void 
 scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
 {
@@ -488,7 +504,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                       else
                         /* Character is graphic but unrepresentable in
                            this port's encoding.  */
-                        scm_intprint (i, 8, port);
+                        PRINT_CHAR_ESCAPE (i, port);
                     }
                   else
                     {
@@ -507,12 +523,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                       else
                         /* Character is graphic but unrepresentable in
                            this port's encoding.  */
-                        scm_intprint (i, 8, port);
+                        PRINT_CHAR_ESCAPE (i, port);
                     }
                 }
               else
                 /* Character is a non-graphical character.  */
-                scm_intprint (i, 8, port);
+                PRINT_CHAR_ESCAPE (i, port);
            }
          else
            scm_i_charprint (i, port);
@@ -579,9 +595,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
         case scm_tc7_string:
           if (SCM_WRITINGP (pstate))
             {
-              size_t i, j, len;
+              size_t i, len;
               static char const hex[] = "0123456789abcdef";
-              char buf[8];
+              char buf[9];
 
 
               scm_putc ('"', port);
@@ -647,37 +663,61 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                     {
                       /* Character is graphic but unrepresentable in
                          this port's encoding or is not graphic.  */
-                      if (ch <= 0xFF)
+                      if (!SCM_R6RS_ESCAPES_P)
                         {
-                          buf[0] = '\\';
-                          buf[1] = 'x';
-                          buf[2] = hex[ch / 16];
-                          buf[3] = hex[ch % 16];
-                          scm_lfwrite (buf, 4, port);
-                        }
-                      else if (ch <= 0xFFFF)
-                        {
-                          buf[0] = '\\';
-                          buf[1] = 'u';
-                          buf[2] = hex[(ch & 0xF000) >> 12];
-                          buf[3] = hex[(ch & 0xF00) >> 8];
-                          buf[4] = hex[(ch & 0xF0) >> 4];
-                          buf[5] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 6, port);
-                          j = i + 1;
+                          if (ch <= 0xFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'x';
+                              buf[2] = hex[ch / 16];
+                              buf[3] = hex[ch % 16];
+                              scm_lfwrite (buf, 4, port);
+                            }
+                          else if (ch <= 0xFFFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'u';
+                              buf[2] = hex[(ch & 0xF000) >> 12];
+                              buf[3] = hex[(ch & 0xF00) >> 8];
+                              buf[4] = hex[(ch & 0xF0) >> 4];
+                              buf[5] = hex[(ch & 0xF)];
+                              scm_lfwrite (buf, 6, port);
+                            }
+                          else if (ch > 0xFFFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'U';
+                              buf[2] = hex[(ch & 0xF00000) >> 20];
+                              buf[3] = hex[(ch & 0xF0000) >> 16];
+                              buf[4] = hex[(ch & 0xF000) >> 12];
+                              buf[5] = hex[(ch & 0xF00) >> 8];
+                              buf[6] = hex[(ch & 0xF0) >> 4];
+                              buf[7] = hex[(ch & 0xF)];
+                              scm_lfwrite (buf, 8, port);
+                            }
                         }
-                      else if (ch > 0xFFFF)
+                      else
                         {
-                          buf[0] = '\\';
-                          buf[1] = 'U';
-                          buf[2] = hex[(ch & 0xF00000) >> 20];
-                          buf[3] = hex[(ch & 0xF0000) >> 16];
-                          buf[4] = hex[(ch & 0xF000) >> 12];
-                          buf[5] = hex[(ch & 0xF00) >> 8];
-                          buf[6] = hex[(ch & 0xF0) >> 4];
-                          buf[7] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 8, port);
-                          j = i + 1;
+                          scm_t_wchar ch2 = ch;
+                          
+                          /* Print an R6RS variable-length hex escape: 
"\xNNNN;"
+                          */
+                          int i = 8;
+                          buf[i] = ';';
+                          i --;
+                          if (ch == 0)
+                            buf[i--] = '0';
+                          else
+                            while (ch2 > 0)
+                              {
+                                buf[i] = hex[ch2 & 0xF];
+                                ch2 >>= 4;
+                                i --;
+                              }
+                          buf[i] = 'x';
+                          i --;
+                          buf[i] = '\\';
+                          scm_lfwrite (buf + i, 9 - i, port);
                         }
                     }
                 }
diff --git a/libguile/private-options.h b/libguile/private-options.h
index 703ca8a..40d40fb 100644
--- a/libguile/private-options.h
+++ b/libguile/private-options.h
@@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
 #if SCM_ENABLE_ELISP
 #define SCM_ELISP_VECTORS_P    scm_read_opts[4].val
 #define SCM_ESCAPED_PARENS_P   scm_read_opts[5].val
-#define SCM_N_READ_OPTIONS 6
+#endif
+#define SCM_R6RS_ESCAPES_P     scm_read_opts[6].val
+
+#if SCM_ENABLE_ELISP
+#define SCM_N_READ_OPTIONS 7
 #else
-#define SCM_N_READ_OPTIONS 4
+#define SCM_N_READ_OPTIONS 5
 #endif
 
 #endif  /* PRIVATE_OPTIONS */ 
diff --git a/libguile/read.c b/libguile/read.c
index 011684b..9e66cce 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -76,6 +76,8 @@ scm_t_option scm_read_opts[] = {
   { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
     "Support `\\(' and `\\)' in strings."},
 #endif
+  { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
+    "Use R6RS variable-length character and string hex escapes."},
   { 0, },
 };
 
@@ -412,32 +414,37 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
 
 
 /* Read a hexadecimal number NDIGITS in length.  Put its value into the 
variable
-   C.  */
-#define SCM_READ_HEX_ESCAPE(ndigits)            \
-  do                                            \
-    {                                           \
-      scm_t_wchar a;                            \
-      size_t i = 0;                             \
-      c = 0;                                    \
-      while (i < ndigits)                       \
-        {                                       \
-          a = scm_getc (port);                  \
-          if (a == EOF)                         \
-            goto str_eof;                       \
-          if ('0' <= a && a <= '9')             \
-            a -= '0';                           \
-          else if ('A' <= a && a <= 'F')        \
-            a = a - 'A' + 10;                   \
-          else if ('a' <= a && a <= 'f')        \
-            a = a - 'a' + 10;                   \
-          else                                  \
-            {                                   \
-              c = a;                            \
-              goto bad_escaped;                 \
-            }                                   \
-          c = c * 16 + a;                       \
-          i ++;                                 \
-        }                                       \
+   C.  If TERMINATOR is non-null, terminate early if the TERMINATOR character 
is
+   found.  */
+#define SCM_READ_HEX_ESCAPE(ndigits, terminator)                   \
+  do                                                               \
+    {                                                              \
+      scm_t_wchar a;                                               \
+      size_t i = 0;                                                \
+      c = 0;                                                       \
+      while (i < ndigits)                                          \
+        {                                                          \
+          a = scm_getc (port);                                     \
+          if (a == EOF)                                            \
+            goto str_eof;                                          \
+          if (terminator                                           \
+              && (a == (scm_t_wchar) terminator)                   \
+              && (i > 0))                                          \
+            break;                                                 \
+          if ('0' <= a && a <= '9')                                \
+            a -= '0';                                              \
+          else if ('A' <= a && a <= 'F')                           \
+            a = a - 'A' + 10;                                      \
+          else if ('a' <= a && a <= 'f')                           \
+            a = a - 'a' + 10;                                      \
+          else                                                     \
+            {                                                      \
+              c = a;                                               \
+              goto bad_escaped;                                    \
+            }                                                      \
+          c = c * 16 + a;                                          \
+          i ++;                                                    \
+        }                                                          \
     } while (0)
 
 static SCM
@@ -511,13 +518,16 @@ scm_read_string (int chr, SCM port)
               c = '\010';
               break;
             case 'x':
-              SCM_READ_HEX_ESCAPE (2);
+              if (SCM_R6RS_ESCAPES_P)
+                SCM_READ_HEX_ESCAPE (10, ';');
+              else
+                SCM_READ_HEX_ESCAPE (2, '\0');
               break;
             case 'u':
-              SCM_READ_HEX_ESCAPE (4);
+              SCM_READ_HEX_ESCAPE (4, '\0');
               break;
             case 'U':
-              SCM_READ_HEX_ESCAPE (6);
+              SCM_READ_HEX_ESCAPE (6, '\0');
               break;
             default:
             bad_escaped:
@@ -828,6 +838,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
         }
     }
 
+  if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
+    {
+      SCM p;
+      scm_t_wchar chr;
+      
+      /* Convert from hex, skipping the initial 'x' character in CHARNAME */
+      p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
+                                scm_from_uint (16));
+      if (SCM_I_INUMP (p))
+        {
+          scm_t_wchar c = SCM_I_INUM (p);
+          if (SCM_IS_UNICODE_CHAR (c))
+            return SCM_MAKE_CHAR (c);
+          else
+            scm_i_input_error (FUNC_NAME, port,
+                               "out-of-range hex character escape: ~a",
+                               scm_list_1 (charname));
+        }
+    }
+
   /* The names of characters should never have non-Latin1
      characters.  */
   if (scm_i_is_narrow_string (charname)
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index 509f070..25c82e8 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -29,6 +29,16 @@
   (cons #t "out-of-range"))
 
 
+;; Run THUNK in the context of the reader options OPTS
+(define (with-read-options opts thunk)
+  (let ((saved-options (read-options)))
+    (dynamic-wind
+        (lambda ()
+          (read-options opts))
+        thunk
+        (lambda ()
+          (read-options saved-options)))))
+
 (with-test-prefix "basic char handling"
 
   (with-test-prefix "evaluator"
@@ -313,3 +323,37 @@
        (with-output-to-string (lambda () (write #\soh)))
        "#\\soh"))))
 
+(with-test-prefix "R6RS hex escapes"
+
+  (pass-if "one-digit hex escape"
+    (eqv? (with-read-options '(r6rs-hex-escapes)
+            (lambda ()
+              (with-input-from-string "#\\xA" read)))
+          (integer->char #x0A)))
+
+  (pass-if "two-digit hex escape"
+    (eqv? (with-read-options '(r6rs-hex-escapes)
+            (lambda ()
+              (with-input-from-string "#\\xFF" read)))
+          (integer->char #xFF)))
+
+  (pass-if "four-digit hex escape"
+    (eqv? (with-read-options '(r6rs-hex-escapes)
+            (lambda ()
+              (with-input-from-string "#\\x00FF" read)))
+          (integer->char #xFF)))
+
+  (pass-if "eight-digit hex escape"
+    (eqv? (with-read-options '(r6rs-hex-escapes)
+            (lambda ()
+              (with-input-from-string "#\\x00006587" read)))
+          (integer->char #x6587)))
+  (pass-if "write R6RS escapes"
+    (string=?
+     (with-read-options '(r6rs-hex-escapes)
+       (lambda ()
+         (with-output-to-string 
+           (lambda () 
+             (write (integer->char #x80))))))
+     "#\\x80")))
+
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index e04c026..47ae93a 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -2,23 +2,24 @@
 ;;;; Jim Blandy <address@hidden> --- August 1999
 ;;;;
 ;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software 
Foundation, Inc.
-;;;; 
+;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
 ;;;; License as published by the Free Software Foundation; either
 ;;;; version 3 of the License, or (at your option) any later version.
-;;;; 
+;;;;
 ;;;; This library is distributed in the hope that it will be useful,
 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;;;; Lesser General Public License for more details.
-;;;; 
+;;;;
 ;;;; You should have received a copy of the GNU Lesser General Public
 ;;;; License along with this library; if not, write to the Free Software
 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
 
 (define-module (test-strings)
-  #:use-module (test-suite lib))
+  #:use-module (test-suite lib)
+  #:use-module (srfi srfi-1))
 
 (define exception:read-only-string
   (cons 'misc-error "^string is read-only"))
@@ -29,6 +30,16 @@
 (define exception:wrong-type-arg
   (cons #t "Wrong type"))
 
+;; Run THUNK in the context of the reader options OPTS
+(define (with-read-options opts thunk)
+  (let ((saved-options (read-options)))
+    (dynamic-wind
+        (lambda ()
+          (read-options opts))
+        thunk
+        (lambda ()
+          (read-options saved-options)))))
+
 ;; Create a string from integer char values, eg. (string-ints 65) => "A"
 (define (string-ints . args)
   (apply string (map integer->char args)))
@@ -229,6 +240,83 @@
   (pass-if "Guile extensions backslash escapes"
     (string=? "\0" (string #\nul))))
 
+
+(with-test-prefix "R6RS hex escapes"
+
+  (pass-if-exception "non-hex char in two-digit hex-escape"
+    exception:illegal-escape                     
+    (with-read-options '(r6rs-hex-escapes)
+      (lambda ()
+        (with-input-from-string "\"\\x0g;\"" read))))
+
+  (pass-if-exception "non-hex char in four-digit hex-escape"
+    exception:illegal-escape                     
+    (with-read-options '(r6rs-hex-escapes)
+      (lambda ()
+        (with-input-from-string "\"\\x000g;\"" read))))
+
+  (pass-if-exception "non-hex char in six-digit hex-escape"
+    exception:illegal-escape                     
+    (with-read-options '(r6rs-hex-escapes)
+      (lambda ()
+        (with-input-from-string "\"\\x00000g;\"" read))))
+
+  (pass-if-exception "no semicolon at termination of one-digit hex-escape"
+    exception:illegal-escape                     
+    (with-read-options '(r6rs-hex-escapes)
+      (lambda ()
+        (with-input-from-string "\"\\x0\"" read))))
+
+  (pass-if-exception "no semicolon at termination of three-digit hex-escape"
+    exception:illegal-escape                     
+    (with-read-options '(r6rs-hex-escapes)
+      (lambda ()
+        (with-input-from-string "\"\\x000\"" read))))
+
+  (pass-if "two-digit hex escape"
+    (eqv? 
+     (with-read-options '(r6rs-hex-escapes)
+       (lambda ()
+         (string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)))
+     (integer->char #xff)))
+
+  (pass-if "four-digit hex escape"
+    (eqv?
+     (with-read-options '(r6rs-hex-escapes)
+       (lambda ()
+         (string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)))
+     (integer->char #x0100)))
+
+  (pass-if "six-digit hex escape"
+    (eqv? 
+     (with-read-options '(r6rs-hex-escapes)
+       (lambda ()
+         (string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)))
+     (integer->char #x010300)))
+
+  (pass-if "escaped characters match non-escaped ASCII characters"
+    (string=?
+     (with-read-options '(r6rs-hex-escapes)
+       (lambda ()
+         (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
+     "ABC"))
+
+  (pass-if "write R6RS escapes"
+    
+     (let* ((s1 (apply string 
+                       (map integer->char '(#x8 ; backspace
+                                            #x20 ; space
+                                            #x30 ; zero
+                                            #x40 ; at sign
+                                            ))))
+            (s2 (with-read-options '(r6rs-hex-escapes)
+                  (lambda ()
+                    (with-output-to-string 
+                      (lambda () (write s1)))))))
+       (lset= eqv? 
+              (string->list s2)
+              (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))))
+
 ;;
 ;; string?
 ;;


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]