guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, string-abstraction, updated. 274d338ba


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, string-abstraction, updated. 274d338ba7ee788d06ec9a87708afc3130c8616d
Date: Fri, 24 Apr 2009 16:37:05 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=274d338ba7ee788d06ec9a87708afc3130c8616d

The branch, string-abstraction has been updated
       via  274d338ba7ee788d06ec9a87708afc3130c8616d (commit)
       via  85d33207ea51f77a182865c1d7d4485848be5c08 (commit)
      from  2fbadb37c561659d9ae9f73b472061551309c571 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 274d338ba7ee788d06ec9a87708afc3130c8616d
Author: Michael Gran <address@hidden>
Date:   Fri Apr 24 09:34:58 2009 -0700

    Add capability to read symbols with wide characters and hex escapes.
    
        * read.c: Add capability to read symbols with wide characters and
        hex escapes as suggested by R6RS. (Symbols with hex escapes seems
        like a terrible idea, though.)

commit 85d33207ea51f77a182865c1d7d4485848be5c08
Author: Michael Gran <address@hidden>
Date:   Fri Apr 24 09:29:07 2009 -0700

    Add R6RS syntax lexical syntax tests for chars, symbols
    
        * chars.test: Add tests for the 'write' of characters
    
        * strings.test: Update copyright date
    
        * symbols.test: Add tests for symbol syntax and for symbol hex
        escapes.

-----------------------------------------------------------------------

Summary of changes:
 libguile/read.c               |  196 ++++++++++++++++++++++++-----------------
 test-suite/tests/chars.test   |   20 ++++
 test-suite/tests/strings.test |    2 +-
 test-suite/tests/symbols.test |   82 +++++++++++++++++-
 4 files changed, 216 insertions(+), 84 deletions(-)

diff --git a/libguile/read.c b/libguile/read.c
index 39cba5f..098b75f 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -373,6 +373,66 @@ scm_read_sexp (int chr, SCM port)
 }
 #undef FUNC_NAME
 
+static scm_t_uint32
+scm_read_hex_escape (const char *func_name, SCM port)
+{
+  /* Read character with the syntax \xNN; where NN is a
+     hex number of 1 to 8 digits, and the semicolon
+     terminates the character escape.  */
+  int siz, i;
+  scm_t_uint32 c, d;
+  char digit[11];      /* slash + x + 8 digits + semicolon */
+
+  digit[0] = '\\';
+  digit[1] = 'x';
+  siz = 2;
+
+  while (siz < 11)
+    {
+      d = scm_getc (port);
+      if (d == (scm_t_uint32) EOF)
+       goto bad_hex_escaped;
+      if (!(isxdigit (d) || d == ';'))
+       goto bad_hex_escaped;
+
+      digit[siz] = (char) d;
+      siz++;
+      if (d == (scm_t_uint32)';')
+       break;
+    }
+  if (siz == 3 || siz > 11)
+    goto bad_hex_escaped;
+  c = 0;
+  for (i = 2; i < siz-1; i ++)
+    {
+      c = c << 4;
+      if      ('0' <= digit[i] && digit[i] <= '9') 
+       c += digit[i] - '0';
+      else if ('A' <= digit[i] && digit[i] <= 'F') 
+       c += digit[i] - 'A' + 10;
+      else if ('a' <= digit[i] && digit[i] <= 'f') 
+       c += digit[i] - 'a' + 10;
+      else goto bad_hex_escaped;
+    }
+  if ((c < 0) 
+      || (c > SCM_CODEPOINT_MAX)
+      || ((c >= SCM_CODEPOINT_SURROGATE_START)
+         && (c <= SCM_CODEPOINT_SURROGATE_END)))
+    goto bad_hex_escaped;
+
+  return c;
+
+ bad_hex_escaped:
+  scm_i_input_error (func_name, port, "invalid character escape ~a",
+                    scm_list_1 (scm_from_locale_stringn (digit,siz)));
+
+  /* Never gets here.  */
+  return '?';
+}
+
+
+
+
 static SCM
 scm_read_string (int chr, SCM port)
 #define FUNC_NAME "scm_lreadr"
@@ -401,9 +461,7 @@ scm_read_string (int chr, SCM port)
          SCM addy = scm_c_make_string (READER_STRING_BUFFER_SIZE, 
                                        SCM_MAKE_8BIT_CHAR ('0'));
 
-         str = scm_string_append_shared (scm_list_2 (str, addy));
-
-         c_str_len = 0;
+         str = scm_string_append (scm_list_2 (str, addy));
        }
 
       if (c == '\\')
@@ -447,58 +505,13 @@ scm_read_string (int chr, SCM port)
          case 'v':
            c = '\v';
            break;
-
          case 'x':
-           {
-             /* Read character with the syntax \xNN; where NN is a
-                hex number of 1 to 8 digits, and the semicolon
-                terminates the character escape.  */
-             int siz, i;
-             int d;
-             char digit[11];   /* slash + x + 8 digits + semicolon */
-             digit[0] = '\\';
-             digit[1] = 'x';
-             siz = 2;
-             while (siz < 11)
-               {
-                 d = scm_getc (port);
-                 if (d == EOF)
-                   goto bad_hex_escaped;
-                 digit[siz] = (char) d;
-                 siz++;
-                 if (d == (int)';')
-                   break;
-                 if (! isxdigit (d))
-                   goto bad_hex_escaped;
-               }
-             if (siz == 3 || siz > 11)
-               goto bad_hex_escaped;
-             c = 0;
-             for (i = 2; i < siz-1; i ++)
-               {
-                 c = c << 4;
-                 if      ('0' <= digit[i] && digit[i] <= '9') 
-                   c += digit[i] - '0';
-                 else if ('A' <= digit[i] && digit[i] <= 'F') 
-                   c += digit[i] - 'A' + 10;
-                 else if ('a' <= digit[i] && digit[i] <= 'f') 
-                   c += digit[i] - 'a' + 10;
-                 else goto bad_hex_escaped;
-               }
-             if ((c < 0) 
-                 || (c > SCM_CODEPOINT_MAX)
-                 || ((c >= SCM_CODEPOINT_SURROGATE_START)
-                     && (c <= SCM_CODEPOINT_SURROGATE_END)))
-               goto bad_hex_escaped;
-             break;
-           bad_hex_escaped:
-             scm_i_input_error (FUNC_NAME, port, "invalid character escape ~a",
-                                scm_list_1 (scm_from_locale_stringn 
(digit,siz)));
-           }
+           c = scm_read_hex_escape (FUNC_NAME, port);
+           break;
          default:
          bad_escaped:
            scm_i_input_error (FUNC_NAME, port,
-                              "Illegal character in escape sequence: ~S",
+                              "Invalid character in escape sequence: ~S",
                               scm_list_1 (SCM_MAKE_CHAR (c)));
          }
       scm_i_string_set (str, c_str_len, SCM_MAKE_CHAR (c));
@@ -516,7 +529,6 @@ scm_read_string (int chr, SCM port)
 }
 #undef FUNC_NAME
 
-
 static SCM
 scm_read_number (int chr, SCM port)
 {
@@ -557,46 +569,66 @@ scm_read_number (int chr, SCM port)
 }
 
 static SCM
-scm_read_mixed_case_symbol (int chr, SCM port)
+scm_read_mixed_case_symbol (char *func_name, int chr, SCM port)
 {
   SCM result, str = SCM_EOL;
-  int overflow = 0, ends_with_colon = 0;
-  char buffer[READER_BUFFER_SIZE];
-  size_t read = 0;
+  int ends_with_colon = 0;
+  scm_t_uint32 c;
+  unsigned c_str_len = 0;
   int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
 
+  str = scm_c_make_string (READER_BUFFER_SIZE, 
+                          SCM_MAKE_8BIT_CHAR ('0'));
+
   scm_ungetc (chr, port);
-  do
+
+  while ((c = scm_getc (port)) != (scm_t_uint32) EOF)
     {
-      overflow = read_token (port, buffer, sizeof (buffer), &read);
+      c = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (c) : c);
 
-      if (read > 0)
-       ends_with_colon = (buffer[read - 1] == ':');
+      if (CHAR_IS_DELIMITER (c))
+       {
+         scm_ungetc (c, port);
+         break;
+       }
+      if ((c_str_len > 0)
+         && (c_str_len % READER_BUFFER_SIZE == 0))
+       {
+         /* Expand the string buffer.  */
+         SCM addy = scm_c_make_string (READER_BUFFER_SIZE, 
+                                       SCM_MAKE_8BIT_CHAR ('0'));
 
-      if ((overflow) || (scm_is_pair (str)))
-       str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+         str = scm_string_append (scm_list_2 (str, addy));
+       }
+
+      if (c == '\\')
+       switch (c = scm_getc (port))
+         {
+         case (scm_t_uint32) EOF:
+           scm_i_input_error (func_name, port,
+                              "Invalid symbol terminator: ~S",
+                              scm_list_1 (SCM_MAKE_8BIT_CHAR ('\\')));
+         case 'x':
+           c = scm_read_hex_escape (func_name, port);
+           break;
+         default:
+           scm_i_input_error (func_name, port,
+                              "Invalid character in escape sequence: ~S",
+                              scm_list_1 (SCM_MAKE_CHAR (c)));
+         }
+      scm_i_string_set (str, c_str_len, SCM_MAKE_CHAR (c));
+      c_str_len ++;
     }
-  while (overflow);
 
-  if (scm_is_pair (str))
-    {
-      str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
-      result = scm_string_to_symbol (str);
+  ends_with_colon = scm_i_string_ref_eq_char (str, c_str_len - 1, ':');
 
-      /* Per SRFI-88, `:' alone is an identifier, not a keyword.  */
-      if (postfix && ends_with_colon && (scm_c_string_length (result) > 1))
-       result = scm_symbol_to_keyword (result);
-    }
-  else
-    {
-      /* For symbols smaller than `sizeof (buffer)', we don't need to recur
-        to Scheme strings.  Therefore, we only create one Scheme object (a
-        symbol) per symbol read.  */
-      if (postfix && ends_with_colon && (read > 1))
-       result = scm_from_locale_keywordn (buffer, read - 1);
-      else
-       result = scm_from_locale_symboln (buffer, read);
-    }
+  str =  scm_c_substring_copy (str, 0, c_str_len);
+
+  result = scm_string_to_symbol (str);
+
+  /* Per SRFI-88, `:' alone is an identifier, not a keyword.  */
+  if (postfix && ends_with_colon && (scm_c_string_length (result) > 1))
+    result = scm_symbol_to_keyword (result);
 
   return result;
 }
@@ -1139,7 +1171,7 @@ scm_read_expression (SCM port)
                || (strchr ("+-.", chr)))
              return (scm_read_number (chr, port));
            else
-             return (scm_read_mixed_case_symbol (chr, port));
+             return (scm_read_mixed_case_symbol (FUNC_NAME, chr, port));
          }
        }
     }
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index 5995620..2b13262 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -201,6 +201,26 @@
                 (list #\000 #\010 #\020 #\030)
                 (list #\x00 #\x08 #\x10 #\x18)))))
                 
+
+;;;;;;;;;;
+;; Write format of characters
+
+(with-test-prefix "character write format"
+
+  (pass-if "ASCII characters"
+    (let* ((lst (map integer->char (iota 128)))
+          (str (with-output-to-string (lambda () (write lst)))))
+      (list= eqv? 
+            lst
+            (with-input-from-string str read))))
+
+  (pass-if "ISO-8859-1 characters"
+    (let* ((lst (map integer->char (iota 128 128)))
+          (str (with-output-to-string (lambda () (write lst)))))
+      (list= eqv? 
+            lst
+            (with-input-from-string str read)))))
+
 ;;;;;;;;;;
 ;; Basic character functions
 (with-test-prefix "R6RS character procedures"
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index e708092..9686418 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -1,7 +1,7 @@
 ;;;; strings.test --- test suite for Guile's string functions    -*- scheme -*-
 ;;;; Jim Blandy <address@hidden> --- August 1999
 ;;;;
-;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008 Free Software 
Foundation, Inc.
+;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software 
Foundation, Inc.
 ;;;; 
 ;;;; This program is free software; you can redistribute it and/or modify
 ;;;; it under the terms of the GNU General Public License as published by
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 3fe3402..be4e2da 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -1,6 +1,6 @@
 ;;;; symbols.test --- test suite for Guile's symbols    -*- scheme -*-
 ;;;;
-;;;; Copyright (C) 2001, 2006, 2008 Free Software Foundation, Inc.
+;;;; Copyright (C) 2001, 2006, 2008, 2009 Free Software Foundation, Inc.
 ;;;; 
 ;;;; This program is free software; you can redistribute it and/or modify
 ;;;; it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@
   #:use-module (test-suite lib)
   #:use-module (ice-9 documentation))
 
+(define exception:read-error
+  (cons 'read-error "^.*"))
 
 ;;;
 ;;; miscellaneous
@@ -88,3 +90,81 @@
   (pass-if "accepts embedded NULs"
     (> (string-length (symbol->string (gensym 
"foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0")))
 6)))
 
+;;;
+;;; lexical syntax of identifiers
+;;;
+
+(define (legit x)
+  (expect-fail-exception (string-append x " is invalid") exception:read-error
+    (with-input-from-string x read)))                   
+
+(define (invalid x)
+  (pass-if-exception (string-append x " is invalid")  exception:read-error
+    (with-input-from-string x read)))                   
+              
+(define (should-be-invalid x)
+  (pass-if (string-append x " is invalid")
+    (throw 'unresolved)))
+
+(with-test-prefix "symbol syntax"
+
+ (with-test-prefix "1-character ASCII identifiers"
+   (map legit (list "'!" "'$" "'%" "'&" "'*" "'/" "':" "'<" "'=" "'>" "'?" 
+               "'^" "'_" "'~"))
+   (map should-be-invalid (list "'+" "'-" "'." "'0" "'@" "'[" "']" "'{" 
+                               "'|" "'}"))
+   (map invalid (list "'\"" "'#" "'\\")))
+
+ (with-test-prefix "2-character ASCII identifiers"
+   (map legit (list "'A!" "'A$" "'A%" "'A&" "'A*" "'A+" "'A-" "'A." "'A/"
+                   "'A:" "'A<" "'A=" "'A")) 
+   (map should-be-invalid (list "'a\"" "'a#" "'a[" "'a]" "'a{" "'a|" "'a}"))
+   (map invalid (list "'a\\")))
+
+ (with-test-prefix "inline hex escapes"
+
+  (pass-if "2 digit"
+    (eq? 'AB '\x41;B
+        ))
+
+  (pass-if-exception "\\+ is invalid" exception:read-error
+    (with-input-from-string "'\\+" read))
+
+  (pass-if-exception "\\x-1; is invalid" exception:read-error
+    (with-input-from-string "'\\x-1;" read))
+
+  (pass-if-exception "\\xd800; is invalid" 
+    exception:read-error
+    (with-input-from-string "'\\xd800;" read))
+
+  (pass-if-exception "\\xdfff; is invalid" 
+    exception:read-error
+    (with-input-from-string "'\\xdfff;" read))
+
+  (pass-if-exception "\\x1a0000; is invalid" 
+    exception:read-error
+    (with-input-from-string "'\\x1a0000;" read))
+
+  (pass-if "'abc is '\\x61;\\x62;\\x63"
+    (eqv? 'abc
+         '\x61;\x62;\x63;
+         ))
+
+  (pass-if "'\\x41;bc is 'Abc"
+    (eqv? 'Abc '\x41;bc
+         ))
+
+  (pass-if-exception "'\\x4' is missing semicolon at EOF" exception:read-error
+    (with-input-from-string "'\\x41" read))
+
+  (pass-if-exception "'\\x;' is invalid" exception:read-error
+    (with-input-from-string "'\\x;" read))
+
+  (pass-if "'\\x00000041; is 'A"
+    (eqv? 'A '\x00000041;
+         ))
+
+  (pass-if "'\\x0010FFFF; is #\\x10FFFF"
+    (eqv? '\x0010FFFF; '\x10FFFF;
+         ))))
+


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]