guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Have string ports honor ‘%default-por t-encoding’


From: Ludovic Courtès
Subject: [PATCH] Have string ports honor ‘%default-por t-encoding’
Date: Tue, 05 Jan 2010 01:18:23 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.1 (gnu/linux)

Hello,

The attached patch fixes string ports so that their encoding defaults to
‘%default-port-encoding’ (as for other ports) instead of UTF-8.

Mike: can you review it?

Thanks,
Ludo’.

>From 7228c59977106d6b87f2347ffb77466073f832a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= <address@hidden>
Date: Tue, 5 Jan 2010 01:10:12 +0100
Subject: [PATCH] Have string ports honor `%default-port-encoding'.

* libguile/strports.c (scm_i_mkstrport): Remove.
  (scm_mkstrport): Don't change the port's encoding to UTF-8; convert
  STR to the default port encoding.
  (scm_strport_to_string): Fix documentation & indentation.

* libguile/strports.h (scm_i_mkstrport): Remove.

* test-suite/lib.scm (exception:encoding-error): New variable.
  (format-test-name): Set `%default-port-encoding' to "UTF-8".

* test-suite/tests/ports.test ("string ports")["%default-port-encoding
  is honored", "suitable encoding [latin-1]", "suitable encoding
  [latin-3]", "wrong encoding"]: New tests.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with UTF-16 string port", "put-bytevector
  with wrong-encoding string port"]: New tests.

* test-suite/tests/reader.test (read-string): Set
  `%default-port-encoding' to `#f'.
  ("reading")["unprintable symbol"]: Use a string that doesn't contain
  zeros.
---
 libguile/strports.c              |   80 +++++++++++++------------------------
 libguile/strports.h              |    4 +-
 test-suite/lib.scm               |   26 ++++++++-----
 test-suite/tests/ports.test      |   41 ++++++++++++++++++-
 test-suite/tests/r6rs-ports.test |   26 ++++++++++--
 test-suite/tests/reader.test     |   11 +++--
 6 files changed, 110 insertions(+), 78 deletions(-)

diff --git a/libguile/strports.c b/libguile/strports.c
index 95e93c9..75c3773 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2009 
Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2009, 
2010 Free Software Foundation, Inc.
  * 
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -289,84 +289,60 @@ st_truncate (SCM port, scm_t_off length)
     pt->write_pos = pt->read_end;
 }
 
-SCM 
-scm_i_mkstrport (SCM pos, const char *utf8_str, size_t str_len, long modes, 
const char *caller)
+SCM
+scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
 {
-  SCM z, str;
+  SCM z;
   scm_t_port *pt;
-  size_t c_pos;
-  char *buf;
-
-  /* Because ports are inherently 8-bit, strings need to be converted
-     to a locale representation for storage.  But, since string ports
-     rely on string functionality for their memory management, we need
-     to create a new string that has the 8-bit locale representation
-     of the underlying string.  
+  size_t str_len, c_pos;
+  char *buf, *c_str;
 
-     locale_str is already in the locale of the port.  */
-  str = scm_i_make_string (str_len, &buf);
-  memcpy (buf, utf8_str, str_len);
-
-  c_pos = scm_to_unsigned_integer (pos, 0, str_len);
+  SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
+  c_pos = scm_to_unsigned_integer (pos, 0, scm_i_string_length (str));
 
   if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
     scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
 
   scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
+
   z = scm_new_port_table_entry (scm_tc16_strport);
   pt = SCM_PTAB_ENTRY(z);
   SCM_SETSTREAM (z, SCM_UNPACK (str));
-  SCM_SET_CELL_TYPE(z, scm_tc16_strport|modes);
-  pt->write_buf = pt->read_buf = (unsigned char *) scm_i_string_chars (str);
+  SCM_SET_CELL_TYPE (z, scm_tc16_strport | modes);
+
+  /* Create a copy of STR in the encoding of Z.  */
+  buf = scm_to_stringn (str, &str_len, pt->encoding,
+                       SCM_FAILED_CONVERSION_ERROR);
+  /* FIXME: strdup doesn't do the right thing if BUF contains zeros, but we
+     don't know the size in bytes of STR.  */
+  c_str = scm_gc_strdup (buf, "strport");
+  free (buf);
+
+  pt->write_buf = pt->read_buf = (unsigned char *) c_str;
   pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
   pt->write_buf_size = pt->read_buf_size = str_len;
   pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size;
 
   pt->rw_random = 1;
+
   scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
 
-  /* ensure write_pos is writable. */
+  /* Ensure WRITE_POS is writable.  */
   if ((modes & SCM_WRTNG) && pt->write_pos == pt->write_end)
     st_flush (z);
 
-  scm_i_set_port_encoding_x (z, "UTF-8");
   scm_i_set_conversion_strategy_x (z, SCM_FAILED_CONVERSION_ERROR);
   return z;
 }
 
-SCM 
-scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
-{
-  SCM z;
-  size_t str_len;
-  char *buf;
-
-  SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
-
-  /* Because ports are inherently 8-bit, strings need to be converted
-     to a locale representation for storage.  But, since string ports
-     rely on string functionality for their memory management, we need
-     to create a new string that has the 8-bit locale representation
-     of the underlying string.  This violates the guideline that the
-     internal encoding of characters in strings is in unicode
-     codepoints. */
-
-  /* String ports are are always initialized with "UTF-8" as their
-     encoding.  */
-  buf = scm_to_stringn (str, &str_len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
-  z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
-  free (buf);
-  return z;
-}
-
-/* Create a new string from a string port's buffer, converting from
-   the port's 8-bit locale-specific representation to the standard
-   string representation.  */
-SCM scm_strport_to_string (SCM port)
+/* Create a new string from the buffer of PORT, a string port, converting from
+   PORT's encoding to the standard string representation.  */
+SCM
+scm_strport_to_string (SCM port)
 {
-  scm_t_port *pt = SCM_PTAB_ENTRY (port);
   SCM str;
-  
+  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
   if (pt->rw_active == SCM_PORT_WRITE)
     st_flush (port);
 
diff --git a/libguile/strports.h b/libguile/strports.h
index d93266a..3a9c3ec 100644
--- a/libguile/strports.h
+++ b/libguile/strports.h
@@ -3,7 +3,7 @@
 #ifndef SCM_STRPORTS_H
 #define SCM_STRPORTS_H
 
-/* Copyright (C) 1995,1996,2000,2001,2002, 2006, 2008 Free Software 
Foundation, Inc.
+/* Copyright (C) 1995,1996,2000,2001,2002, 2006, 2008, 2010 Free Software 
Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -44,8 +44,6 @@ SCM_API scm_t_bits scm_tc16_strport;
 
 
 SCM_API SCM scm_mkstrport (SCM pos, SCM str, long modes, const char * caller);
-SCM_INTERNAL SCM scm_i_mkstrport (SCM pos, const char *locale_str, size_t 
str_len, 
-                                 long modes, const char *caller);
 SCM_API SCM scm_strport_to_string (SCM port);
 SCM_API SCM scm_object_to_string (SCM obj, SCM printer);
 SCM_API SCM scm_call_with_output_string (SCM proc);
diff --git a/test-suite/lib.scm b/test-suite/lib.scm
index e5b7a08..a2390da 100644
--- a/test-suite/lib.scm
+++ b/test-suite/lib.scm
@@ -1,5 +1,5 @@
 ;;;; test-suite/lib.scm --- generic support for testing
-;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009 Free Software 
Foundation, Inc.
+;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010 Free 
Software Foundation, Inc.
 ;;;;
 ;;;; This program is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -30,6 +30,7 @@
  exception:numerical-overflow
  exception:struct-set!-denied
  exception:system-error
+ exception:encoding-error
  exception:miscellaneous-error
  exception:string-contains-nul
  exception:read-error
@@ -267,6 +268,8 @@ with-locale with-locale*
   (cons 'misc-error "^set! denied for field"))
 (define exception:system-error
   (cons 'system-error ".*"))
+(define exception:encoding-error
+  (cons 'misc-error "(cannot convert to output locale|input locale conversion 
error)"))
 (define exception:miscellaneous-error
   (cons 'misc-error "^.*"))
 (define exception:read-error
@@ -389,15 +392,18 @@ with-locale with-locale*
 
 ;;;; Turn a test name into a nice human-readable string.
 (define (format-test-name name)
-  (call-with-output-string
-   (lambda (port)
-     (let loop ((name name)
-               (separator ""))
-       (if (pair? name)
-          (begin
-            (display separator port)
-            (display (car name) port)
-            (loop (cdr name) ": ")))))))
+  ;; Choose a Unicode-capable encoding so that the string port can contain any
+  ;; valid Unicode character.
+  (with-fluids ((%default-port-encoding "UTF-8"))
+    (call-with-output-string
+     (lambda (port)
+       (let loop ((name name)
+                  (separator ""))
+         (if (pair? name)
+             (begin
+               (display separator port)
+               (display (car name) port)
+               (loop (cdr name) ": "))))))))
 
 ;;;; For a given test-name, deliver the full name including all prefixes.
 (define (full-name name)
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index 312467d..72dcb63 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -1,7 +1,7 @@
-;;;; ports.test --- test suite for Guile I/O ports     -*- scheme -*-
+;;;; ports.test --- Guile I/O ports.    -*- coding: utf-8; mode: scheme; -*-
 ;;;; Jim Blandy <address@hidden> --- May 1999
 ;;;;
-;;;;   Copyright (C) 1999, 2001, 2004, 2006, 2007, 2009 Free Software 
Foundation, Inc.
+;;;;   Copyright (C) 1999, 2001, 2004, 2006, 2007, 2009, 2010 Free Software 
Foundation, Inc.
 ;;;; 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -307,7 +307,42 @@
     (string-set! text 0 #\a)
     (string-set! text (- len 1) #\b)
     (pass-if "output check"
-            (string=? text result))))
+            (string=? text result)))
+
+  (pass-if "%default-port-encoding is honored"
+    (let ((encodings '("UTF-8" "UTF-16" "ISO-8859-1" "ISO-8859-3")))
+      (equal? (map (lambda (e)
+                     (with-fluids ((%default-port-encoding e))
+                       (call-with-output-string
+                         (lambda (p)
+                           (display (port-encoding p) p)))))
+                   encodings)
+              encodings)))
+
+  (pass-if "suitable encoding [latin-1]"
+    (let ((str "hello, world"))
+      (with-fluids ((%default-port-encoding "ISO-8859-1"))
+        (equal? str
+                (with-output-to-string
+                  (lambda ()
+                    (display str)))))))
+
+  (pass-if "suitable encoding [latin-3]"
+    (let ((str "ĉu bone?"))
+      (with-fluids ((%default-port-encoding "ISO-8859-3"))
+        (equal? str
+                (with-output-to-string
+                  (lambda ()
+                    (display str)))))))
+
+  (pass-if-exception "wrong encoding"
+    exception:encoding-error
+    (let ((str "ĉu bone?"))
+      ;; Latin-1 cannot represent ‘ĉ’.
+      (with-fluids ((%default-port-encoding "ISO-8859-1"))
+        (with-output-to-string
+          (lambda ()
+            (display str)))))))
 
 (with-test-prefix "call-with-output-string"
 
diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test
index eb60cf3..1d60991 100644
--- a/test-suite/tests/r6rs-ports.test
+++ b/test-suite/tests/r6rs-ports.test
@@ -1,6 +1,6 @@
-;;;; r6rs-ports.test --- Exercise the R6RS I/O port API.
+;;;; r6rs-ports.test --- R6RS I/O port tests.   -*- coding: iso-8859-1; -*-
 ;;;;
-;;;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
 ;;;; Ludovic Courtès
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
@@ -219,7 +219,25 @@
            (port (%make-void-port "w")))
 
       (close-port port)
-      (put-bytevector port bv))))
+      (put-bytevector port bv)))
+
+  (pass-if "put-bytevector with UTF-16 string port"
+    (let* ((str "hello, world")
+           (bv  (string->utf16 str)))
+      (equal? str
+              (with-fluids ((%default-port-encoding "UTF-16BE"))
+                (call-with-output-string
+                  (lambda (port)
+                    (put-bytevector port bv)))))))
+
+  (pass-if-exception "put-bytevector with wrong-encoding string port"
+    exception:encoding-error
+    (let* ((str "hello, world")
+           (bv  (string->utf16 str)))
+      (with-fluids ((%default-port-encoding "UTF-32"))
+        (call-with-output-string
+          (lambda (port)
+            (put-bytevector port bv)))))))
 
 
 (with-test-prefix "7.2.7 Input Ports"
@@ -452,8 +470,6 @@
            (not eof?)
            (bytevector=? sink source)))))
 
-
 ;;; Local Variables:
-;;; coding: latin-1
 ;;; mode: scheme
 ;;; End:
diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test
index 2ee21c1..b819e63 100644
--- a/test-suite/tests/reader.test
+++ b/test-suite/tests/reader.test
@@ -1,6 +1,6 @@
-;;;; reader.test --- Exercise the reader.               -*- Scheme -*-
+;;;; reader.test --- Reader test.    -*- coding: iso-8859-1; mode: scheme -*-
 ;;;;
-;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009 Free Software 
Foundation, Inc.
+;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009, 2010 Free 
Software Foundation, Inc.
 ;;;; Jim Blandy <address@hidden>
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
@@ -41,7 +41,8 @@
 
 
 (define (read-string s)
-  (with-input-from-string s (lambda () (read))))
+  (with-fluids ((%default-port-encoding #f))
+    (with-input-from-string s (lambda () (read)))))
 
 (define (with-read-options opts thunk)
   (let ((saved-options (read-options)))
@@ -110,8 +111,8 @@
 
   (pass-if "unprintable symbol"
     ;; The reader tolerates unprintable characters for symbols.
-    (equal? (string->symbol "\001\002\003")
-            (read-string "\001\002\003")))
+    (equal? (string->symbol "\x01\x02\x03")
+            (read-string "\x01\x02\x03")))
 
   (pass-if "CR recognized as a token delimiter"
     ;; In 1.8.3, character 0x0d was not recognized as a delimiter.
-- 
1.6.4.2


reply via email to

[Prev in Thread] Current Thread [Next in Thread]