guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 01/01: Thread-safety fixes for iconv and ports


From: Andy Wingo
Subject: [Guile-commits] 01/01: Thread-safety fixes for iconv and ports
Date: Mon, 23 May 2016 14:44:01 +0000 (UTC)

wingo pushed a commit to branch master
in repository guile.

commit 6bf7ec0c9c615f0b3bbb03bae15ebb10194e2bf8
Author: Andy Wingo <address@hidden>
Date:   Mon May 23 16:37:23 2016 +0200

    Thread-safety fixes for iconv and ports
    
    * libguile/ports-internal.h (scm_t_port): Rework to store iconv
      descriptors inline, so that the port finalizer doesn't race with the
      iconv descriptor finalizer.  Access is serialized through a lock.
      Fixes a bug whereby if the port finalizer and the descriptor finalizer
      run on different threads, the close-port run by the port finalizer
      could try to free the iconv descriptors at the same time as the
      descriptor finalizer.
    * libguile/ports.c (iconv_lock): New static variable.
      (scm_c_make_port_with_encoding): Initialize iconv-related fields.
      (scm_close_port): Lock while frobbing iconv descriptors.
      (prepare_iconv_descriptors): Adapt.
      (scm_specialize_port_encoding_x, scm_i_set_port_encoding_x): Lock
      while preparing iconv descriptors.
      (scm_port_acquire_iconv_descriptors)
      (scm_port_release_iconv_descriptors): New functions, which replace
      scm_i_port_iconv_descriptors.
      (scm_port_decode_char): Lock around iconv operations.
      (port_clear_stream_start_for_bom_write): Acquire iconv descriptors
      before checking precise_encoding, to make sure precise_encoding is
      initialized.
    * libguile/print.c (display_string_using_iconv): Adapt to use the new
      interface to get iconv descriptors from a port.
---
 libguile/ports-internal.h |   43 +++++-----
 libguile/ports.c          |  201 +++++++++++++++++++++++----------------------
 libguile/print.c          |   14 ++--
 3 files changed, 128 insertions(+), 130 deletions(-)

diff --git a/libguile/ports-internal.h b/libguile/ports-internal.h
index 0bfda4f..7aabee7 100644
--- a/libguile/ports-internal.h
+++ b/libguile/ports-internal.h
@@ -23,6 +23,7 @@
 #define SCM_PORTS_INTERNAL
 
 #include <assert.h>
+#include <iconv.h>
 
 #include "libguile/_scm.h"
 #include "libguile/ports.h"
@@ -302,24 +303,6 @@ scm_port_buffer_putback (SCM buf, const scm_t_uint8 *src, 
size_t count)
            src, count);
 }
 
-/* This is a separate object so that only those ports that use iconv
-   cause finalizers to be registered.  */
-struct scm_iconv_descriptors
-{
-  /* This is the same as pt->encoding, except if pt->encoding is UTF-16
-     or UTF-32, in which case this is UTF-16LE or a similar
-     byte-order-specialed version of UTF-16 or UTF-32.  We don't re-set
-     pt->encoding because being just plain UTF-16 or UTF-32 has an
-     additional meaning, being that we should consume and produce byte
-     order marker codepoints as appropriate. */
-  SCM precise_encoding;
-  /* input/output iconv conversion descriptors */
-  void *input_cd;
-  void *output_cd;
-};
-
-typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
-
 struct scm_t_port
 {
   /* Source location information.  */
@@ -342,15 +325,26 @@ struct scm_t_port
   /* True if the port is random access.  Implies that the buffers must
      be flushed before switching between reading and writing, seeking,
      and so on.  */
-  int rw_random;
+  unsigned rw_random : 1;
+  unsigned at_stream_start_for_bom_read  : 1;
+  unsigned at_stream_start_for_bom_write : 1;
 
   /* Character encoding support.  */
   SCM encoding;  /* A symbol of upper-case ASCII.  */
   SCM conversion_strategy; /* A symbol; either substitute, error, or escape.  
*/
 
-  unsigned at_stream_start_for_bom_read  : 1;
-  unsigned at_stream_start_for_bom_write : 1;
-  scm_t_iconv_descriptors *iconv_descriptors;
+  /* This is the same as pt->encoding, except if `encoding' is UTF-16 or
+     UTF-32, in which case this is UTF-16LE or a similar
+     byte-order-specialed version of UTF-16 or UTF-32.  This is a
+     separate field from `encoding' because being just plain UTF-16 or
+     UTF-32 has an additional meaning, being that we should consume and
+     produce byte order marker codepoints as appropriate.  Set to #f
+     before the iconv descriptors have been opened.  */
+  SCM precise_encoding;  /* with iconv_lock */
+  iconv_t input_cd;      /* with iconv_lock */
+  iconv_t output_cd;     /* with iconv_lock */
+
+  /* Port properties.  */
   SCM alist;
 };
 
@@ -359,6 +353,9 @@ struct scm_t_port
 #define SCM_FILENAME(x)           (SCM_PORT (x)->file_name)
 #define SCM_SET_FILENAME(x, n)    (SCM_PORT (x)->file_name = (n))
 
-SCM_INTERNAL scm_t_iconv_descriptors * scm_i_port_iconv_descriptors (SCM port);
+SCM_INTERNAL void scm_port_acquire_iconv_descriptors (SCM port,
+                                                      iconv_t *input_cd,
+                                                      iconv_t *output_cd);
+SCM_INTERNAL void scm_port_release_iconv_descriptors (SCM port);
 
 #endif
diff --git a/libguile/ports.c b/libguile/ports.c
index ff1db9d..f58da4b 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -112,6 +112,11 @@ static SCM sym_escape;
 
 
 
+/* We have to serialize operations on any given iconv descriptor.  */
+static scm_i_pthread_mutex_t iconv_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
+
+
+
 /* See Unicode 8.0 section 5.22, "Best Practice for U+FFFD
    Substitution".  */
 static const scm_t_wchar UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;
@@ -675,12 +680,15 @@ scm_c_make_port_with_encoding (scm_t_port_type *ptob, 
unsigned long mode_bits,
   pt->encoding = encoding;
   pt->conversion_strategy = conversion_strategy;
   pt->file_name = SCM_BOOL_F;
-  pt->iconv_descriptors = NULL;
   pt->position = scm_cons (SCM_INUM0, SCM_INUM0);
 
   pt->at_stream_start_for_bom_read  = 1;
   pt->at_stream_start_for_bom_write = 1;
 
+  pt->precise_encoding = SCM_BOOL_F;
+  pt->input_cd = (iconv_t) -1;
+  pt->output_cd = (iconv_t) -1;
+
   pt->alist = SCM_EOL;
 
   if (SCM_PORT_TYPE (ret)->flags & SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC)
@@ -770,12 +778,6 @@ SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
 
 /* Closing ports.  */
 
-static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
-
-/* scm_close_port
- * Call the close operation on a port object. 
- * see also scm_close.
- */
 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
            (SCM port),
            "Close the specified port object.  Return @code{#t} if it\n"
@@ -809,13 +811,17 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
        should be resilient to non-local exits.  */
     SCM_PORT_TYPE (port)->close (port);
 
-  if (pt->iconv_descriptors)
+  scm_i_pthread_mutex_lock (&iconv_lock);
+  if (scm_is_true (pt->precise_encoding))
     {
-      /* If we don't get here, the iconv_descriptors finalizer will
-         clean up. */
-      close_iconv_descriptors (pt->iconv_descriptors);
-      pt->iconv_descriptors = NULL;
+      if (pt->input_cd != (iconv_t) -1)
+        iconv_close (pt->input_cd);
+      if (pt->output_cd != (iconv_t) -1)
+        iconv_close (pt->output_cd);
+      pt->precise_encoding = SCM_BOOL_F;
+      pt->input_cd = pt->output_cd = (iconv_t) -1;
     }
+  scm_i_pthread_mutex_unlock (&iconv_lock);
 
   return SCM_BOOL_T;
 }
@@ -979,51 +985,53 @@ static const unsigned char scm_utf16le_bom[2] = {0xFF, 
0xFE};
 static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
 static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
 
+/* Called with the iconv lock.  Will release the lock before throwing
+   any error.  */
 static void
-finalize_iconv_descriptors (void *ptr, void *data)
+prepare_iconv_descriptors (SCM port, SCM precise_encoding)
 {
-  close_iconv_descriptors (ptr);
-}
-
-static scm_t_iconv_descriptors *
-open_iconv_descriptors (SCM precise_encoding, int reading, int writing)
-{
-  const char *encoding;
-  scm_t_iconv_descriptors *id;
+  scm_t_port *pt = SCM_PORT (port);
   iconv_t input_cd, output_cd;
+  const char *encoding;
   size_t i;
 
-  input_cd = (iconv_t) -1;
-  output_cd = (iconv_t) -1;
+  /* If the specified encoding is UTF-16 or UTF-32, then default to
+     big-endian byte order.  This fallback isn't necessary if you read
+     on the port before writing to it, as the read will sniff the BOM if
+     any and specialize the encoding; see the manual.  */
+  if (scm_is_eq (precise_encoding, sym_UTF_16))
+    precise_encoding = sym_UTF_16BE;
+  else if (scm_is_eq (precise_encoding, sym_UTF_32))
+    precise_encoding = sym_UTF_32BE;
+
+  if (scm_is_eq (pt->precise_encoding, precise_encoding))
+    return;
+
+  input_cd = output_cd = (iconv_t) -1;
+
+  if (!scm_is_symbol (precise_encoding))
+    goto invalid_encoding;
 
   encoding = scm_i_symbol_chars (precise_encoding);
   for (i = 0; encoding[i]; i++)
     if (encoding[i] > 127)
       goto invalid_encoding;
 
-  if (reading)
-    {
-      /* Open an input iconv conversion descriptor, from ENCODING
-         to UTF-8.  We choose UTF-8, not UTF-32, because iconv
-         implementations can typically convert from anything to
-         UTF-8, but not to UTF-32 (see
-         
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>).  */
-
-      /* Assume opening an iconv descriptor causes about 16 KB of
-         allocation.  */
-      scm_gc_register_allocation (16 * 1024);
+  /* Open a iconv conversion descriptors between ENCODING and UTF-8.  We
+     choose UTF-8, not UTF-32, because iconv implementations can
+     typically convert from anything to UTF-8, but not to UTF-32 (see
+     http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html,
+     for more details).  */
 
+  if (SCM_INPUT_PORT_P (port))
+    {
       input_cd = iconv_open ("UTF-8", encoding);
       if (input_cd == (iconv_t) -1)
         goto invalid_encoding;
     }
 
-  if (writing)
+  if (SCM_OUTPUT_PORT_P (port))
     {
-      /* Assume opening an iconv descriptor causes about 16 KB of
-         allocation.  */
-      scm_gc_register_allocation (16 * 1024);
-
       output_cd = iconv_open (encoding, "UTF-8");
       if (output_cd == (iconv_t) -1)
         {
@@ -1033,55 +1041,27 @@ open_iconv_descriptors (SCM precise_encoding, int 
reading, int writing)
         }
     }
 
-  id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
-  id->precise_encoding = precise_encoding;
-  id->input_cd = input_cd;
-  id->output_cd = output_cd;
+  if (pt->input_cd != (iconv_t) -1)
+    iconv_close (pt->input_cd);
+  if (pt->output_cd != (iconv_t) -1)
+    iconv_close (pt->output_cd);
+
+  pt->precise_encoding = precise_encoding;
+  pt->input_cd = input_cd;
+  pt->output_cd = output_cd;
 
-  /* Register a finalizer to close the descriptors.  */
-  scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
+  /* Make sure this port has a finalizer.  */
+  scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
 
-  return id;
+  return;
 
  invalid_encoding:
+  scm_i_pthread_mutex_unlock (&iconv_lock);
   scm_misc_error ("open_iconv_descriptors",
                   "invalid or unknown character encoding ~s",
                   scm_list_1 (precise_encoding));
 }
 
-static void
-close_iconv_descriptors (scm_t_iconv_descriptors *id)
-{
-  if (id->input_cd != (iconv_t) -1)
-    iconv_close (id->input_cd);
-  if (id->output_cd != (iconv_t) -1)
-    iconv_close (id->output_cd);
-  id->input_cd = (void *) -1;
-  id->output_cd = (void *) -1;
-}
-
-static void
-prepare_iconv_descriptors (SCM port, SCM encoding)
-{
-  scm_t_port *pt = SCM_PORT (port);
-  scm_t_iconv_descriptors *desc = pt->iconv_descriptors;
-
-  /* If the specified encoding is UTF-16 or UTF-32, then default to
-     big-endian byte order.  This fallback isn't necessary if you read
-     on the port before writing to it, as the read will sniff the BOM if
-     any and specialize the encoding; see the manual.  */
-  if (scm_is_eq (encoding, sym_UTF_16))
-    encoding = sym_UTF_16BE;
-  else if (scm_is_eq (encoding, sym_UTF_32))
-    encoding = sym_UTF_32BE;
-
-  if (desc && scm_is_eq (desc->precise_encoding, encoding))
-    return;
-
-  pt->iconv_descriptors = open_iconv_descriptors
-    (encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
-}
-
 SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
 SCM_DEFINE (scm_specialize_port_encoding_x,
             "specialize-port-encoding!", 2, 0, 0,
@@ -1107,33 +1087,41 @@ SCM_DEFINE (scm_specialize_port_encoding_x,
   else
     SCM_OUT_OF_RANGE (2, encoding);
 
+  scm_i_pthread_mutex_lock (&iconv_lock);
   prepare_iconv_descriptors (port, encoding);
+  scm_i_pthread_mutex_unlock (&iconv_lock);
 
   return SCM_UNSPECIFIED;
 }
 #undef FUNC_NAME
 
-scm_t_iconv_descriptors *
-scm_i_port_iconv_descriptors (SCM port)
+/* Acquire the iconv lock and fill in *INPUT_CD and/or *OUTPUT_CD.  */
+void
+scm_port_acquire_iconv_descriptors (SCM port, iconv_t *input_cd,
+                                    iconv_t *output_cd)
 {
   scm_t_port *pt = SCM_PORT (port);
 
-  if (!pt->iconv_descriptors)
+  scm_i_pthread_mutex_lock (&iconv_lock);
+  if (scm_is_false (pt->precise_encoding))
     prepare_iconv_descriptors (port, pt->encoding);
+  if (input_cd)
+    *input_cd = pt->input_cd;
+  if (output_cd)
+    *output_cd = pt->output_cd;
+}
 
-  return pt->iconv_descriptors;
+void
+scm_port_release_iconv_descriptors (SCM port)
+{
+  scm_i_pthread_mutex_unlock (&iconv_lock);
 }
 
 /* The name of the encoding is itself encoded in ASCII.  */
 void
 scm_i_set_port_encoding_x (SCM port, const char *encoding)
 {
-  scm_t_port *pt;
-  scm_t_iconv_descriptors *prev;
-
-  /* Set the character encoding for this port.  */
-  pt = SCM_PORT (port);
-  prev = pt->iconv_descriptors;
+  scm_t_port *pt = SCM_PORT (port);
 
   /* In order to handle cases where the encoding changes mid-stream
      (e.g. within an HTTP stream, or within a file that is composed of
@@ -1144,9 +1132,14 @@ scm_i_set_port_encoding_x (SCM port, const char 
*encoding)
   pt->at_stream_start_for_bom_write = 1;
   pt->encoding = canonicalize_encoding (encoding);
 
-  pt->iconv_descriptors = NULL;
-  if (prev)
-    close_iconv_descriptors (prev);
+  scm_i_pthread_mutex_lock (&iconv_lock);
+  if (pt->input_cd != (iconv_t) -1)
+    iconv_close (pt->input_cd);
+  if (pt->output_cd != (iconv_t) -1)
+    iconv_close (pt->output_cd);
+  pt->precise_encoding = SCM_BOOL_F;
+  pt->input_cd = pt->output_cd = (iconv_t) -1;
+  scm_i_pthread_mutex_unlock (&iconv_lock);
 }
 
 SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
@@ -1783,7 +1776,7 @@ SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 
0, 0,
 {
   char *input, *output;
   scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
-  scm_t_iconv_descriptors *id;
+  iconv_t input_cd;
   size_t c_start, c_count;
   size_t input_left, output_left, done;
 
@@ -1794,14 +1787,15 @@ SCM_DEFINE (scm_port_decode_char, "port-decode-char", 
4, 0, 0,
   SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (bv));
   SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (bv) - c_start);
 
-  id = scm_i_port_iconv_descriptors (port);
   input = (char *) SCM_BYTEVECTOR_CONTENTS (bv) + c_start;
   input_left = c_count;
   output = (char *) utf8_buf;
   output_left = sizeof (utf8_buf);
 
   /* FIXME: locking!  */
-  done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
+  scm_port_acquire_iconv_descriptors (port, &input_cd, NULL);
+  done = iconv (input_cd, &input, &input_left, &output, &output_left);
+  scm_port_release_iconv_descriptors (port);
 
   if (done == (size_t) -1)
     {
@@ -1856,7 +1850,8 @@ peek_iconv_codepoint (SCM port, size_t *len)
               /* Make sure iconv descriptors have been opened even if
                  there were no bytes, to be sure that a decoding error
                  is signalled if the encoding itself was invalid.  */
-              scm_i_port_iconv_descriptors (port);
+              scm_port_acquire_iconv_descriptors (port, NULL, NULL);
+              scm_port_release_iconv_descriptors (port);
               return EOF;
             }
 
@@ -2469,16 +2464,22 @@ port_clear_stream_start_for_bom_write (SCM port, enum 
bom_io_mode io_mode)
   /* Write a BOM if appropriate.  */
   if (scm_is_eq (pt->encoding, sym_UTF_16))
     {
-      scm_t_iconv_descriptors *id = scm_i_port_iconv_descriptors (port);
-      if (scm_is_eq (id->precise_encoding, sym_UTF_16LE))
+      SCM precise_encoding;
+      scm_port_acquire_iconv_descriptors (port, NULL, NULL);
+      precise_encoding = pt->precise_encoding;
+      scm_port_release_iconv_descriptors (port);
+      if (scm_is_eq (precise_encoding, sym_UTF_16LE))
         scm_c_write (port, scm_utf16le_bom, sizeof (scm_utf16le_bom));
       else
         scm_c_write (port, scm_utf16be_bom, sizeof (scm_utf16be_bom));
     }
   else if (scm_is_eq (pt->encoding, sym_UTF_32))
     {
-      scm_t_iconv_descriptors *id = scm_i_port_iconv_descriptors (port);
-      if (scm_is_eq (id->precise_encoding, sym_UTF_32LE))
+      SCM precise_encoding;
+      scm_port_acquire_iconv_descriptors (port, NULL, NULL);
+      precise_encoding = pt->precise_encoding;
+      scm_port_release_iconv_descriptors (port);
+      if (scm_is_eq (precise_encoding, sym_UTF_32LE))
         scm_c_write (port, scm_utf32le_bom, sizeof (scm_utf32le_bom));
       else
         scm_c_write (port, scm_utf32be_bom, sizeof (scm_utf32be_bom));
diff --git a/libguile/print.c b/libguile/print.c
index 5620577..84c9455 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -24,7 +24,6 @@
 #endif
 
 #include <errno.h>
-#include <iconv.h>
 #include <stdio.h>
 #include <assert.h>
 
@@ -1026,9 +1025,7 @@ display_string_using_iconv (const void *str, int 
narrow_p, size_t len,
                            scm_t_string_failed_conversion_handler strategy)
 {
   size_t printed;
-  scm_t_iconv_descriptors *id;
-
-  id = scm_i_port_iconv_descriptors (port);
+  iconv_t output_cd;
 
   printed = 0;
 
@@ -1057,8 +1054,9 @@ display_string_using_iconv (const void *str, int 
narrow_p, size_t len,
       output = encoded_output;
       output_left = sizeof (encoded_output);
 
-      done = iconv (id->output_cd, &input, &input_left,
-                   &output, &output_left);
+      scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
+      done = iconv (output_cd, &input, &input_left, &output, &output_left);
+      scm_port_release_iconv_descriptors (port);
 
       output_len = sizeof (encoded_output) - output_left;
 
@@ -1067,7 +1065,9 @@ display_string_using_iconv (const void *str, int 
narrow_p, size_t len,
           int errno_save = errno;
 
          /* Reset the `iconv' state.  */
-         iconv (id->output_cd, NULL, NULL, NULL, NULL);
+          scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
+         iconv (output_cd, NULL, NULL, NULL, NULL);
+          scm_port_release_iconv_descriptors (port);
 
          /* Print the OUTPUT_LEN bytes successfully converted.  */
          scm_lfwrite (encoded_output, output_len, port);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]