guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, stable-2.0, updated. v2.0.5-96-ge26da7a


From: Mark H Weaver
Subject: [Guile-commits] GNU Guile branch, stable-2.0, updated. v2.0.5-96-ge26da7a
Date: Mon, 02 Apr 2012 23:13:08 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=e26da7a24e79cf3a9d3052e78228a9dfed3c4f3d

The branch, stable-2.0, has been updated
       via  e26da7a24e79cf3a9d3052e78228a9dfed3c4f3d (commit)
      from  2c1b79513b7b5826db48b6e5e1d7f4dc7731d13b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit e26da7a24e79cf3a9d3052e78228a9dfed3c4f3d
Author: Mark H Weaver <address@hidden>
Date:   Mon Apr 2 18:55:45 2012 -0400

    Fix scm_to_utf8_stringn to return the length in bytes, et al
    
    * libguile/strings.c (u32_u8_length_in_bytes): Internal static function
      renamed from u32_u8_strlen, whose name was potentially confusing.  For
      added safety, handle everything that can be encoded in the more
      general UTF-8 encoding: up to six bytes for each code point, with code
      points up to 2^31-1.
    
      (scm_to_utf8_stringn): NUL-terminate only if (lenp == NULL).
      If (lenp != NULL) return the length in bytes in *lenp.

-----------------------------------------------------------------------

Summary of changes:
 libguile/strings.c |   52 ++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/libguile/strings.c b/libguile/strings.c
index a9e5afe..f4828f8 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1874,19 +1874,28 @@ latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
   return u8_result;
 }
 
-/* From RFC 3629:
+/* UTF-8 code table
+
+   (Note that this includes code points that are not allowed by Unicode,
+    but since this function has no way to report an error, and its
+    purpose is to determine the size of destination buffers for
+    libunicode conversion functions, we err on the safe side and handle
+    everything that libunicode might conceivably handle, now or in the
+    future.)
 
    Char. number range  |        UTF-8 octet sequence
       (hexadecimal)    |              (binary)
-   --------------------+---------------------------------------------
+   --------------------+------------------------------------------------------
    0000 0000-0000 007F | 0xxxxxxx
    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
-   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+   0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+   0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+   0400 0000-7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
 
 static size_t
-u32_u8_strlen (const scm_t_uint32 *str, size_t len)
+u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len)
 {
   size_t ret, i;
 
@@ -1900,8 +1909,12 @@ u32_u8_strlen (const scm_t_uint32 *str, size_t len)
         ret += 2;
       else if (c <= 0xffff)
         ret += 3;
-      else
+      else if (c <= 0x1fffff)
         ret += 4;
+      else if (c <= 0x3ffffff)
+        ret += 5;
+      else
+        ret += 6;
     }
 
   return ret;
@@ -1917,21 +1930,28 @@ scm_to_utf8_stringn (SCM str, size_t *lenp)
   else
     {
       scm_t_uint8 *buf, *ret;
-      size_t len, allocated;
+      size_t predicted_len, actual_len;  /* length in bytes */
 
-      len = u32_u8_strlen ((scm_t_uint32*)scm_i_string_wide_chars (str),
-                           scm_i_string_length (str));
-      allocated = len + 1;
-      buf = scm_malloc (allocated);
+      predicted_len = u32_u8_length_in_bytes
+        ((scm_t_uint32 *) scm_i_string_wide_chars (str),
+         scm_i_string_length (str));
 
-      ret = u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
-                       scm_i_string_length (str), buf, &len);
-
-      if (ret == buf && len + 1 == allocated)
+      if (lenp)
         {
-          ret[len] = 0;
-          return (char *) ret;
+          *lenp = predicted_len;
+          buf = scm_malloc (predicted_len);
         }
+      else
+        {
+          buf = scm_malloc (predicted_len + 1);
+          ret[predicted_len] = 0;
+        }
+
+      ret = u32_to_u8 ((scm_t_uint32 *) scm_i_string_wide_chars (str),
+                       scm_i_string_length (str), buf, &actual_len);
+
+      if (SCM_LIKELY (ret == buf && actual_len == predicted_len))
+        return (char *) ret;
 
       /* An error: a bad codepoint.  */
       {


hooks/post-receive
-- 
GNU Guile



reply via email to

[Prev in Thread] Current Thread [Next in Thread]