[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] GNU Guile branch, stable-2.0, updated. v2.0.5-39-gc2c3bdd
From: |
Andy Wingo |
Subject: |
[Guile-commits] GNU Guile branch, stable-2.0, updated. v2.0.5-39-gc2c3bdd |
Date: |
Fri, 10 Feb 2012 12:45:00 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".
http://git.savannah.gnu.org/cgit/guile.git/commit/?id=c2c3bddb1d0b2180282d78262e84c3ae7a44731f
The branch, stable-2.0, has been updated
via c2c3bddb1d0b2180282d78262e84c3ae7a44731f (commit)
via e3d4597469a543d97c4997b128509c2ceb13ca2b (commit)
from e7cf0457d7c71acd2c597d1644328960f136e4bc (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit c2c3bddb1d0b2180282d78262e84c3ae7a44731f
Author: Andy Wingo <address@hidden>
Date: Thu Feb 9 23:15:25 2012 +0100
more efficient scm_string_to_utf8, scm_string_to_utf32
* libguile/bytevectors.c (scm_string_to_utf8): More efficient
implementation.
(scm_string_to_utf32): Likewise.
commit e3d4597469a543d97c4997b128509c2ceb13ca2b
Author: Andy Wingo <address@hidden>
Date: Thu Feb 9 23:14:11 2012 +0100
more efficient scm_to_utf8_stringn, scm_to_utf32_stringn
* libguile/strings.c (scm_to_utf8_stringn): More efficient
implementation than calling scm_to_stringn.
(scm_to_utf32_stringn): Likewise.
-----------------------------------------------------------------------
Summary of changes:
libguile/bytevectors.c | 56 +++++++++++++++++++++++++----------------------
libguile/strings.c | 56 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 82 insertions(+), 30 deletions(-)
diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c
index fff5355..dc326f5 100644
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -1954,33 +1954,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
#define FUNC_NAME s_scm_string_to_utf8
{
SCM utf;
- uint8_t *c_utf;
- size_t c_strlen, c_utf_len = 0;
+ scm_t_uint8 *c_utf;
+ size_t c_utf_len = 0;
SCM_VALIDATE_STRING (1, str);
- c_strlen = scm_i_string_length (str);
- if (scm_i_is_narrow_string (str))
- c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
- scm_i_string_chars (str), c_strlen,
- NULL, NULL, &c_utf_len);
- else
- {
- const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
- c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
- }
- if (SCM_UNLIKELY (c_utf == NULL))
- scm_syserror (FUNC_NAME);
- else
- {
- scm_dynwind_begin (0);
- scm_dynwind_free (c_utf);
-
- utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
- memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
-
- scm_dynwind_end ();
- }
+ c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
+ utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+ memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
+ free (c_utf);
return (utf);
}
@@ -1997,6 +1979,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
}
#undef FUNC_NAME
+static void
+swap_u32 (scm_t_wchar *vals, size_t len)
+{
+ size_t n;
+ for (n = 0; n < len; n++)
+ vals[n] = bswap_32 (vals[n]);
+}
+
SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1, 1, 0,
(SCM str, SCM endianness),
@@ -2004,7 +1994,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
"encoding of @var{str}.")
#define FUNC_NAME s_scm_string_to_utf32
{
- STRING_TO_UTF (32);
+ SCM bv;
+ scm_t_wchar *wchars;
+ size_t wchar_len, bytes_len;
+
+ wchars = scm_to_utf32_stringn (str, &wchar_len);
+ bytes_len = wchar_len * sizeof (scm_t_wchar);
+ if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
+ scm_i_native_endianness))
+ swap_u32 (wchars, wchar_len);
+
+ bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+ memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
+ free (wchars);
+
+ return bv;
}
#undef FUNC_NAME
diff --git a/libguile/strings.c b/libguile/strings.c
index 69632d6..71eee6c 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1844,10 +1844,47 @@ scm_to_utf8_string (SCM str)
return scm_to_utf8_stringn (str, NULL);
}
+static size_t
+latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
+{
+ size_t ret, i;
+ for (i = 0, ret = 0; i < len; i++)
+ ret += (str[i] < 128) ? 1 : 2;
+ return ret;
+}
+
+static scm_t_uint8*
+latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
+ scm_t_uint8 *u8_result, size_t *u8_lenp)
+{
+ size_t i, n;
+ size_t u8_len = latin1_u8_strlen (str, latin_len);
+
+ if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
+ u8_result = scm_malloc (u8_len + 1);
+ if (u8_lenp)
+ *u8_lenp = u8_len;
+
+ for (i = 0, n = 0; i < latin_len; i++)
+ n += u8_uctomb (u8_result + n, str[i], u8_len - n);
+ if (n != u8_len)
+ abort ();
+ u8_result[n] = 0;
+
+ return u8_result;
+}
+
char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
{
- return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
+ if (scm_i_is_narrow_string (str))
+ return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
+ scm_i_string_length (str),
+ NULL, lenp);
+ else
+ return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
+ scm_i_string_length (str),
+ NULL, lenp);
}
scm_t_wchar *
@@ -1865,9 +1902,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
SCM_VALIDATE_STRING (1, str);
if (scm_i_is_narrow_string (str))
- result = (scm_t_wchar *)
- scm_to_stringn (str, lenp, "UTF-32",
- SCM_FAILED_CONVERSION_ERROR);
+ {
+ scm_t_uint8 *codepoints;
+ size_t i, len;
+
+ codepoints = (scm_t_uint8*) scm_i_string_chars (str);
+ len = scm_i_string_length (str);
+ if (lenp)
+ *lenp = len;
+
+ result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+ for (i = 0; i < len; i++)
+ result[i] = codepoints[i];
+ result[len] = 0;
+ }
else
{
size_t len;
hooks/post-receive
--
GNU Guile
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Guile-commits] GNU Guile branch, stable-2.0, updated. v2.0.5-39-gc2c3bdd,
Andy Wingo <=