guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 54/55: Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.


From: Andy Wingo
Subject: [Guile-commits] 54/55: Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.
Date: Thu, 23 May 2019 11:52:45 -0400 (EDT)

wingo pushed a commit to branch master
in repository guile.

commit bd50407d1f9adc3eedd6687c9a84aa7a503b7ce1
Author: Mark H Weaver <address@hidden>
Date:   Mon May 6 21:11:26 2019 -0400

    Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.
    
    * libguile/i18n.c (SCM_MAX_ALLOCA): New macro.
    (SCM_STRING_TO_U32_BUF): Accept an additional variable to remember
    whether we used malloc to allocate the buffer.  Use malloc if the
    allocation size is greater than SCM_MAX_ALLOCA.
    (SCM_CLEANUP_U32_BUF): New macro.
    (compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt.
    * libguile/strings.c (SCM_MAX_ALLOCA): New macro.
    (normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the
    allocation size is greater than SCM_MAX_ALLOCA.
    * test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests.
---
 libguile/i18n.c               | 72 +++++++++++++++++++++++++++++--------------
 libguile/strings.c            | 43 ++++++++++++++++++--------
 test-suite/tests/i18n.test    | 17 +++++++++-
 test-suite/tests/strings.test | 12 ++++++++
 4 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/libguile/i18n.c b/libguile/i18n.c
index fa7a9bd..fc47fdf 100644
--- a/libguile/i18n.c
+++ b/libguile/i18n.c
@@ -51,6 +51,10 @@
 
 #include "i18n.h"
 
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
 #if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
 /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
    Model, a Proposal'', by Ulrich Drepper:
@@ -752,23 +756,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
    A similar API can be found in MzScheme starting from version 200:
    http://download.plt-scheme.org/chronology/mzmr200alpha14.html .  */
 
-#define SCM_STRING_TO_U32_BUF(s1, c_s1)                                \
-  do                                                                   \
-    {                                                                  \
-      if (scm_i_is_narrow_string (s1))                                 \
-       {                                                               \
-         size_t i, len;                                                \
-         const char *buf = scm_i_string_chars (s1);                    \
-                                                                       \
-         len = scm_i_string_length (s1);                               \
-         c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1));             \
-                                                                       \
-         for (i = 0; i < len; i ++)                                    \
-           c_s1[i] = (unsigned char ) buf[i];                          \
-         c_s1[len] = 0;                                                \
-       }                                                               \
-      else                                                             \
-       c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1);            \
+#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p)               \
+  do                                                                    \
+    {                                                                   \
+      if (scm_i_is_narrow_string (str))                                 \
+        {                                                               \
+          size_t i, len, bytes;                                         \
+          const char *buf = scm_i_string_chars (str);                   \
+                                                                        \
+          len = scm_i_string_length (str);                              \
+          bytes = (len + 1) * sizeof (scm_t_wchar);                     \
+          c_str_malloc_p = (bytes > SCM_MAX_ALLOCA);                    \
+          c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes);     \
+                                                                        \
+          for (i = 0; i < len; i ++)                                    \
+            c_str[i] = (unsigned char ) buf[i];                         \
+          c_str[len] = 0;                                               \
+        }                                                               \
+      else                                                              \
+        {                                                               \
+          c_str_malloc_p = 0;                                           \
+          c_str = (scm_t_wchar *) scm_i_string_wide_chars (str);        \
+        }                                                               \
+    } while (0)
+
+#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p)                      \
+  do                                                                    \
+    {                                                                   \
+      if (c_str_malloc_p)                                               \
+        free (c_str);                                                   \
     } while (0)
 
 
@@ -782,10 +798,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
   int result;
   scm_t_locale c_locale;
   scm_t_wchar *c_s1, *c_s2;
+  int c_s1_malloc_p, c_s2_malloc_p;
   SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
 
-  SCM_STRING_TO_U32_BUF (s1, c_s1);
-  SCM_STRING_TO_U32_BUF (s2, c_s2);
+  SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+  SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION (c_locale, 
@@ -795,6 +812,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
     result = u32_strcoll ((const uint32_t *) c_s1,
                          (const uint32_t *) c_s2);
 
+  SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+  SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
   scm_remember_upto_here_2 (s1, s2);
   scm_remember_upto_here (locale);
   return result;
@@ -837,10 +857,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
   int result, ret = 0;
   scm_t_locale c_locale;
   scm_t_wchar *c_s1, *c_s2;
+  int c_s1_malloc_p, c_s2_malloc_p;
   SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
 
-  SCM_STRING_TO_U32_BUF (s1, c_s1);
-  SCM_STRING_TO_U32_BUF (s2, c_s2);
+  SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+  SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION
@@ -855,6 +876,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
                               (const uint32_t *) c_s2,
                               &result);
 
+  SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+  SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
   if (SCM_UNLIKELY (ret != 0))
     {
       errno = ret;
@@ -1221,13 +1245,13 @@ str_to_case (SCM str, scm_t_locale c_locale,
   scm_t_wchar *c_str, *c_buf;
   uint32_t *c_convstr;
   size_t len, convlen;
-  int ret;
+  int ret, c_str_malloc_p;
   SCM convstr;
 
   len = scm_i_string_length (str);
   if (len == 0)
     return scm_nullstr;
-  SCM_STRING_TO_U32_BUF (str, c_str);
+  SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION (c_locale, ret =
@@ -1239,6 +1263,8 @@ str_to_case (SCM str, scm_t_locale c_locale,
       u32_locale_tocase ((uint32_t *) c_str, len,
                          &c_convstr, &convlen, func);
 
+  SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
+
   scm_remember_upto_here (str);
 
   if (SCM_UNLIKELY (ret != 0))
diff --git a/libguile/strings.c b/libguile/strings.c
index e6ae5cb..8f6a47e 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -50,6 +50,10 @@
 #include "strings.h"
 
 
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
 
 
 /* {Strings}
@@ -1813,6 +1817,7 @@ static void
 unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
 {
   char *before, *after;
+  int malloc_p;
   size_t i, j;
   /* The worst case is if the input string contains all 4-digit hex escapes.
      "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
@@ -1820,7 +1825,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
   size_t nzeros, ndigits;
 
   before = buf;
-  after = alloca (max_out_len);
+  malloc_p = (max_out_len > SCM_MAX_ALLOCA);
+  after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
   i = 0;
   j = 0;
   while (i < *lenp)
@@ -1878,6 +1884,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
     }
   *lenp = j;
   memcpy (before, after, j);
+  if (malloc_p)
+    free (after);
 }
 
 char *
@@ -2318,28 +2326,37 @@ normalize_str (SCM string, uninorm_t form)
 {
   SCM ret;
   uint32_t *w_str;
+  uint32_t *w_norm_str;
   scm_t_wchar *cbuf;
-  size_t rlen, len = scm_i_string_length (string);
+  int malloc_p;
+  size_t norm_len, len = scm_i_string_length (string);
   
   if (scm_i_is_narrow_string (string))
     {
-      size_t i;
+      size_t i, bytes;
       const char *buf = scm_i_string_chars (string);
-      
-      w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
-      
+
+      bytes = (len + 1) * sizeof (scm_t_wchar);
+      malloc_p = (bytes > SCM_MAX_ALLOCA);
+      w_str = malloc_p ? malloc (bytes) : alloca (bytes);
+
       for (i = 0; i < len; i ++)
        w_str[i] = (unsigned char) buf[i];
       w_str[len] = 0;
     }
-  else 
-    w_str = (uint32_t *) scm_i_string_wide_chars (string);
+  else
+    {
+      malloc_p = 0;
+      w_str = (uint32_t *) scm_i_string_wide_chars (string);
+    }
 
-  w_str = u32_normalize (form, w_str, len, NULL, &rlen);  
-  
-  ret = scm_i_make_wide_string (rlen, &cbuf, 0);
-  u32_cpy ((uint32_t *) cbuf, w_str, rlen);
-  free (w_str);
+  w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
+
+  ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
+  u32_cpy ((uint32_t *) cbuf, w_norm_str, norm_len);
+  free (w_norm_str);
+  if (malloc_p)
+    free (w_str);
 
   scm_i_try_narrow_string (ret);
 
diff --git a/test-suite/tests/i18n.test b/test-suite/tests/i18n.test
index 811be7b..427aef4 100644
--- a/test-suite/tests/i18n.test
+++ b/test-suite/tests/i18n.test
@@ -78,7 +78,13 @@
   (pass-if "string-locale-ci<?"
     (and (string-locale-ci<? "hello" "WORLD")
          (string-locale-ci<? "hello" "WORLD"
-                             (make-locale (list LC_COLLATE) "C")))))
+                             (make-locale (list LC_COLLATE) "C"))))
+  (pass-if "large strings"
+    ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+    (let ((large (make-string 4000000 #\a)))
+      (and (string-locale-ci=? large large)
+           (not (string-locale-ci<? large large))
+           (not (string-locale<? large large))))))
 
 
 (define mingw?
@@ -333,6 +339,15 @@
         (string=? "Hello, World" (string-locale-titlecase 
                                   "hello, world" (make-locale LC_ALL "C")))))
 
+  (pass-if "large strings"
+    ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+    (let ((hellos (string-join (make-list 700000 "hello")))
+          (HELLOs (string-join (make-list 700000 "HELLO")))
+          (Hellos (string-join (make-list 700000 "Hello"))))
+      (and (string=? hellos (string-locale-downcase Hellos))
+           (string=? HELLOs (string-locale-upcase   Hellos))
+           (string=? Hellos (string-locale-titlecase hellos)))))
+
   (pass-if "string-locale-upcase German"
     (under-german-utf8-locale-or-unresolved
      (lambda ()
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index 2bfb2b4..32c9b9e 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -472,6 +472,18 @@
     (equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
 
 ;;
+;; normalizing large strings
+;;
+
+(pass-if "string-normalize-{nfd,nfc,nfkd,nfkc} on large strings"
+  ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+  (let ((large (make-string 4000000 #\a)))
+    (and (string=? large (string-normalize-nfd large))
+         (string=? large (string-normalize-nfc large))
+         (string=? large (string-normalize-nfkd large))
+         (string=? large (string-normalize-nfkc large)))))
+
+;;
 ;; string-utf8-length
 ;;
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]