[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: new module 'mbrlen'
From: |
Bruno Haible |
Subject: |
Re: new module 'mbrlen' |
Date: |
Tue, 23 Dec 2008 00:33:33 +0100 |
User-agent: |
KMail/1.9.9 |
> New module 'mbrlen'.
mbrlen() has essentially the same set of platform dependent bugs as mbrtowc().
This patch provides a workaround:
2008-12-22 Bruno Haible <address@hidden>
Work around mbrlen() bugs on AIX, HP-UX, OSF/1, Solaris.
* m4/mbrlen.m4 (gl_FUNC_MBRLEN): Set REPLACE_MBRLEN if mbrtowc is
being overridden.
(gl_MBRLEN_INCOMPLETE_STATE, gl_MBRLEN_RETVAL, gl_MBRLEN_NUL_RETVAL):
New macros.
* lib/wchar.in.h (mbrlen): Override if REPLACE_MBRLEN is set.
* m4/wchar.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_MBRLEN.
* modules/wchar (Makefile.am): Substitute REPLACE_MBRLEN.
* doc/posix-functions/mbrlen.texi: Mention the various platform bugs.
--- doc/posix-functions/mbrlen.texi.orig 2008-12-23 00:27:48.000000000
+0100
+++ doc/posix-functions/mbrlen.texi 2008-12-22 23:53:43.000000000 +0100
@@ -11,6 +11,18 @@
@item
This function is missing on some platforms:
HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5.
address@hidden
+This function does not put the state into non-initial state when parsing an
+incomplete multibyte character on some platforms:
+AIX 5.1, OSF/1 5.1.
address@hidden
+This function returns the total number of bytes that make up the multibyte
+character, not the number of bytes that were needed to complete the multibyte
+character, on some platforms:
+HP-UX 11.11, Solaris 10.
address@hidden
+This function may not return 0 when parsing the NUL character on some
platforms:
+Solaris 9.
@end itemize
Portability problems not fixed by Gnulib:
--- lib/wchar.in.h.orig 2008-12-23 00:27:48.000000000 +0100
+++ lib/wchar.in.h 2008-12-22 13:34:03.000000000 +0100
@@ -157,7 +157,11 @@
/* Recognize a multibyte character. */
#if @GNULIB_MBRLEN@
-# if address@hidden@
+# if @REPLACE_MBRLEN@
+# undef mbrlen
+# define mbrlen rpl_mbrlen
+# endif
+# if address@hidden@ || @REPLACE_MBRLEN@
extern size_t mbrlen (const char *s, size_t n, mbstate_t *ps);
# endif
#elif defined GNULIB_POSIXCHECK
--- m4/mbrlen.m4.orig 2008-12-23 00:27:48.000000000 +0100
+++ m4/mbrlen.m4 2008-12-23 00:25:05.000000000 +0100
@@ -1,4 +1,4 @@
-# mbrlen.m4 serial 1
+# mbrlen.m4 serial 2
dnl Copyright (C) 2008 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -9,15 +9,188 @@
AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
AC_REQUIRE([AC_TYPE_MBSTATE_T])
+ AC_REQUIRE([gl_FUNC_MBRTOWC])
AC_CHECK_FUNCS_ONCE([mbrlen])
if test $ac_cv_func_mbrlen = no; then
HAVE_MBRLEN=0
+ else
+ dnl Most bugs affecting the system's mbrtowc function also affect the
+ dnl mbrlen function. So override mbrlen whenever mbrtowc is overridden.
+ dnl We could also run the individual tests below; the results would be
+ dnl the same.
+ if test $REPLACE_MBRTOWC = 1; then
+ REPLACE_MBRLEN=1
+ fi
+ fi
+ if test $HAVE_MBRLEN = 0 || test $REPLACE_MBRLEN = 1; then
gl_REPLACE_WCHAR_H
AC_LIBOBJ([mbrlen])
gl_PREREQ_MBRLEN
fi
])
+dnl Test whether mbrlen puts the state into non-initial state when parsing an
+dnl incomplete multibyte character.
+dnl Result is gl_cv_func_mbrlen_incomplete_state.
+
+AC_DEFUN([gl_MBRLEN_INCOMPLETE_STATE],
+[
+ AC_REQUIRE([AC_PROG_CC])
+ AC_REQUIRE([gt_LOCALE_JA])
+ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+ AC_CACHE_CHECK([whether mbrlen handles incomplete characters],
+ [gl_cv_func_mbrlen_incomplete_state],
+ [
+ dnl Initial guess, used when cross-compiling or when no suitable locale
+ dnl is present.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess no on AIX and OSF/1.
+ osf*) gl_cv_func_mbrlen_incomplete_state="guessing no" ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_mbrlen_incomplete_state="guessing yes" ;;
+ esac
+changequote([,])dnl
+ if test $LOCALE_JA != none; then
+ AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+ if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+ {
+ const char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+ if (mbsinit (&state))
+ return 1;
+ }
+ return 0;
+}],
+ [gl_cv_func_mbrlen_incomplete_state=yes],
+ [gl_cv_func_mbrlen_incomplete_state=no],
+ [])
+ fi
+ ])
+])
+
+dnl Test whether mbrlen, when parsing the end of a multibyte character,
+dnl correctly returns the number of bytes that were needed to complete the
+dnl character (not the total number of bytes of the multibyte character).
+dnl Result is gl_cv_func_mbrlen_retval.
+
+AC_DEFUN([gl_MBRLEN_RETVAL],
+[
+ AC_REQUIRE([AC_PROG_CC])
+ AC_REQUIRE([gt_LOCALE_FR_UTF8])
+ AC_REQUIRE([gt_LOCALE_JA])
+ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+ AC_CACHE_CHECK([whether mbrlen has a correct return value],
+ [gl_cv_func_mbrlen_retval],
+ [
+ dnl Initial guess, used when cross-compiling or when no suitable locale
+ dnl is present.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess no on HP-UX and Solaris.
+ hpux* | solaris*) gl_cv_func_mbrlen_retval="guessing no" ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_mbrlen_retval="guessing yes" ;;
+ esac
+changequote([,])dnl
+ if test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none; then
+ AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+ /* This fails on Solaris. */
+ if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
+ {
+ char input[] = "B\303\274\303\237er"; /* "Büßer" */
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+ {
+ input[1] = '\0';
+ if (mbrlen (input + 2, 5, &state) != 1)
+ return 1;
+ }
+ }
+ /* This fails on HP-UX 11.11. */
+ if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+ {
+ char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+ {
+ input[1] = '\0';
+ if (mbrlen (input + 2, 5, &state) != 2)
+ return 1;
+ }
+ }
+ return 0;
+}],
+ [gl_cv_func_mbrlen_retval=yes],
+ [gl_cv_func_mbrlen_retval=no],
+ [])
+ fi
+ ])
+])
+
+dnl Test whether mbrlen, when parsing a NUL character, correctly returns 0.
+dnl Result is gl_cv_func_mbrlen_nul_retval.
+
+AC_DEFUN([gl_MBRLEN_NUL_RETVAL],
+[
+ AC_REQUIRE([AC_PROG_CC])
+ AC_REQUIRE([gt_LOCALE_ZH_CN])
+ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+ AC_CACHE_CHECK([whether mbrlen returns 0 when parsing a NUL character],
+ [gl_cv_func_mbrlen_nul_retval],
+ [
+ dnl Initial guess, used when cross-compiling or when no suitable locale
+ dnl is present.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess no on Solaris 9.
+ solaris2.9) gl_cv_func_mbrlen_nul_retval="guessing no" ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_mbrlen_nul_retval="guessing yes" ;;
+ esac
+changequote([,])dnl
+ if test $LOCALE_ZH_CN != none; then
+ AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+ /* This crashes on Solaris 9 inside __mbrtowc_dense_gb18030. */
+ if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
+ {
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrlen ("", 1, &state) != 0)
+ return 1;
+ }
+ return 0;
+}],
+ [gl_cv_func_mbrlen_nul_retval=yes],
+ [gl_cv_func_mbrlen_nul_retval=no],
+ [])
+ fi
+ ])
+])
+
# Prerequisites of lib/mbrlen.c.
AC_DEFUN([gl_PREREQ_MBRLEN], [
:
--- m4/wchar.m4.orig 2008-12-23 00:27:48.000000000 +0100
+++ m4/wchar.m4 2008-12-22 13:34:42.000000000 +0100
@@ -7,7 +7,7 @@
dnl Written by Eric Blake.
-# wchar.m4 serial 21
+# wchar.m4 serial 22
AC_DEFUN([gl_WCHAR_H],
[
@@ -89,6 +89,7 @@
REPLACE_WCTOB=0; AC_SUBST([REPLACE_WCTOB])
REPLACE_MBSINIT=0; AC_SUBST([REPLACE_MBSINIT])
REPLACE_MBRTOWC=0; AC_SUBST([REPLACE_MBRTOWC])
+ REPLACE_MBRLEN=0; AC_SUBST([REPLACE_MBRLEN])
REPLACE_MBSRTOWCS=0; AC_SUBST([REPLACE_MBSRTOWCS])
REPLACE_MBSNRTOWCS=0;AC_SUBST([REPLACE_MBSNRTOWCS])
REPLACE_WCRTOMB=0; AC_SUBST([REPLACE_WCRTOMB])
--- modules/wchar.orig 2008-12-23 00:27:48.000000000 +0100
+++ modules/wchar 2008-12-22 13:34:56.000000000 +0100
@@ -53,6 +53,7 @@
-e 's|@''REPLACE_WCTOB''@|$(REPLACE_WCTOB)|g' \
-e 's|@''REPLACE_MBSINIT''@|$(REPLACE_MBSINIT)|g' \
-e 's|@''REPLACE_MBRTOWC''@|$(REPLACE_MBRTOWC)|g' \
+ -e 's|@''REPLACE_MBRLEN''@|$(REPLACE_MBRLEN)|g' \
-e 's|@''REPLACE_MBSRTOWCS''@|$(REPLACE_MBSRTOWCS)|g' \
-e 's|@''REPLACE_MBSNRTOWCS''@|$(REPLACE_MBSNRTOWCS)|g' \
-e 's|@''REPLACE_WCRTOMB''@|$(REPLACE_WCRTOMB)|g' \