bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: mbrtowc: don't replace mbstate_t on MSVC


From: Bruno Haible
Subject: Re: mbrtowc: don't replace mbstate_t on MSVC
Date: Thu, 02 Jan 2020 23:14:41 +0100
User-agent: KMail/5.1.3 (Linux/4.4.0-170-generic; KDE/5.18.0; x86_64; ; )

The previous patch also caused test failures on MSVC, all of them caused by
the fact that the original mbrtowc (which the code now uses, instead of an
emulation through mbtowc) stores a wide character even when reporting an
incomplete multibyte character. The patch below fixes it.


2020-01-02  Bruno Haible  <address@hidden>

        mbrtowc: Fix test failures on MSVC (regression by previous commit).
        * m4/mbrtowc.m4 (gl_MBRTOWC_STORES_INCOMPLETE): New macro.
        (gl_FUNC_MBRTOWC): Invoke it. Define MBRTOWC_STORES_INCOMPLETE_BUG.
        * lib/mbrtowc.c (rpl_mbrtowc): Add workaround for
        MBRTOWC_STORES_INCOMPLETE_BUG.
        * doc/posix-functions/mbrtowc.texi: Mention the MSVC bug.

diff --git a/doc/posix-functions/mbrtowc.texi b/doc/posix-functions/mbrtowc.texi
index c7c34e0..3b7aed0 100644
--- a/doc/posix-functions/mbrtowc.texi
+++ b/doc/posix-functions/mbrtowc.texi
@@ -28,6 +28,10 @@ This function does not put the state into non-initial state when parsing an
 incomplete multibyte character on some platforms:
 AIX 7.2.
 @item
+This function stores a wide character when parsing an incomplete multibyte
+character on some platforms:
+MSVC 14.
+@item
 This function returns the total number of bytes that make up the multibyte
 character, not the number of bytes that were needed to complete the multibyte
 character, on some platforms:
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index 9cc9f5d..1cdd1af 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -524,7 +524,13 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
   }
 # endif
 
+# if MBRTOWC_STORES_INCOMPLETE_BUG
+  ret = mbrtowc (&wc, s, n, ps);
+  if (ret < (size_t) -2 && pwc != NULL)
+    *pwc = wc;
+# else
   ret = mbrtowc (pwc, s, n, ps);
+# endif
 
 # if MBRTOWC_NUL_RETVAL_BUG
   if (ret < (size_t) -2 && !*pwc)
diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4
index af4e25d..5ad246f 100644
--- a/m4/mbrtowc.m4
+++ b/m4/mbrtowc.m4
@@ -1,4 +1,4 @@
-# mbrtowc.m4 serial 35  -*- coding: utf-8 -*-
+# mbrtowc.m4 serial 36  -*- coding: utf-8 -*-
 dnl Copyright (C) 2001-2002, 2004-2005, 2008-2020 Free Software Foundation,
 dnl Inc.
 dnl This file is free software; the Free Software Foundation
@@ -39,6 +39,7 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
       gl_MBRTOWC_NULL_ARG2
       gl_MBRTOWC_RETVAL
       gl_MBRTOWC_NUL_RETVAL
+      gl_MBRTOWC_STORES_INCOMPLETE
       gl_MBRTOWC_EMPTY_INPUT
       gl_MBRTOWC_C_LOCALE
       case "$gl_cv_func_mbrtowc_null_arg1" in
@@ -69,6 +70,13 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
            REPLACE_MBRTOWC=1
            ;;
       esac
+      case "$gl_cv_func_mbrtowc_stores_incomplete" in
+        *no) ;;
+        *) AC_DEFINE([MBRTOWC_STORES_INCOMPLETE_BUG], [1],
+             [Define if the mbrtowc function stores a wide character when reporting incomplete input.])
+           REPLACE_MBRTOWC=1
+           ;;
+      esac
       case "$gl_cv_func_mbrtowc_empty_input" in
         *yes) ;;
         *) AC_DEFINE([MBRTOWC_EMPTY_INPUT_BUG], [1],
@@ -592,6 +600,126 @@ int main ()
     ])
 ])
 
+dnl Test whether mbrtowc stores a wide character when reporting incomplete
+dnl input.
+
+AC_DEFUN([gl_MBRTOWC_STORES_INCOMPLETE],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrtowc stores incomplete characters],
+    [gl_cv_func_mbrtowc_stores_incomplete],
+    [
+     dnl Initial guess, used when cross-compiling or when no suitable locale
+     dnl is present.
+changequote(,)dnl
+     case "$host_os" in
+               # Guess yes on native Windows.
+       mingw*) gl_cv_func_mbrtowc_stores_incomplete="guessing yes" ;;
+       *)      gl_cv_func_mbrtowc_stores_incomplete="guessing no" ;;
+     esac
+changequote([,])dnl
+     case "$host_os" in
+       mingw*)
+         AC_RUN_IFELSE(
+           [AC_LANG_SOURCE([[
+#include <locale.h>
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+   <wchar.h>.
+   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
+   included before <wchar.h>.  */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+int main ()
+{
+  int result = 0;
+  if (setlocale (LC_ALL, "French_France.65001") != NULL)
+    {
+      wchar_t wc = (wchar_t) 0xBADFACE;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrtowc (&wc, "\303", 1, &state) == (size_t)(-2)
+          && wc != (wchar_t) 0xBADFACE)
+        result |= 1;
+    }
+  if (setlocale (LC_ALL, "Japanese_Japan.932") != NULL)
+    {
+      wchar_t wc = (wchar_t) 0xBADFACE;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrtowc (&wc, "\226", 1, &state) == (size_t)(-2)
+          && wc != (wchar_t) 0xBADFACE)
+        result |= 2;
+    }
+  if (setlocale (LC_ALL, "Chinese_Taiwan.950") != NULL)
+    {
+      wchar_t wc = (wchar_t) 0xBADFACE;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrtowc (&wc, "\245", 1, &state) == (size_t)(-2)
+          && wc != (wchar_t) 0xBADFACE)
+        result |= 4;
+    }
+  if (setlocale (LC_ALL, "Chinese_China.936") != NULL)
+    {
+      wchar_t wc = (wchar_t) 0xBADFACE;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrtowc (&wc, "\261", 1, &state) == (size_t)(-2)
+          && wc != (wchar_t) 0xBADFACE)
+        result |= 8;
+    }
+  return result;
+}]])],
+           [gl_cv_func_mbrtowc_stores_incomplete=no],
+           [gl_cv_func_mbrtowc_stores_incomplete=yes],
+           [:])
+         ;;
+       *)
+         AC_REQUIRE([gt_LOCALE_FR_UTF8])
+         if test $LOCALE_FR_UTF8 != none; then
+           AC_RUN_IFELSE(
+             [AC_LANG_SOURCE([[
+#include <locale.h>
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+   <wchar.h>.
+   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
+   included before <wchar.h>.  */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+int main ()
+{
+  if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
+    {
+      wchar_t wc = (wchar_t) 0xBADFACE;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrtowc (&wc, "\303", 1, &state) == (size_t)(-2)
+          && wc != (wchar_t) 0xBADFACE)
+        return 1;
+    }
+  return 0;
+}]])],
+             [gl_cv_func_mbrtowc_stores_incomplete=no],
+             [gl_cv_func_mbrtowc_stores_incomplete=yes],
+             [:])
+         fi
+         ;;
+     esac
+    ])
+])
+
 dnl Test whether mbrtowc returns the correct value on empty input.
 
 AC_DEFUN([gl_MBRTOWC_EMPTY_INPUT],




reply via email to

[Prev in Thread] Current Thread [Next in Thread]