bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

vasnprintf: add support for printing wide characters using escapes


From: Bruno Haible
Subject: vasnprintf: add support for printing wide characters using escapes
Date: Sun, 19 Apr 2020 17:11:45 +0200
User-agent: KMail/5.1.3 (Linux/4.4.0-177-generic; KDE/5.18.0; x86_64; ; )

printf or fprintf is often used for debugging purposes. In native Windows
programs, one of the most frequent types is a wide string (wchar_t[]).
But there is no working primitive for printing such a wide string: all
of printf("%ls"), _putws, etc. fail if the wide string contains characters
that are not in convertible to the GetACP() code page.

As a makeshift, to allow use of printf and fprintf for debugging purposes,
I'm adding an option ENABLE_WCHAR_FALLBACK, to be enabled at compile time.
The effect is that unconvertible characters get shown in hexadecimal
escape syntax (\unnnn or \Unnnnnnnn) or, if wchar_t is not Unicode encoded,
\wnnnn or \Wnnnnnnnn.


2020-04-19  Bruno Haible  <address@hidden>

        vasnprintf: Add support for printing wide characters using escapes.
        * lib/vasnprintf.c (ENABLE_WCHAR_FALLBACK): Document optional macro.
        (wctomb_fallback): New function.
        (local_wctomb): New function.
        (local_wcrtomb): New function or macro.
        (MAX_ROOM_NEEDED): Adjust estimate for %lc.
        (VASNPRINTF): Simplify %ls code by use of local_wcrtomb. Add code for
        %lc.

diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c
index bca04d7..b3b6e7b 100644
--- a/lib/vasnprintf.c
+++ b/lib/vasnprintf.c
@@ -41,7 +41,14 @@
      DCHAR_CONV_FROM_ENCODING A function to convert from char[] to DCHAR[].
      DCHAR_IS_UINT8_T   Set to 1 if DCHAR_T is uint8_t.
      DCHAR_IS_UINT16_T  Set to 1 if DCHAR_T is uint16_t.
-     DCHAR_IS_UINT32_T  Set to 1 if DCHAR_T is uint32_t.  */
+     DCHAR_IS_UINT32_T  Set to 1 if DCHAR_T is uint32_t.
+     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.
+     ENABLE_WCHAR_FALLBACK  Set to 1 to avoid EILSEQ during conversion of wide
+                        characters (wchar_t) and wide character strings
+                        (wchar_t[]) to multibyte sequences.  The fallback is 
the
+                        hexadecimal escape syntax (\unnnn or \Unnnnnnnn) or,
+                        if wchar_t is not Unicode encoded, \wnnnn or 
\Wnnnnnnnn.
+ */
 
 /* Tell glibc's <stdio.h> to provide a prototype for snprintf().
    This must come before <config.h> because <config.h> may include
@@ -277,6 +284,74 @@ local_wcsnlen (const wchar_t *s, size_t maxlen)
 # endif
 #endif
 
+#if (((!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || 
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL) || ENABLE_WCHAR_FALLBACK) && 
HAVE_WCHAR_T) || (ENABLE_WCHAR_FALLBACK && HAVE_WINT_T)) && !WIDE_CHAR_VERSION
+# if ENABLE_WCHAR_FALLBACK
+static size_t
+wctomb_fallback (char *s, wchar_t wc)
+{
+  static char hex[16] = "0123456789ABCDEF";
+
+  s[0] = '\\';
+  if (sizeof (wchar_t) > 2 && wc > 0xffff)
+    {
+#  if __STDC_ISO_10646__ || (__GLIBC__ >= 2) || (defined _WIN32 || defined 
__CYGWIN__)
+      s[1] = 'U';
+#  else
+      s[1] = 'W';
+#  endif
+      s[2] = hex[(wc & 0xf0000000U) >> 28];
+      s[3] = hex[(wc & 0xf000000U) >> 24];
+      s[4] = hex[(wc & 0xf00000U) >> 20];
+      s[5] = hex[(wc & 0xf0000U) >> 16];
+      s[6] = hex[(wc & 0xf000U) >> 12];
+      s[7] = hex[(wc & 0xf00U) >> 8];
+      s[8] = hex[(wc & 0xf0U) >> 4];
+      s[9] = hex[wc & 0xfU];
+      return 10;
+    }
+  else
+    {
+#  if __STDC_ISO_10646__ || (__GLIBC__ >= 2) || (defined _WIN32 || defined 
__CYGWIN__)
+      s[1] = 'u';
+#  else
+      s[1] = 'w';
+#  endif
+      s[2] = hex[(wc & 0xf000U) >> 12];
+      s[3] = hex[(wc & 0xf00U) >> 8];
+      s[4] = hex[(wc & 0xf0U) >> 4];
+      s[5] = hex[wc & 0xfU];
+      return 6;
+    }
+}
+#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+static size_t
+local_wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
+{
+  size_t count = wcrtomb (s, wc, ps);
+  if (count == (size_t)(-1))
+    count = wctomb_fallback (s, wc);
+  return count;
+}
+#  else
+static int
+local_wctomb (char *s, wchar_t wc)
+{
+  int count = wctomb (s, wc);
+  if (count < 0)
+    count = wctomb_fallback (s, wc);
+  return count;
+}
+#   define local_wcrtomb(S, WC, PS)  local_wctomb ((S), (WC))
+#  endif
+# else
+#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+#   define local_wcrtomb(S, WC, PS)  wcrtomb ((S), (WC), (PS))
+#  else
+#   define local_wcrtomb(S, WC, PS)  wctomb ((S), (WC))
+#  endif
+# endif
+#endif
+
 #if (NEED_PRINTF_DIRECTIVE_A || NEED_PRINTF_LONG_DOUBLE || 
NEED_PRINTF_INFINITE_LONG_DOUBLE || NEED_PRINTF_DOUBLE || 
NEED_PRINTF_INFINITE_DOUBLE) && !defined IN_LIBINTL
 /* Determine the decimal-point character according to the current locale.  */
 # ifndef decimal_point_char_defined
@@ -1677,7 +1752,13 @@ MAX_ROOM_NEEDED (const arguments *ap, size_t arg_index, 
FCHAR_T conversion,
     case 'c':
 # if HAVE_WINT_T && !WIDE_CHAR_VERSION
       if (type == TYPE_WIDE_CHAR)
-        tmp_length = MB_CUR_MAX;
+        {
+          tmp_length = MB_CUR_MAX;
+#  if ENABLE_WCHAR_FALLBACK
+          if (tmp_length < (sizeof (wchar_t) > 2 ? 10 : 6))
+            tmp_length = (sizeof (wchar_t) > 2 ? 10 : 6);
+#  endif
+        }
       else
 # endif
         tmp_length = 1;
@@ -2394,7 +2475,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                   }
               }
 #endif
-#if (!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || 
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL)) && HAVE_WCHAR_T
+#if (!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || 
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL) || ENABLE_WCHAR_FALLBACK) && 
HAVE_WCHAR_T
             else if (dp->conversion == 's'
 # if WIDE_CHAR_VERSION
                      && a.arg[dp->arg_index].type != TYPE_WIDE_STRING
@@ -2669,11 +2750,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                           if (*arg_end == 0)
                             /* Found the terminating null wide character.  */
                             break;
-#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
-                          count = wcrtomb (cbuf, *arg_end, &state);
-#  else
-                          count = wctomb (cbuf, *arg_end);
-#  endif
+                          count = local_wcrtomb (cbuf, *arg_end, &state);
                           if (count < 0)
                             {
                               /* Cannot convert.  */
@@ -2714,11 +2791,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                           if (*arg_end == 0)
                             /* Found the terminating null wide character.  */
                             break;
-#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
-                          count = wcrtomb (cbuf, *arg_end, &state);
-#  else
-                          count = wctomb (cbuf, *arg_end);
-#  endif
+                          count = local_wcrtomb (cbuf, *arg_end, &state);
                           if (count < 0)
                             {
                               /* Cannot convert.  */
@@ -2763,11 +2836,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
 
                         if (*arg == 0)
                           abort ();
-#   if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
-                        count = wcrtomb (cbuf, *arg, &state);
-#   else
-                        count = wctomb (cbuf, *arg);
-#   endif
+                        count = local_wcrtomb (cbuf, *arg, &state);
                         if (count <= 0)
                           /* Inconsistency.  */
                           abort ();
@@ -2844,11 +2913,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
 
                           if (*arg == 0)
                             abort ();
-#   if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
-                          count = wcrtomb (cbuf, *arg, &state);
-#   else
-                          count = wctomb (cbuf, *arg);
-#   endif
+                          count = local_wcrtomb (cbuf, *arg, &state);
                           if (count <= 0)
                             /* Inconsistency.  */
                             abort ();
@@ -2873,11 +2938,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
 
                           if (*arg == 0)
                             abort ();
-#   if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
-                          count = wcrtomb (cbuf, *arg, &state);
-#   else
-                          count = wctomb (cbuf, *arg);
-#   endif
+                          count = local_wcrtomb (cbuf, *arg, &state);
                           if (count <= 0)
                             {
                               /* Cannot convert.  */
@@ -2913,6 +2974,210 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
 # endif
               }
 #endif
+#if ENABLE_WCHAR_FALLBACK && HAVE_WINT_T && !WIDE_CHAR_VERSION
+            else if (dp->conversion == 'c'
+                     && a.arg[dp->arg_index].type == TYPE_WIDE_CHAR)
+              {
+                /* Implement the 'lc' directive ourselves, in order to provide
+                   the fallback that avoids EILSEQ.  */
+                int flags = dp->flags;
+                int has_width;
+                size_t width;
+
+                has_width = 0;
+                width = 0;
+                if (dp->width_start != dp->width_end)
+                  {
+                    if (dp->width_arg_index != ARG_NONE)
+                      {
+                        int arg;
+
+                        if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
+                          abort ();
+                        arg = a.arg[dp->width_arg_index].a.a_int;
+                        width = arg;
+                        if (arg < 0)
+                          {
+                            /* "A negative field width is taken as a '-' flag
+                                followed by a positive field width."  */
+                            flags |= FLAG_LEFT;
+                            width = -width;
+                          }
+                      }
+                    else
+                      {
+                        const FCHAR_T *digitp = dp->width_start;
+
+                        do
+                          width = xsum (xtimes (width, 10), *digitp++ - '0');
+                        while (digitp != dp->width_end);
+                      }
+                    has_width = 1;
+                  }
+
+                /* %lc in vasnprintf.  See the specification of fprintf.  */
+                {
+                  wchar_t arg = (wchar_t) a.arg[dp->arg_index].a.a_wide_char;
+                  size_t characters;
+# if !DCHAR_IS_TCHAR
+                  /* This code assumes that TCHAR_T is 'char'.  */
+                  verify (sizeof (TCHAR_T) == 1);
+                  TCHAR_T tmpsrc[64]; /* Assume MB_CUR_MAX <= 64.  */
+                  DCHAR_T *tmpdst;
+                  size_t tmpdst_len;
+# endif
+                  size_t w;
+
+# if DCHAR_IS_TCHAR
+                  if (has_width)
+# endif
+                    {
+                      /* Count the number of bytes.  */
+                      characters = 0;
+                      if (arg != 0)
+                        {
+                          char cbuf[64]; /* Assume MB_CUR_MAX <= 64.  */
+                          int count;
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+                          mbstate_t state;
+                          memset (&state, '\0', sizeof (mbstate_t));
+# endif
+
+                          count = local_wcrtomb (cbuf, arg, &state);
+                          if (count < 0)
+                            /* Inconsistency.  */
+                            abort ();
+                          characters = count;
+                        }
+                    }
+# if DCHAR_IS_TCHAR
+                  else
+                    {
+                      /* The number of bytes doesn't matter.  */
+                      characters = 0;
+                    }
+# endif
+
+# if !DCHAR_IS_TCHAR
+                  /* Convert the string into a piece of temporary memory.  */
+                  if (characters > 0) /* implies arg != 0 */
+                    {
+                      char cbuf[64]; /* Assume MB_CUR_MAX <= 64.  */
+                      int count;
+#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+                      mbstate_t state;
+                      memset (&state, '\0', sizeof (mbstate_t));
+#  endif
+
+                      count = local_wcrtomb (cbuf, arg, &state);
+                      if (count <= 0)
+                        /* Inconsistency.  */
+                        abort ();
+                      memcpy (tmpsrc, cbuf, count);
+                    }
+
+                  /* Convert from TCHAR_T[] to DCHAR_T[].  */
+                  tmpdst =
+                    DCHAR_CONV_FROM_ENCODING (locale_charset (),
+                                              iconveh_question_mark,
+                                              tmpsrc, characters,
+                                              NULL,
+                                              NULL, &tmpdst_len);
+                  if (tmpdst == NULL)
+                    {
+                      int saved_errno = errno;
+                      if (!(result == resultbuf || result == NULL))
+                        free (result);
+                      if (buf_malloced != NULL)
+                        free (buf_malloced);
+                      CLEANUP ();
+                      errno = saved_errno;
+                      return NULL;
+                    }
+# endif
+
+                  if (has_width)
+                    {
+# if ENABLE_UNISTDIO
+                      /* Outside POSIX, it's preferable to compare the width
+                         against the number of _characters_ of the converted
+                         value.  */
+                      w = DCHAR_MBSNLEN (result + length, characters);
+# else
+                      /* The width is compared against the number of _bytes_
+                         of the converted value, says POSIX.  */
+                      w = characters;
+# endif
+                    }
+                  else
+                    /* w doesn't matter.  */
+                    w = 0;
+
+                  if (w < width && !(dp->flags & FLAG_LEFT))
+                    {
+                      size_t n = width - w;
+                      ENSURE_ALLOCATION (xsum (length, n));
+                      DCHAR_SET (result + length, ' ', n);
+                      length += n;
+                    }
+
+# if DCHAR_IS_TCHAR
+                  if (has_width)
+                    {
+                      /* We know the number of bytes in advance.  */
+                      ENSURE_ALLOCATION (xsum (length, characters));
+                      if (characters > 0) /* implies arg != 0 */
+                        {
+                          int count;
+#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+                          mbstate_t state;
+                          memset (&state, '\0', sizeof (mbstate_t));
+#  endif
+
+                          count = local_wcrtomb (result + length, arg, &state);
+                          if (count <= 0)
+                            /* Inconsistency.  */
+                            abort ();
+                          length += count;
+                        }
+                    }
+                  else
+                    {
+                      if (arg != 0)
+                        {
+                          char cbuf[64]; /* Assume MB_CUR_MAX <= 64.  */
+                          int count;
+#  if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+                          mbstate_t state;
+                          memset (&state, '\0', sizeof (mbstate_t));
+#  endif
+
+                          count = local_wcrtomb (cbuf, arg, &state);
+                          if (count <= 0)
+                            /* Inconsistency.  */
+                            abort ();
+                          ENSURE_ALLOCATION (xsum (length, count));
+                          memcpy (result + length, cbuf, count);
+                          length += count;
+                        }
+                    }
+# else
+                  ENSURE_ALLOCATION (xsum (length, tmpdst_len));
+                  DCHAR_CPY (result + length, tmpdst, tmpdst_len);
+                  free (tmpdst);
+                  length += tmpdst_len;
+# endif
+
+                  if (w < width && (dp->flags & FLAG_LEFT))
+                    {
+                      size_t n = width - w;
+                      ENSURE_ALLOCATION (xsum (length, n));
+                      DCHAR_SET (result + length, ' ', n);
+                      length += n;
+                    }
+                }
+              }
+#endif
 #if (NEED_PRINTF_DIRECTIVE_A || NEED_PRINTF_LONG_DOUBLE || NEED_PRINTF_DOUBLE) 
&& !defined IN_LIBINTL
             else if ((dp->conversion == 'a' || dp->conversion == 'A')
 # if !(NEED_PRINTF_DIRECTIVE_A || (NEED_PRINTF_LONG_DOUBLE && 
NEED_PRINTF_DOUBLE))




reply via email to

[Prev in Thread] Current Thread [Next in Thread]