[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
vasnprintf: add support for printing wide characters using escapes
From: |
Bruno Haible |
Subject: |
vasnprintf: add support for printing wide characters using escapes |
Date: |
Sun, 19 Apr 2020 17:11:45 +0200 |
User-agent: |
KMail/5.1.3 (Linux/4.4.0-177-generic; KDE/5.18.0; x86_64; ; ) |
printf or fprintf is often used for debugging purposes. In native Windows
programs, one of the most frequent types is a wide string (wchar_t[]).
But there is no working primitive for printing such a wide string: all
of printf("%ls"), _putws, etc. fail if the wide string contains characters
that are not in convertible to the GetACP() code page.
As a makeshift, to allow use of printf and fprintf for debugging purposes,
I'm adding an option ENABLE_WCHAR_FALLBACK, to be enabled at compile time.
The effect is that unconvertible characters get shown in hexadecimal
escape syntax (\unnnn or \Unnnnnnnn) or, if wchar_t is not Unicode encoded,
\wnnnn or \Wnnnnnnnn.
2020-04-19 Bruno Haible <address@hidden>
vasnprintf: Add support for printing wide characters using escapes.
* lib/vasnprintf.c (ENABLE_WCHAR_FALLBACK): Document optional macro.
(wctomb_fallback): New function.
(local_wctomb): New function.
(local_wcrtomb): New function or macro.
(MAX_ROOM_NEEDED): Adjust estimate for %lc.
(VASNPRINTF): Simplify %ls code by use of local_wcrtomb. Add code for
%lc.
diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c
index bca04d7..b3b6e7b 100644
--- a/lib/vasnprintf.c
+++ b/lib/vasnprintf.c
@@ -41,7 +41,14 @@
DCHAR_CONV_FROM_ENCODING A function to convert from char[] to DCHAR[].
DCHAR_IS_UINT8_T Set to 1 if DCHAR_T is uint8_t.
DCHAR_IS_UINT16_T Set to 1 if DCHAR_T is uint16_t.
- DCHAR_IS_UINT32_T Set to 1 if DCHAR_T is uint32_t. */
+ DCHAR_IS_UINT32_T Set to 1 if DCHAR_T is uint32_t.
+ ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions.
+ ENABLE_WCHAR_FALLBACK Set to 1 to avoid EILSEQ during conversion of wide
+ characters (wchar_t) and wide character strings
+ (wchar_t[]) to multibyte sequences. The fallback is
the
+ hexadecimal escape syntax (\unnnn or \Unnnnnnnn) or,
+ if wchar_t is not Unicode encoded, \wnnnn or
\Wnnnnnnnn.
+ */
/* Tell glibc's <stdio.h> to provide a prototype for snprintf().
This must come before <config.h> because <config.h> may include
@@ -277,6 +284,74 @@ local_wcsnlen (const wchar_t *s, size_t maxlen)
# endif
#endif
+#if (((!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF ||
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL) || ENABLE_WCHAR_FALLBACK) &&
HAVE_WCHAR_T) || (ENABLE_WCHAR_FALLBACK && HAVE_WINT_T)) && !WIDE_CHAR_VERSION
+# if ENABLE_WCHAR_FALLBACK
+static size_t
+wctomb_fallback (char *s, wchar_t wc)
+{
+ static char hex[16] = "0123456789ABCDEF";
+
+ s[0] = '\\';
+ if (sizeof (wchar_t) > 2 && wc > 0xffff)
+ {
+# if __STDC_ISO_10646__ || (__GLIBC__ >= 2) || (defined _WIN32 || defined
__CYGWIN__)
+ s[1] = 'U';
+# else
+ s[1] = 'W';
+# endif
+ s[2] = hex[(wc & 0xf0000000U) >> 28];
+ s[3] = hex[(wc & 0xf000000U) >> 24];
+ s[4] = hex[(wc & 0xf00000U) >> 20];
+ s[5] = hex[(wc & 0xf0000U) >> 16];
+ s[6] = hex[(wc & 0xf000U) >> 12];
+ s[7] = hex[(wc & 0xf00U) >> 8];
+ s[8] = hex[(wc & 0xf0U) >> 4];
+ s[9] = hex[wc & 0xfU];
+ return 10;
+ }
+ else
+ {
+# if __STDC_ISO_10646__ || (__GLIBC__ >= 2) || (defined _WIN32 || defined
__CYGWIN__)
+ s[1] = 'u';
+# else
+ s[1] = 'w';
+# endif
+ s[2] = hex[(wc & 0xf000U) >> 12];
+ s[3] = hex[(wc & 0xf00U) >> 8];
+ s[4] = hex[(wc & 0xf0U) >> 4];
+ s[5] = hex[wc & 0xfU];
+ return 6;
+ }
+}
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+static size_t
+local_wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
+{
+ size_t count = wcrtomb (s, wc, ps);
+ if (count == (size_t)(-1))
+ count = wctomb_fallback (s, wc);
+ return count;
+}
+# else
+static int
+local_wctomb (char *s, wchar_t wc)
+{
+ int count = wctomb (s, wc);
+ if (count < 0)
+ count = wctomb_fallback (s, wc);
+ return count;
+}
+# define local_wcrtomb(S, WC, PS) local_wctomb ((S), (WC))
+# endif
+# else
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+# define local_wcrtomb(S, WC, PS) wcrtomb ((S), (WC), (PS))
+# else
+# define local_wcrtomb(S, WC, PS) wctomb ((S), (WC))
+# endif
+# endif
+#endif
+
#if (NEED_PRINTF_DIRECTIVE_A || NEED_PRINTF_LONG_DOUBLE ||
NEED_PRINTF_INFINITE_LONG_DOUBLE || NEED_PRINTF_DOUBLE ||
NEED_PRINTF_INFINITE_DOUBLE) && !defined IN_LIBINTL
/* Determine the decimal-point character according to the current locale. */
# ifndef decimal_point_char_defined
@@ -1677,7 +1752,13 @@ MAX_ROOM_NEEDED (const arguments *ap, size_t arg_index,
FCHAR_T conversion,
case 'c':
# if HAVE_WINT_T && !WIDE_CHAR_VERSION
if (type == TYPE_WIDE_CHAR)
- tmp_length = MB_CUR_MAX;
+ {
+ tmp_length = MB_CUR_MAX;
+# if ENABLE_WCHAR_FALLBACK
+ if (tmp_length < (sizeof (wchar_t) > 2 ? 10 : 6))
+ tmp_length = (sizeof (wchar_t) > 2 ? 10 : 6);
+# endif
+ }
else
# endif
tmp_length = 1;
@@ -2394,7 +2475,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
}
}
#endif
-#if (!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF ||
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL)) && HAVE_WCHAR_T
+#if (!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF ||
(NEED_PRINTF_DIRECTIVE_LS && !defined IN_LIBINTL) || ENABLE_WCHAR_FALLBACK) &&
HAVE_WCHAR_T
else if (dp->conversion == 's'
# if WIDE_CHAR_VERSION
&& a.arg[dp->arg_index].type != TYPE_WIDE_STRING
@@ -2669,11 +2750,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
if (*arg_end == 0)
/* Found the terminating null wide character. */
break;
-# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
- count = wcrtomb (cbuf, *arg_end, &state);
-# else
- count = wctomb (cbuf, *arg_end);
-# endif
+ count = local_wcrtomb (cbuf, *arg_end, &state);
if (count < 0)
{
/* Cannot convert. */
@@ -2714,11 +2791,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
if (*arg_end == 0)
/* Found the terminating null wide character. */
break;
-# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
- count = wcrtomb (cbuf, *arg_end, &state);
-# else
- count = wctomb (cbuf, *arg_end);
-# endif
+ count = local_wcrtomb (cbuf, *arg_end, &state);
if (count < 0)
{
/* Cannot convert. */
@@ -2763,11 +2836,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
if (*arg == 0)
abort ();
-# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
- count = wcrtomb (cbuf, *arg, &state);
-# else
- count = wctomb (cbuf, *arg);
-# endif
+ count = local_wcrtomb (cbuf, *arg, &state);
if (count <= 0)
/* Inconsistency. */
abort ();
@@ -2844,11 +2913,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
if (*arg == 0)
abort ();
-# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
- count = wcrtomb (cbuf, *arg, &state);
-# else
- count = wctomb (cbuf, *arg);
-# endif
+ count = local_wcrtomb (cbuf, *arg, &state);
if (count <= 0)
/* Inconsistency. */
abort ();
@@ -2873,11 +2938,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
if (*arg == 0)
abort ();
-# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
- count = wcrtomb (cbuf, *arg, &state);
-# else
- count = wctomb (cbuf, *arg);
-# endif
+ count = local_wcrtomb (cbuf, *arg, &state);
if (count <= 0)
{
/* Cannot convert. */
@@ -2913,6 +2974,210 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
# endif
}
#endif
+#if ENABLE_WCHAR_FALLBACK && HAVE_WINT_T && !WIDE_CHAR_VERSION
+ else if (dp->conversion == 'c'
+ && a.arg[dp->arg_index].type == TYPE_WIDE_CHAR)
+ {
+ /* Implement the 'lc' directive ourselves, in order to provide
+ the fallback that avoids EILSEQ. */
+ int flags = dp->flags;
+ int has_width;
+ size_t width;
+
+ has_width = 0;
+ width = 0;
+ if (dp->width_start != dp->width_end)
+ {
+ if (dp->width_arg_index != ARG_NONE)
+ {
+ int arg;
+
+ if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
+ abort ();
+ arg = a.arg[dp->width_arg_index].a.a_int;
+ width = arg;
+ if (arg < 0)
+ {
+ /* "A negative field width is taken as a '-' flag
+ followed by a positive field width." */
+ flags |= FLAG_LEFT;
+ width = -width;
+ }
+ }
+ else
+ {
+ const FCHAR_T *digitp = dp->width_start;
+
+ do
+ width = xsum (xtimes (width, 10), *digitp++ - '0');
+ while (digitp != dp->width_end);
+ }
+ has_width = 1;
+ }
+
+ /* %lc in vasnprintf. See the specification of fprintf. */
+ {
+ wchar_t arg = (wchar_t) a.arg[dp->arg_index].a.a_wide_char;
+ size_t characters;
+# if !DCHAR_IS_TCHAR
+ /* This code assumes that TCHAR_T is 'char'. */
+ verify (sizeof (TCHAR_T) == 1);
+ TCHAR_T tmpsrc[64]; /* Assume MB_CUR_MAX <= 64. */
+ DCHAR_T *tmpdst;
+ size_t tmpdst_len;
+# endif
+ size_t w;
+
+# if DCHAR_IS_TCHAR
+ if (has_width)
+# endif
+ {
+ /* Count the number of bytes. */
+ characters = 0;
+ if (arg != 0)
+ {
+ char cbuf[64]; /* Assume MB_CUR_MAX <= 64. */
+ int count;
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+ mbstate_t state;
+ memset (&state, '\0', sizeof (mbstate_t));
+# endif
+
+ count = local_wcrtomb (cbuf, arg, &state);
+ if (count < 0)
+ /* Inconsistency. */
+ abort ();
+ characters = count;
+ }
+ }
+# if DCHAR_IS_TCHAR
+ else
+ {
+ /* The number of bytes doesn't matter. */
+ characters = 0;
+ }
+# endif
+
+# if !DCHAR_IS_TCHAR
+ /* Convert the string into a piece of temporary memory. */
+ if (characters > 0) /* implies arg != 0 */
+ {
+ char cbuf[64]; /* Assume MB_CUR_MAX <= 64. */
+ int count;
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+ mbstate_t state;
+ memset (&state, '\0', sizeof (mbstate_t));
+# endif
+
+ count = local_wcrtomb (cbuf, arg, &state);
+ if (count <= 0)
+ /* Inconsistency. */
+ abort ();
+ memcpy (tmpsrc, cbuf, count);
+ }
+
+ /* Convert from TCHAR_T[] to DCHAR_T[]. */
+ tmpdst =
+ DCHAR_CONV_FROM_ENCODING (locale_charset (),
+ iconveh_question_mark,
+ tmpsrc, characters,
+ NULL,
+ NULL, &tmpdst_len);
+ if (tmpdst == NULL)
+ {
+ int saved_errno = errno;
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ if (buf_malloced != NULL)
+ free (buf_malloced);
+ CLEANUP ();
+ errno = saved_errno;
+ return NULL;
+ }
+# endif
+
+ if (has_width)
+ {
+# if ENABLE_UNISTDIO
+ /* Outside POSIX, it's preferable to compare the width
+ against the number of _characters_ of the converted
+ value. */
+ w = DCHAR_MBSNLEN (result + length, characters);
+# else
+ /* The width is compared against the number of _bytes_
+ of the converted value, says POSIX. */
+ w = characters;
+# endif
+ }
+ else
+ /* w doesn't matter. */
+ w = 0;
+
+ if (w < width && !(dp->flags & FLAG_LEFT))
+ {
+ size_t n = width - w;
+ ENSURE_ALLOCATION (xsum (length, n));
+ DCHAR_SET (result + length, ' ', n);
+ length += n;
+ }
+
+# if DCHAR_IS_TCHAR
+ if (has_width)
+ {
+ /* We know the number of bytes in advance. */
+ ENSURE_ALLOCATION (xsum (length, characters));
+ if (characters > 0) /* implies arg != 0 */
+ {
+ int count;
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+ mbstate_t state;
+ memset (&state, '\0', sizeof (mbstate_t));
+# endif
+
+ count = local_wcrtomb (result + length, arg, &state);
+ if (count <= 0)
+ /* Inconsistency. */
+ abort ();
+ length += count;
+ }
+ }
+ else
+ {
+ if (arg != 0)
+ {
+ char cbuf[64]; /* Assume MB_CUR_MAX <= 64. */
+ int count;
+# if HAVE_WCRTOMB && !defined GNULIB_defined_mbstate_t
+ mbstate_t state;
+ memset (&state, '\0', sizeof (mbstate_t));
+# endif
+
+ count = local_wcrtomb (cbuf, arg, &state);
+ if (count <= 0)
+ /* Inconsistency. */
+ abort ();
+ ENSURE_ALLOCATION (xsum (length, count));
+ memcpy (result + length, cbuf, count);
+ length += count;
+ }
+ }
+# else
+ ENSURE_ALLOCATION (xsum (length, tmpdst_len));
+ DCHAR_CPY (result + length, tmpdst, tmpdst_len);
+ free (tmpdst);
+ length += tmpdst_len;
+# endif
+
+ if (w < width && (dp->flags & FLAG_LEFT))
+ {
+ size_t n = width - w;
+ ENSURE_ALLOCATION (xsum (length, n));
+ DCHAR_SET (result + length, ' ', n);
+ length += n;
+ }
+ }
+ }
+#endif
#if (NEED_PRINTF_DIRECTIVE_A || NEED_PRINTF_LONG_DOUBLE || NEED_PRINTF_DOUBLE)
&& !defined IN_LIBINTL
else if ((dp->conversion == 'a' || dp->conversion == 'A')
# if !(NEED_PRINTF_DIRECTIVE_A || (NEED_PRINTF_LONG_DOUBLE &&
NEED_PRINTF_DOUBLE))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- vasnprintf: add support for printing wide characters using escapes,
Bruno Haible <=