bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

uchar: ISO C 23: Define char8_t


From: Bruno Haible
Subject: uchar: ISO C 23: Define char8_t
Date: Mon, 27 Mar 2023 15:12:38 +0200

ISO C 23 specifies a new type, char8_t, to be defined by <uchar.h>.

This patch adds it.


2023-03-27  Bruno Haible  <bruno@clisp.org>

        uchar: ISO C 23: Define char8_t.
        * lib/uchar.in.h (char8_t): New type or macro.
        * m4/uchar_h.m4 (gl_TYPE_CHAR8_T): New macro.
        (gl_UCHAR_H): Invoke it. Set CXX_HAS_CHAR8_TYPE.
        * modules/uchar (Makefile.am): Substitute CXX_HAS_CHAR8_TYPE,
        GNULIBHEADERS_OVERRIDE_CHAR8_T.
        * tests/test-uchar.c: Add tests for char8_t.

diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index 115ae1e84b..4d5f07fcce 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -17,7 +17,7 @@
 /* Written by Bruno Haible <bruno@clisp.org>, 2019.  */
 
 /*
- * ISO C 11 <uchar.h> for platforms that lack it.
+ * ISO C 23 <uchar.h> for platforms that lack it.
  */
 
 #ifndef _@GUARD_PREFIX@_UCHAR_H
@@ -58,11 +58,26 @@
 /* The definitions of _GL_FUNCDECL_RPL etc. are copied here.  */
 
 
+#if !(@HAVE_UCHAR_H@ || (defined __cplusplus && @CXX_HAS_CHAR8_TYPE@))
+
+/* An 8-bit variant of wchar_t.
+   Note: This type is only mandated by ISO C 23 or newer, and it does
+   denote UTF-8 units.  */
+typedef unsigned char char8_t;
+
+#elif @GNULIBHEADERS_OVERRIDE_CHAR8_T@
+
+typedef unsigned char gl_char8_t;
+# define char8_t gl_char8_t
+
+#endif
+
 #if !(@HAVE_UCHAR_H@ || (defined __cplusplus && @CXX_HAS_UCHAR_TYPES@))
 
 /* A 16-bit variant of wchar_t.
-   Note: This type does *NOT* denote UTF-16 units.  (Only on platforms
-   on which __STDC_UTF_16__ is defined.)  */
+   Note: This type is only mandated by ISO C 11 or newer.  In ISO C 23
+   and newer, it denotes UTF-16 units; in older versions of ISO C it did
+   so only on platforms on which __STDC_UTF_16__ was defined.  */
 typedef uint_least16_t char16_t;
 
 #elif @GNULIBHEADERS_OVERRIDE_CHAR16_T@
@@ -75,8 +90,9 @@ typedef uint_least16_t gl_char16_t;
 #if !(@HAVE_UCHAR_H@ || (defined __cplusplus && @CXX_HAS_UCHAR_TYPES@))
 
 /* A 32-bit variant of wchar_t.
-   Note: This type does *NOT* denote UTF-32 code points.  (Only on platforms
-   on which __STDC_UTF_32__ is defined.)  */
+   Note: This type is only mandated by ISO C 11 or newer.  In ISO C 23
+   and newer, it denotes UTF-32 code points; in older versions of ISO C
+   it did so only on platforms on which __STDC_UTF_32__ was defined.  */
 typedef uint_least32_t char32_t;
 
 #elif @GNULIBHEADERS_OVERRIDE_CHAR32_T@
diff --git a/m4/uchar_h.m4 b/m4/uchar_h.m4
index 2d1869a293..6df3056b32 100644
--- a/m4/uchar_h.m4
+++ b/m4/uchar_h.m4
@@ -1,4 +1,4 @@
-# uchar_h.m4 serial 20
+# uchar_h.m4 serial 21
 dnl Copyright (C) 2019-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -19,6 +19,7 @@ AC_DEFUN_ONCE([gl_UCHAR_H]
   fi
   AC_SUBST([HAVE_UCHAR_H])
 
+  gl_TYPE_CHAR8_T
   gl_TYPE_CHAR16_T
   gl_TYPE_CHAR32_T
 
@@ -26,6 +27,7 @@ AC_DEFUN_ONCE([gl_UCHAR_H]
   dnl on some platforms (e.g. OpenBSD 6.7), and as types defined by many
   dnl header files (<limits.h>, <stddef.h>, <stdint.h>, <stdio.h>, <stdlib.h>
   dnl and others) on some platforms (e.g. Mac OS X 10.13).
+  dnl The same thing may also happen for 'char8_t'; so, be prepared for it.
   m4_ifdef([gl_ANSI_CXX], [AC_REQUIRE([gl_ANSI_CXX])])
   CXX_HAS_UCHAR_TYPES=0
   if test $HAVE_UCHAR_H = 0; then
@@ -53,6 +55,31 @@ AC_DEFUN_ONCE([gl_UCHAR_H]
     fi
   fi
   AC_SUBST([CXX_HAS_UCHAR_TYPES])
+  CXX_HAS_CHAR8_TYPE=0
+  if test $HAVE_UCHAR_H = 0; then
+    if test "$CXX" != no; then
+      AC_CACHE_CHECK([whether the C++ compiler predefines the char8_t types],
+        [gl_cv_cxx_has_char8_type],
+        [dnl We can't use AC_LANG_PUSH([C++]) and AC_LANG_POP([C++]) here, due to
+         dnl an autoconf bug <https://savannah.gnu.org/support/?110294>.
+         cat > conftest.cpp <<\EOF
+#include <stddef.h>
+char8_t a;
+EOF
+         gl_command="$CXX $CXXFLAGS $CPPFLAGS -c conftest.cpp"
+         if AC_TRY_EVAL([gl_command]); then
+           gl_cv_cxx_has_char8_type=yes
+         else
+           gl_cv_cxx_has_char8_type=no
+         fi
+         rm -fr conftest*
+        ])
+      if test $gl_cv_cxx_has_char8_type = yes; then
+        CXX_HAS_CHAR8_TYPE=1
+      fi
+    fi
+  fi
+  AC_SUBST([CXX_HAS_CHAR8_TYPE])
 
   dnl Test whether a 'char32_t' can hold more characters than a 'wchar_t'.
   gl_STDINT_BITSIZEOF([wchar_t], [gl_STDINT_INCLUDES])
@@ -71,6 +98,28 @@ AC_DEFUN_ONCE([gl_UCHAR_H]
     ]], [c32rtomb mbrtoc32])
 ])
 
+AC_DEFUN_ONCE([gl_TYPE_CHAR8_T],
+[
+  dnl Determine whether gnulib's <uchar.h> would, if present, override char8_t.
+  AC_CACHE_CHECK([whether char8_t is correctly defined],
+    [gl_cv_type_char8_t_works],
+    [AC_COMPILE_IFELSE(
+       [AC_LANG_PROGRAM([[
+          #include <uchar.h>
+          int verify[(char8_t)(-1) >= 0 && sizeof (char8_t) == sizeof (unsigned char) ? 1 : -1];
+          ]])
+       ],
+       [gl_cv_type_char8_t_works=yes],
+       [gl_cv_type_char8_t_works=no])
+    ])
+  if test $gl_cv_type_char8_t_works = no; then
+    GNULIBHEADERS_OVERRIDE_CHAR8_T=1
+  else
+    GNULIBHEADERS_OVERRIDE_CHAR8_T=0
+  fi
+  AC_SUBST([GNULIBHEADERS_OVERRIDE_CHAR8_T])
+])
+
 dnl On Haiku 2020, char16_t and char32_t are incorrectly defined.
 dnl See <https://dev.haiku-os.org/ticket/15990>.
 AC_DEFUN_ONCE([gl_TYPE_CHAR16_T],
diff --git a/modules/uchar b/modules/uchar
index 2c947ed243..8cf4cfb5cf 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -30,8 +30,10 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
              -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
              -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
              -e 's|@''NEXT_UCHAR_H''@|$(NEXT_UCHAR_H)|g' \
+             -e 's|@''CXX_HAS_CHAR8_TYPE''@|$(CXX_HAS_CHAR8_TYPE)|g' \
              -e 's|@''CXX_HAS_UCHAR_TYPES''@|$(CXX_HAS_UCHAR_TYPES)|g' \
              -e 's|@''SMALL_WCHAR_T''@|$(SMALL_WCHAR_T)|g' \
+             -e 's|@''GNULIBHEADERS_OVERRIDE_CHAR8_T''@|$(GNULIBHEADERS_OVERRIDE_CHAR8_T)|g' \
              -e 's|@''GNULIBHEADERS_OVERRIDE_CHAR16_T''@|$(GNULIBHEADERS_OVERRIDE_CHAR16_T)|g' \
              -e 's|@''GNULIBHEADERS_OVERRIDE_CHAR32_T''@|$(GNULIBHEADERS_OVERRIDE_CHAR32_T)|g' \
              -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \
diff --git a/tests/test-uchar.c b/tests/test-uchar.c
index 0d5b7d77eb..38c5f2538e 100644
--- a/tests/test-uchar.c
+++ b/tests/test-uchar.c
@@ -23,15 +23,23 @@
 /* Check that the types are defined.  */
 mbstate_t a = { 0 };
 size_t b = 5;
-char16_t c = 'x';
-char32_t d = 'y';
+char8_t c = 'x';
+char16_t d = 'y';
+char32_t e = 'z';
 
-/* Check that char16_t and char32_t are unsigned types.  */
+/* Check that char8_t, char16_t, and char32_t are unsigned types.  */
+static_assert ((char8_t)(-1) >= 0);
 static_assert ((char16_t)(-1) >= 0);
 #if !defined __HP_cc
 static_assert ((char32_t)(-1) >= 0);
 #endif
 
+/* Check that char8_t is at least 8 bits wide.  */
+static_assert ((char8_t)0xFF != (char8_t)0x7F);
+
+/* Check that char16_t is at least 16 bits wide.  */
+static_assert ((char16_t)0xFFFF != (char16_t)0x7FFF);
+
 /* Check that char32_t is at least 31 bits wide.  */
 static_assert ((char32_t)0x7FFFFFFF != (char32_t)0x3FFFFFFF);
 






reply via email to

[Prev in Thread] Current Thread [Next in Thread]