From f6112aca41ea8bd2028ea5b00a3a75db14a32eef Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Thu, 1 May 2014 23:09:00 -0700
Subject: [PATCH] awk: simplify dfa.c by having it not include mbsupport.h
 directly

This syncs dfa.c better with 'grep'.
* Makefile.am (STDBOOL_H, WCHAR_H, WCTYPE_H): New macros.
($(gawk_OBJECTS)): Depend on them.
(stdbool.h, wchar.h, wctype.h): New rules.
(CLEANFILES): Add the new files to this list.
* awk.h, regex_internal.h, dfa.c: Don't include mbsupport.h.
* configure.ac: Arrange for config.h to include it instead.
(STDBOOL_H, WCHAR_H, WCTYPE_H): New configuration items.
* custom.h (_GL_ATTRIBUTE_PURE): Move here from dfa.c, to lessen the
number of differences between grep's dfa.c and ours.
* dfa.c: Include wchar.h and wctype.h unconditionally, as
this simplifies the use of dfa.c in grep, and it does no harm
in gawk.
(setlocale) [!LC_ALL]:
(gawk_mb_cur_max, MB_CUR_MAX, mbrtowc) [LIBC_IS_BORKED]:
Move to mbsupport.h (needed for consistency in all uses),
and fix mbrtowc to return size_t.
(struct dfa, dfambcache, mbs_to_wchar)
(is_valid_unibyte_character, setbit_wc, using_utf8, FETCH_WC)
(addtok_wc, add_utf8_anychar, atom, state_index, epsclosure)
(dfaanalyze, dfastate, prepare_wc_buf, dfaoptimize, dfafree, dfamust):
* dfasearch.c (EGexecute):
* grep.c (main):
* searchutils.c (mbtoupper):
Assume MBS_SUPPORT.
* dfa.h: Include stdbool.h unconditionally, so that this file is
closer to what's in grep.
* mbsupport.h [!MBS_SUPPORT]: Include wchar.h, wctype.h
before overriding their definitions.
(WEOF, towupper, towlower, btowc, iswalnum, iswalpha, iswupper)
(iswlower, mbrtowc, wcrtomb, wctype, iswctype, wcscoll):
#undef before #defining.
(btowc): Parenthesize properly.
(mbrtowc, wcrtomb): New macros.
(wctype, iswctype, wcscoll): Define to gawk_wctype etc. to avoid
collisions with standard library.
---
 ChangeLog           | 40 +++++++++++++++++++++++
 Makefile.am         | 11 +++++++
 awk.h               |  2 --
 configure.ac        |  9 ++++++
 custom.h            |  7 ++++
 dfa.c               | 93 +++++++----------------------------------------------
 dfa.h               |  4 ---
 mbsupport.h         | 57 +++++++++++++++++++++++++++++---
 missing_d/ChangeLog |  4 +++
 missing_d/wcmisc.c  | 10 ------
 regex.h             | 18 ++++++++---
 regex_internal.h    |  2 --
 12 files changed, 149 insertions(+), 108 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c1b294b..8ebfaed 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,43 @@
+2014-05-01  Paul Eggert  <address@hidden>
+
+	awk: simplify dfa.c by having it not include mbsupport.h directly
+	This syncs dfa.c better with 'grep'.
+	* Makefile.am (STDBOOL_H, WCHAR_H, WCTYPE_H): New macros.
+	($(gawk_OBJECTS)): Depend on them.
+	(stdbool.h, wchar.h, wctype.h): New rules.
+	(CLEANFILES): Add the new files to this list.
+	* awk.h, regex_internal.h, dfa.c: Don't include mbsupport.h.
+	* configure.ac: Arrange for config.h to include it instead.
+	(STDBOOL_H, WCHAR_H, WCTYPE_H): New configuration items.
+	* custom.h (_GL_ATTRIBUTE_PURE): Move here from dfa.c, to lessen the
+	number of differences between grep's dfa.c and ours.
+	* dfa.c: Include wchar.h and wctype.h unconditionally, as
+	this simplifies the use of dfa.c in grep, and it does no harm
+	in gawk.
+	(setlocale) [!LC_ALL]:
+	(gawk_mb_cur_max, MB_CUR_MAX, mbrtowc) [LIBC_IS_BORKED]:
+	Move to mbsupport.h (needed for consistency in all uses),
+	and fix mbrtowc to return size_t.
+	(struct dfa, dfambcache, mbs_to_wchar)
+	(is_valid_unibyte_character, setbit_wc, using_utf8, FETCH_WC)
+	(addtok_wc, add_utf8_anychar, atom, state_index, epsclosure)
+	(dfaanalyze, dfastate, prepare_wc_buf, dfaoptimize, dfafree, dfamust):
+	* dfasearch.c (EGexecute):
+	* grep.c (main):
+	* searchutils.c (mbtoupper):
+	Assume MBS_SUPPORT.
+	* dfa.h: Include stdbool.h unconditionally, so that this file is
+	closer to what's in grep.
+	* mbsupport.h [!MBS_SUPPORT]: Include wchar.h, wctype.h
+	before overriding their definitions.
+	(WEOF, towupper, towlower, btowc, iswalnum, iswalpha, iswupper)
+	(iswlower, mbrtowc, wcrtomb, wctype, iswctype, wcscoll):
+	#undef before #defining.
+	(btowc): Parenthesize properly.
+	(mbrtowc, wcrtomb): New macros.
+	(wctype, iswctype, wcscoll): Define to gawk_wctype etc. to avoid
+	collisions with standard library.
+
 2014-04-25         Andrew J. Schorr     <address@hidden>
 
 	* io.c (two_way_open): In forked child, reset SIGPIPE to SIG_DFL.
diff --git a/Makefile.am b/Makefile.am
index 6e5715d..f1a725a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -196,6 +196,17 @@ command.c: command.y
 	$(YACC) -p zz $<
 	sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c
 
+# Arrange for some standard headers on platforms that lack them.
+STDBOOL_H = @STDBOOL_H@
+WCHAR_H = @WCHAR_H@
+WCTYPE_H = @WCTYPE_H@
+$(gawk_OBJECTS): $(STDBOOL_H) $(WCHAR_H) $(WCTYPE_H)
+stdbool.h:
+	echo '#include "missing_d/gawkbool.h"' >$@
+wchar.h wctype.h:
+	echo '' >$@
+CLEANFILES += stdbool.h wchar.h wctype.h
+
 # This is for my development & testing.
 efence: gawk
 	$(CC) $(LDFLAGS) -o gawk $$(ls *.o | grep -v '_p.o$$') $(LIBS) -lefence
diff --git a/awk.h b/awk.h
index aefdd07..cdba7a8 100644
--- a/awk.h
+++ b/awk.h
@@ -95,8 +95,6 @@ extern int errno;
 #include "missing_d/gawkbool.h"
 #endif
 
-#include "mbsupport.h" /* defines MBS_SUPPORT */
-
 #if MBS_SUPPORT
 /* We can handle multibyte strings.  */
 #include <wchar.h>
diff --git a/configure.ac b/configure.ac
index e7e2d5f..2447c32 100644
--- a/configure.ac
+++ b/configure.ac
@@ -153,6 +153,14 @@ else
 	AC_CHECK_HEADERS(strings.h)
 fi
 
+STDBOOL_H= WCHAR_H= WCTYPE_H=
+test "$ac_cv_header_stdbool_h" != yes && STDBOOL_H=stdbool.h
+test "$ac_cv_header_wchar_h" != yes && WCHAR_H=wchar.h
+test "$ac_cv_header_wctype_h" != yes && WCTYPE_H=wctype.h
+AC_SUBST([STDBOOL_H])
+AC_SUBST([WCHAR_H])
+AC_SUBST([WCTYPE_H])
+
 dnl Check cross compiling
 AM_CONDITIONAL([TEST_CROSS_COMPILE], [test "x$build_alias" != "x$host_alias"])
 
@@ -390,6 +398,7 @@ AC_C_STRINGIZE
 
 AC_CONFIG_HEADERS([config.h:configh.in])
 AH_BOTTOM([#include "custom.h"])
+AH_BOTTOM([#include "mbsupport.h"])
 
 dnl Crude but small hack to make plug-ins work on Mac OS X
 dnl We should really use the libtool value for shrext_cmds, but that
diff --git a/custom.h b/custom.h
index 36b4aa0..5b19dd4 100644
--- a/custom.h
+++ b/custom.h
@@ -76,3 +76,10 @@
 extern int setenv(const char *name, const char *value, int rewrite);
 extern int unsetenv(const char *name);
 #endif
+
+/* The __pure__ attribute was added in gcc 2.96.  */
+#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
+#else
+# define _GL_ATTRIBUTE_PURE /* empty */
+#endif
diff --git a/dfa.c b/dfa.c
index d306d5c..9c41fd1 100644
--- a/dfa.c
+++ b/dfa.c
@@ -22,6 +22,8 @@
 
 #include <config.h>
 
+#include "dfa.h"
+
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
@@ -38,11 +40,6 @@
 #include <locale.h>
 #endif
 
-/* Gawk doesn't use Gnulib, so don't assume that setlocale is present.  */
-#ifndef LC_ALL
-# define setlocale(category, locale) NULL
-#endif
-
 #define STREQ(a, b) (strcmp (a, b) == 0)
 
 /* ISASCIIDIGIT differs from isdigit, as follows:
@@ -59,26 +56,11 @@
 #include "gettext.h"
 #define _(str) gettext (str)
 
-#include "mbsupport.h" /* Define MBS_SUPPORT to 1 or 0, as appropriate.  */
-#if MBS_SUPPORT
-/* We can handle multibyte strings.  */
-# include <wchar.h>
-# include <wctype.h>
-#endif
-
-#ifdef GAWK
-/* The __pure__ attribute was added in gcc 2.96.  */
-#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
-# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
-#else
-# define _GL_ATTRIBUTE_PURE /* empty */
-#endif
-#endif /* GAWK */
+#include <wchar.h>
+#include <wctype.h>
 
 #include "xalloc.h"
 
-#include "dfa.h"
-
 #ifdef GAWK
 static int
 is_blank (int c)
@@ -87,14 +69,6 @@ is_blank (int c)
 }
 #endif /* GAWK */
 
-#ifdef LIBC_IS_BORKED
-extern int gawk_mb_cur_max;
-#undef MB_CUR_MAX
-#define MB_CUR_MAX gawk_mb_cur_max
-#undef mbrtowc
-#define mbrtowc(a, b, c, d) (-1)
-#endif
-
 /* HPUX defines these as macros in sys/param.h.  */
 #ifdef setbit
 # undef setbit
@@ -402,13 +376,11 @@ struct dfa
    */
   int *multibyte_prop;
 
-#if MBS_SUPPORT
   /* A table indexed by byte values that contains the corresponding wide
      character (if any) for that byte.  WEOF means the byte is the
      leading byte of a multibyte character.  Invalid and null bytes are
      mapped to themselves.  */
   wint_t mbrtowc_cache[NOTCHAR];
-#endif
 
   /* Array of the bracket expression in the DFA.  */
   struct mb_char_classes *mbcsets;
@@ -488,7 +460,6 @@ static void regexp (void);
 static void
 dfambcache (struct dfa *d)
 {
-#if MBS_SUPPORT
   int i;
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
@@ -505,10 +476,8 @@ dfambcache (struct dfa *d)
         }
       d->mbrtowc_cache[uc] = wi;
     }
-#endif
 }
 
-#if MBS_SUPPORT
 /* Store into *PWC the result of converting the leading bytes of the
    multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
    and updating the conversion state in *D.  On conversion error,
@@ -543,7 +512,6 @@ mbs_to_wchar (wchar_t *pwc, char const *s, size_t n, struct dfa *d)
   *pwc = wc;
   return 1;
 }
-#endif
 
 #ifdef DEBUG
 
@@ -737,7 +705,7 @@ static charclass newline;
 #ifdef __GLIBC__
 # define is_valid_unibyte_character(c) 1
 #else
-# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
+# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
 #endif
 
 /* Return non-zero if C is a "word-constituent" byte; zero otherwise.  */
@@ -798,17 +766,12 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 static bool
 setbit_wc (wint_t wc, charclass c)
 {
-#if MBS_SUPPORT
   int b = wctob (wc);
   if (b == EOF)
     return false;
 
   setbit (b, c);
   return true;
-#else
-  abort ();
-   /*NOTREACHED*/ return false;
-#endif
 }
 
 /* Set a bit for B and its case variants in the charclass C.
@@ -904,7 +867,6 @@ static wchar_t wctok;           /* Wide character representation of the current
                                    multibyte character.  */
 
 
-#if MBS_SUPPORT
 /* Note that characters become unsigned here.  */
 # define FETCH_WC(c, wc, eoferr)		\
   do {						\
@@ -927,23 +889,6 @@ static wchar_t wctok;           /* Wide character representation of the current
       }						\
   } while (0)
 
-#else
-/* Note that characters become unsigned here.  */
-# define FETCH_WC(c, unused, eoferr)  \
-  do {				      \
-    if (! lexleft)		      \
-      {				      \
-        if ((eoferr) != 0)	      \
-          dfaerror (eoferr);	      \
-        else			      \
-          return lasttok = END;	      \
-      }				      \
-    (c) = to_uchar (*lexptr++);       \
-    --lexleft;			      \
-  } while (0)
-
-#endif /* MBS_SUPPORT */
-
 #ifndef MIN
 # define MIN(a,b) ((a) < (b) ? (a) : (b))
 #endif
@@ -1728,7 +1673,6 @@ addtok (token t)
     }
 }
 
-#if MBS_SUPPORT
 /* We treat a multibyte character as a single atom, so that DFA
    can treat a multibyte character as a single expression.
 
@@ -1760,17 +1704,10 @@ addtok_wc (wint_t wc)
       addtok (CAT);
     }
 }
-#else
-static void
-addtok_wc (wint_t wc)
-{
-}
-#endif
 
 static void
 add_utf8_anychar (void)
 {
-#if MBS_SUPPORT
   static const charclass utf8_classes[5] = {
     {0, 0, 0, 0, ~0, ~0, 0, 0},		/* 80-bf: non-leading bytes */
     {~0, ~0, ~0, ~0, 0, 0, 0, 0},       /* 00-7f: 1-byte sequence */
@@ -1815,7 +1752,6 @@ add_utf8_anychar (void)
       addtok (CAT);
       addtok (OR);
     }
-#endif
 }
 
 /* The grammar understood by the parser is as follows.
@@ -1856,7 +1792,7 @@ add_utf8_anychar (void)
 static void
 atom (void)
 {
-  if (MBS_SUPPORT && tok == WCHAR)
+  if (tok == WCHAR)
     {
       addtok_wc (wctok);
 
@@ -1873,7 +1809,7 @@ atom (void)
 
       tok = lex ();
     }
-  else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
+  else if (tok == ANYCHAR && using_utf8 ())
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -1887,9 +1823,7 @@ atom (void)
     }
   else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
            || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-#if MBS_SUPPORT
            || tok == ANYCHAR || tok == MBCSET
-#endif /* MBS_SUPPORT */
            || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
     {
       addtok (tok);
@@ -2224,10 +2158,8 @@ epsclosure (position_set * s, struct dfa const *d)
   for (i = 0; i < s->nelem; ++i)
     if (d->tokens[s->elems[i].index] >= NOTCHAR
         && d->tokens[s->elems[i].index] != BACKREF
-#if MBS_SUPPORT
         && d->tokens[s->elems[i].index] != ANYCHAR
         && d->tokens[s->elems[i].index] != MBCSET
-#endif
         && d->tokens[s->elems[i].index] < CSET)
       {
         old = s->elems[i];
@@ -2541,9 +2473,7 @@ dfaanalyze (struct dfa *d, int searchflag)
      it with its epsilon closure.  */
   for (i = 0; i < d->tindex; ++i)
     if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
-#if MBS_SUPPORT
         || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
-#endif
         || d->tokens[i] >= CSET)
       {
 #ifdef DEBUG
@@ -2643,9 +2573,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         setbit (d->tokens[pos.index], matches);
       else if (d->tokens[pos.index] >= CSET)
         copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
-      else if (MBS_SUPPORT
-               && (d->tokens[pos.index] == ANYCHAR
-                   || d->tokens[pos.index] == MBCSET))
+      else if (d->tokens[pos.index] == ANYCHAR
+               || d->tokens[pos.index] == MBCSET)
         /* MB_CUR_MAX > 1  */
         {
           /* ANYCHAR and MBCSET must match with a single character, so we
@@ -2820,7 +2749,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
       /* If we are building a searching matcher, throw in the positions
          of state 0 as well.  */
       if (d->searchflag
-          && (!MBS_SUPPORT || (!d->multibyte || !next_isnt_1st_byte)))
+          && (MB_CUR_MAX == 1 || !next_isnt_1st_byte))
         for (j = 0; j < d->states[0].elems.nelem; ++j)
           insert (d->states[0].elems.elems[j], &follows);
 
@@ -3541,7 +3470,7 @@ dfaoptimize (struct dfa *d)
 {
   size_t i;
 
-  if (!MBS_SUPPORT || !using_utf8 ())
+  if (!using_utf8 ())
     return;
 
   for (i = 0; i < d->tindex; ++i)
diff --git a/dfa.h b/dfa.h
index 1514236..60aff11 100644
--- a/dfa.h
+++ b/dfa.h
@@ -19,11 +19,7 @@
 /* Written June, 1988 by Mike Haertel */
 
 #include <regex.h>
-#ifdef HAVE_STDBOOL_H
 #include <stdbool.h>
-#else
-#include "missing_d/gawkbool.h"
-#endif /* HAVE_STDBOOL_H */
 #include <stddef.h>
 
 /* Element of a list of strings, at least one of which is known to
diff --git a/mbsupport.h b/mbsupport.h
index 9a62486..198a0f3 100644
--- a/mbsupport.h
+++ b/mbsupport.h
@@ -66,6 +66,15 @@
 #endif
 
 #if ! MBS_SUPPORT
+
+/* Include wchar.h and wctype.h so their definitions can be overridden.  */
+
+# include <wchar.h>
+# include <wctype.h>
+
+/* Override the definitions of wchar.h and wctype.h to provide a
+   unibyte substitute that is good enough for Gawk.  */
+
 # undef MB_CUR_MAX
 # define MB_CUR_MAX 1
 
@@ -78,15 +87,24 @@
 #define wctype_t	int
 #define wint_t		int
 #define mbstate_t	int
+#undef WEOF
 #define WEOF		EOF
+#undef towupper
 #define towupper	toupper
+#undef towlower
 #define towlower	tolower
 #ifndef __DJGPP__
-#define btowc(x)	((int)x)
+#undef btowc
+#define btowc(x)	((int) (x))
 #endif
+#undef iswalnum
 #define iswalnum	isalnum
+#undef iswalpha
 #define iswalpha	isalpha
+#undef iswupper
 #define iswupper	isupper
+#undef iswlower
+#define iswlower	islower
 #if defined(ZOS_USS)
 #undef towupper
 #undef towlower
@@ -94,12 +112,43 @@
 #undef iswalnum
 #undef iswalpha
 #undef iswupper
-#undef wctype
-#undef iswctype
-#undef wcscoll
 #endif
 
+#undef mbrtowc
+#define mbrtowc(pwc, s, n, ps) ((size_t) -1)
+#undef wcrtomb
+#define wcrtomb(s, wc, ps) ((size_t) -1)
+
+#undef wctype
+#define wctype gawk_wctype
 extern wctype_t wctype(const char *name);
+#undef iswctype
+#define iswctype gawk_iswctype
 extern int iswctype(wint_t wc, wctype_t desc);
+#undef wcscoll
+#define wcscoll gawk_wcscoll
 extern int wcscoll(const wchar_t *ws1, const wchar_t *ws2);
 #endif
+
+#ifdef LIBC_IS_BORKED
+# include <wchar.h>
+extern int gawk_mb_cur_max;
+# undef MB_CUR_MAX
+# undef mbrtowc
+# define MB_CUR_MAX gawk_mb_cur_max
+# define mbrtowc(a, b, c, d) ((size_t) -1)
+#endif
+
+#include <locale.h>
+#ifndef LC_ALL
+# define setlocale(category, locale) NULL
+#endif
+
+#include <assert.h>
+#ifndef static_assert
+# define static_assert(cond, diagnostic) \
+    extern int (*foo (void)) [!!sizeof (struct { int foo: (cond) ? 8 : -1; })]
+#endif
+
+/* Make sure RE_DUP_MAX gets the correct value.  */
+#define _REGEX_INCLUDE_LIMITS_H
diff --git a/missing_d/ChangeLog b/missing_d/ChangeLog
index 70fbde6..4686c74 100644
--- a/missing_d/ChangeLog
+++ b/missing_d/ChangeLog
@@ -1,3 +1,7 @@
+2014-05-01  Paul Eggert  <address@hidden>
+
+	* wcmisc.c: Remove now-unnecessary ifdefs.
+
 2014-04-08         Arnold D. Robbins     <address@hidden>
 
 	* 4.1.1: Release tar ball made.
diff --git a/missing_d/wcmisc.c b/missing_d/wcmisc.c
index d2b7aa0..89e24c9 100644
--- a/missing_d/wcmisc.c
+++ b/missing_d/wcmisc.c
@@ -16,7 +16,6 @@
    Foundation, Inc.,
    51 Franklin Street - Fifth Floor, Boston, MA  02110-1301, USA */
 
-#if !defined(HAVE_WCTYPE) || !defined(HAVE_ISWCTYPE)
 static const char *classes[] = {
 	"<dummy>",
 	"alnum",
@@ -33,16 +32,12 @@ static const char *classes[] = {
 	"xdigit",
 	NULL
 };
-#endif
 
-#ifndef HAVE_ISWCTYPE
 static int is_blank (int c)
 {
    return (c == ' ' || c == '\t');
 }
-#endif
 
-#ifndef HAVE_WCTYPE
 wctype_t wctype(const char *name)
 {
 	int i;
@@ -53,9 +48,7 @@ wctype_t wctype(const char *name)
 
 	return 0;
 }
-#endif
 
-#ifndef HAVE_ISWCTYPE
 int iswctype(wint_t wc, wctype_t desc)
 {
 	int j = sizeof(classes) / sizeof(classes[0]);
@@ -79,9 +72,7 @@ int iswctype(wint_t wc, wctype_t desc)
 	default:	return 0;
 	}
 }
-#endif
 
-#ifndef HAVE_WCSCOLL
 int wcscoll(const wchar_t *ws1, const wchar_t *ws2)
 {
 	size_t i;
@@ -95,6 +86,5 @@ int wcscoll(const wchar_t *ws1, const wchar_t *ws2)
 
 	return (ws1[i] - ws2[i]);
 }
-#endif
 
 /*wcmisc.c*/
diff --git a/regex.h b/regex.h
index 5660296..400b407 100644
--- a/regex.h
+++ b/regex.h
@@ -264,14 +264,24 @@ extern reg_syntax_t re_syntax_options;
    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
 /* [[[end syntaxes]]] */
-
-/* Maximum number of duplicates an interval can allow.  Some systems
-   (erroneously) define this in other header files, but we want our
+
+/* Maximum number of duplicates an interval can allow.  POSIX-conforming
+   systems might define this in <limits.h>, but we want our
    value, so remove any previous define.  */
+# ifdef _REGEX_INCLUDE_LIMITS_H
+#  include <limits.h>
+# endif
 # ifdef RE_DUP_MAX
 #  undef RE_DUP_MAX
 # endif
-/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+
+/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
+   the counter as a 2-byte signed integer.  This is no longer true, so
+   RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
+   ((SIZE_MAX - 9) / 10) if _REGEX_LARGE_OFFSETS is defined.
+   However, there would be a huge performance problem if someone
+   actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
+   its historical value.  */
 # define RE_DUP_MAX (0x7fff)
 #endif
 
diff --git a/regex_internal.h b/regex_internal.h
index c8981a0..758cf47 100644
--- a/regex_internal.h
+++ b/regex_internal.h
@@ -26,8 +26,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "mbsupport.h" /* gawk */
-
 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
 # include <langinfo.h>
 #endif
-- 
1.9.0