diff --git a/awk.h b/awk.h index 86c8883..636be96 100644 --- a/awk.h +++ b/awk.h @@ -1591,10 +1591,6 @@ extern const wchar_t *wcasestrstr(const wchar_t *haystack, size_t hs_len, const wchar_t *needle, size_t needle_len); extern void r_free_wstr(NODE *n); #define free_wstr(n) do { if ((n)->flags & WSTRCUR) r_free_wstr(n); } while(0) -extern wint_t btowc_cache[]; -#define btowc_cache(x) btowc_cache[(x)&0xFF] -extern void init_btowc_cache(); -#define is_valid_character(b) (btowc_cache[(b)&0xFF] != WEOF) /* re.c */ extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal); extern int research(Regexp *rp, char *str, int start, size_t len, int flags); diff --git a/dfa.c b/dfa.c index fff4599..a2c73b1 100644 --- a/dfa.c +++ b/dfa.c @@ -464,10 +464,10 @@ static void regexp (void); /* A table indexed by byte values that contains the corresponding wide character (if any) for that byte. WEOF means the byte is not a valid single-byte character. */ -static wint_t mbrtowc_cache[NOTCHAR]; +wint_t btowc_cache[NOTCHAR]; /* Store into *PWC the result of converting the leading bytes of the - multibyte buffer S of length N bytes, using the mbrtowc_cache in *D + multibyte buffer S of length N bytes, using the btowc_cache in *D and updating the conversion state in *D. On conversion error, convert just a single byte, to WEOF. Return the number of bytes converted. @@ -476,7 +476,7 @@ static wint_t mbrtowc_cache[NOTCHAR]; * PWC points to wint_t, not to wchar_t. * The last arg is a dfa *D instead of merely a multibyte conversion - state D->mbs. D also contains an mbrtowc_cache for speed. + state D->mbs. D also contains a btowc_cache for speed. * N must be at least 1. * S[N - 1] must be a sentinel byte. * Shift encodings are not supported. 
@@ -487,7 +487,7 @@ static size_t mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) { unsigned char uc = s[0]; - wint_t wc = mbrtowc_cache[uc]; + wint_t wc = btowc_cache[uc]; if (wc == WEOF) { @@ -695,7 +695,7 @@ static charclass newline; static bool unibyte_word_constituent (unsigned char c) { - return mbrtowc_cache[c] != WEOF && (isalnum (c) || (c) == '_'); + return btowc_cache[c] != WEOF && (isalnum (c) || (c) == '_'); } static int @@ -718,25 +718,44 @@ wchar_context (wint_t wc) return CTX_NONE; } +void init_btowc_cache(void) /* fill btowc_cache[] for every byte value; the loop runs only on the first call */ +{ + static bool inited = false; + int i; + + if (inited) + return; /* NOTE(review): the table is never rebuilt, so a locale change after the first call is not reflected -- confirm intended */ + + for (i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + char c = i; + unsigned char uc = i; + mbstate_t s = { 0 }; + wchar_t wc; + size_t ret = mbrtowc (&wc, &c, 1, &s); + btowc_cache[uc] = (ret == (size_t)-1 || ret == (size_t) -2) ? WEOF : wc; /* WEOF when the lone byte is an invalid or incomplete sequence */ + } + + inited = true; +} + /* Entry point to set syntax options. */ void dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) { int i; + syntax_bits_set = 1; syntax_bits = bits; case_fold = fold != 0; eolbyte = eol; + init_btowc_cache(); + /* Now that btowc_cache[uc] is set, use it to calculate sbit. */ for (i = CHAR_MIN; i <= CHAR_MAX; ++i) { - char c = i; unsigned char uc = i; - mbstate_t s = { 0 }; - wchar_t wc; - mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; - /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */ sbit[uc] = char_context (uc); switch (sbit[uc]) { diff --git a/dfa.h b/dfa.h index 18be7f5..f2dd656 100644 --- a/dfa.h +++ b/dfa.h @@ -120,4 +120,15 @@ extern void dfawarn (const char *); The user must supply a dfaerror. */ extern _Noreturn void dfaerror (const char *); +/* General support routines. */ + +/* using_utf8() lets us know if our locale is one based on UTF-8. */ extern int using_utf8 (void); + +/* init_btowc_cache() initializes the cache that maps bytes to wide characters. 
*/ +extern void init_btowc_cache(void); + +/* is_valid_character() tells us if a byte is also a valid m.b. character. */ +extern wint_t btowc_cache[]; +#define is_valid_character(byte) (btowc_cache[(byte)&0xFF] != WEOF) +#define btowc_cache(x) btowc_cache[(x)&0xFF] diff --git a/node.c b/node.c index a7c19db..22119d2 100644 --- a/node.c +++ b/node.c @@ -949,19 +949,6 @@ get_ieee_magic_val(const char *val) return v; } -wint_t btowc_cache[256]; - -/* init_btowc_cache --- initialize the cache */ - -void init_btowc_cache() -{ - int i; - - for (i = 0; i < 255; i++) { - btowc_cache[i] = btowc(i); - } -} - #define BLOCKCHUNK 100 BLOCK nextfree[BLOCK_MAX] = {