From dc2773944154b305c893b7459829bde21c5a6182 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Fri, 5 Aug 2016 08:28:20 +0900 Subject: [PATCH 1/2] sed: cache results of mbrtowc for speed * sed/mbcs.c (mbrtowc_cache, mbrlen_cache): New vars. (initialize_mbcs): Initialize the cache. * sed/sed.h: Include limits.h (MBRTOWC, MBRLEN): Use the cache. --- sed/mbcs.c | 14 ++++++++++++++ sed/sed.h | 11 ++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/sed/mbcs.c b/sed/mbcs.c index bce39fa..8105ecd 100644 --- a/sed/mbcs.c +++ b/sed/mbcs.c @@ -24,6 +24,9 @@ int mb_cur_max; bool is_utf8; +size_t mbrlen_cache[UCHAR_MAX + 1]; +wint_t mbrtowc_cache[UCHAR_MAX + 1]; + /* Return non-zero if CH is part of a valid multibyte sequence: Either incomplete yet valid sequence (in case of a leading byte), or the last byte of a valid multibyte sequence. @@ -73,4 +76,15 @@ initialize_mbcs (void) is_utf8 = (strcmp (codeset_name, "UTF-8") == 0); mb_cur_max = MB_CUR_MAX; + + for (int i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + char c = i; + unsigned char uc = i; + mbstate_t mbs = { 0 }; + wchar_t wc; + size_t len = mbrtowc (&wc, &c, 1, &mbs); + mbrlen_cache[uc] = len ? len : 1; + mbrtowc_cache[uc] = len == 1 ? wc : WEOF; + } } diff --git a/sed/sed.h b/sed/sed.h index bbddd25..3716bcb 100644 --- a/sed/sed.h +++ b/sed/sed.h @@ -19,6 +19,7 @@ #include "basicdefs.h" #include "regex.h" #include <stdio.h> +#include <limits.h> #include "unlocked-io.h" #include "utils.h" @@ -238,9 +239,12 @@ extern bool use_extended_syntax_p; extern int mb_cur_max; extern bool is_utf8; +extern size_t mbrlen_cache[UCHAR_MAX + 1]; +extern wint_t mbrtowc_cache[UCHAR_MAX + 1]; + #define MBRTOWC(pwc, s, n, ps) \ - (mb_cur_max == 1 ? \ - (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \ + (mbrlen_cache[*(unsigned char *) (s)] == 1 ? \ + (*(pwc) = mbrtowc_cache[*(unsigned char *) (s)], 1) : \ mbrtowc ((pwc), (s), (n), (ps))) #define WCRTOMB(s, wc, ps) \ @@ -252,7 +256,8 @@ extern bool is_utf8; (mb_cur_max == 1 ? 
1 : mbsinit ((s))) #define MBRLEN(s, n, ps) \ - (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps)) + (mbrlen_cache[*(unsigned char *) (s)] == 1 ? \ + 1 : mbrtowc (NULL, s, n, ps)) #define IS_MB_CHAR(ch, ps) \ (mb_cur_max == 1 ? 0 : is_mb_char (ch, ps)) -- 1.7.1