[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Patch] faster fnmatch
From: |
Ondrej Bilka |
Subject: |
Re: [Patch] faster fnmatch |
Date: |
Fri, 8 May 2009 12:23:31 +0200 |
User-agent: |
Mutt/1.5.18 (2008-05-17) |
Hello
I finished writing this patch. I added test(and fnmatchcomp.m4 which I forgot
in previous). I didnt change much, prettied and fixed bugs discovered by tests.
Now it should be stable. I hope this patch looks OK.
Moreover I made two independent changes
first is preallocating buffer in fnmatch which should speed multibyte strings
by third.
Second is adding compiled fnmatch in glob
diff --git a/lib/fnmatch.c b/lib/fnmatch.c
index 48bc8b5..6daf8b0 100644
--- a/lib/fnmatch.c
+++ b/lib/fnmatch.c
@@ -275,6 +348,10 @@ int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
+#define ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n):
malloc(n))
+#define ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr);
+#define NOMEMTEST(exp) if (__builtin_expect(!(exp), 0)) {errno = ENOMEM;
goto err; }
+#define BE __builtin_expect
# define ALLOCA_LIMIT 2000
if (__builtin_expect (MB_CUR_MAX, 1) != 1)
{
@@ -285,7 +362,35 @@ fnmatch (const char *pattern, const char *string, int
flags)
wchar_t *wpattern;
wchar_t *wstring;
int res;
+ char *s = string, *p = pattern;
+ size_t plen = 2 * strlen (p) + 2, slen = 2 * strlen (s) + 2;
+ NOMEMTEST (wpattern =
+ ALLOCALIMIT (sizeof (wchar_t) * (plen + slen + 1)));
+ memset (&ps, '\0', sizeof (ps));
+ patsize = mbsrtowcs (wpattern, &p, plen, &ps);
+ if (BE (patsize == plen, 0))
+ {
+ ALLOCAFREE (wpattern);
+ goto toolarge;
+ }
+ if (BE (patsize < 0, 0))
+ return -1;
+ wstring = wpattern + patsize;
+ assert (mbsinit (&ps));
+ strsize = mbsrtowcs (wstring, &s, slen, &ps);
+ if (BE (strsize < 0, 0))
+ return -1;
+ if (BE (strsize == slen, 0))
+ {
+ ALLOCAFREE (wpattern);
+ goto toolarge;
+ }
+ res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1,
+ flags & FNM_PERIOD, flags);
+ ALLOCAFREE (wpattern);
+ return res;
+ toolarge:
/* Calculate the size needed to convert the strings to
wide characters. */
memset (&ps, '\0', sizeof (ps));
@@ -339,6 +445,8 @@ fnmatch (const char *pattern, const char *string, int flags)
return internal_fnmatch (pattern, string, string + strlen (string),
flags & FNM_PERIOD, flags);
+err:
+ return -1;
}
# ifdef _LIBC
diff --git a/lib/glob.c b/lib/glob.c
index 40cc9b3..1b61f67 100644
--- a/lib/glob.c
+++ b/lib/glob.c
@@ -152,7 +152,7 @@
#endif /* _LIBC */
#include <fnmatch.h>
-
+#include <fnmatchcomp.h>
#ifdef _SC_GETPW_R_SIZE_MAX
# define GETPW_R_SIZE_MAX() sysconf (_SC_GETPW_R_SIZE_MAX)
#else
@@ -1315,7 +1311,7 @@ glob_in_dir (const char *pattern, const char *directory,
int flags,
int meta;
int save;
int result;
-
+ fnmatch_state *fnm_pattern = NULL;
init_names.next = NULL;
init_names.count = INITIAL_COUNT;
@@ -1368,6 +1362,7 @@ glob_in_dir (const char *pattern, const char *directory,
int flags,
| FNM_CASEFOLD
#endif
);
+ fnm_pattern = fnmatch_compile (pattern, fnm_flags);
flags |= GLOB_MAGCHAR;
while (1)
@@ -1415,7 +1411,7 @@ glob_in_dir (const char *pattern, const char *directory,
int flags,
name = d->d_name;
- if (fnmatch (pattern, name, fnm_flags) == 0)
+ if (fnmatch_exec (fnm_pattern, name) == 0)
{
/* If the file we found is a symlink we have to
make sure the target file exists. */
@@ -1544,5 +1538,6 @@ glob_in_dir (const char *pattern, const char *directory,
int flags,
__set_errno (save);
}
+ fnmatch_free (fnm_pattern);
return result;
}
diff --git a/MODULES.html.sh b/MODULES.html.sh
index 06afa2d..03d308e 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -61,6 +61,7 @@ fenv
float
fmtmsg
fnmatch
+fnmatchcomp
ftw
glob
grp
@@ -418,6 +419,7 @@ fmodf
fmodl
fmtmsg
fnmatch
+fnmatchcomp
fopen
fork
fpathconf
diff --git a/lib/fnmatchcomp.c b/lib/fnmatchcomp.c
new file mode 100644
index 0000000..b374fec
--- /dev/null
+++ b/lib/fnmatchcomp.c
@@ -0,0 +1,632 @@
+ /* Copyright (C) 2009
+ Ondrej Bilka < address@hidden >
+
+ This program is free software: you can redistribute it and/or modif y
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see < http:// www.gnu.org/licenses/
> . */
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+ /* Enable GNU extensions in fnmatch.h. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+#include <fnmatch.h>
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <nl_types.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fnmatchcomp.h>
+
+#if !defined __builtin_expect && __GNUC__ < 3
+# define __builtin_expect(expr, expected) (expr)
+#endif
+
+#define WIDE_CHAR_SUPPORT \
+ ((HAVE_MBLEN && HAVE_WCSTOMBS) || _LIBC)
+
+ /* For platfor m which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if WIDE_CHAR_SUPPORT
+# include <wctype.h>
+# include <wchar.h>
+#else
+#define mblen(x, y) 1
+#define MB_CUR_MAX 1
+#endif
+
+#define UINT4 uint32_t
+#define UINT2 uint16_t
+#define CHARSPERINT (sizeof (UINT4)/sizeof(char))
+#define STREQ(a, b) (strcmp(a, b) == 0)
+#define MAX_PATTERN_SIZE (sizeof (bracket_pattern))
+#define NOMEMTEST(exp) if (__builtin_expect(!(exp), 0)) {errno = ENOMEM;
goto err; }
+#define Calloc(x) calloc(1, x)
+#define ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n):
malloc(n))
+#define ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr);
+typedef struct
+{
+ char tp;
+ char chr;
+} chr_pattern;
+typedef struct
+{ /* We seek chars following *. This provides
major speedup.
+ For some encodings we could be wrong
because leading char was part of multibyte character. Then after successfull
match call fnmatch to eliminate false positives */
+ char tp;
+ char chars[CHARSPERINT];
+ short sizerest;
+ short charno;
+} star_pattern;
+typedef struct
+{ /*string must end here but cant continue*/
+ char tp;
+} end_pattern;
+typedef struct
+{ /* * at end of pattern. automatic success */
+ char tp;
+} endstar_pattern;
+#define BRACKET_INVMATCH 1
+#define BRACKET_SLASH 2
+#define BRACKET_MUSTCHECK 4
+typedef struct
+{
+ char tp;
+ char flags;
+ char *chars; /* multibyte characters separated by 0 */
+ int sizerest;
+ uint32_t hash[(UCHAR_MAX / 32) + 1];
+ /* We look at first char of multibyte character. We dont have to call mblen
if this fails. */
+} bracket_pattern;
+typedef struct
+{ /* if FNM_PERIOD was set this checks period
after / If character is . it fails otherwise dont consume anything*/
+ char tp;
+} begindot_pattern;
+typedef struct
+{ /* if FNM_LEADING_DIR was set we go after / or
0 to endstar pattern which accept everything. */
+ char tp;
+} slashend_pattern;
+
+typedef struct
+{ /* we must call fnmatch */
+ char tp;
+ char *ptr;
+ char *prefix;
+ int flags;
+} fallback_pattern;
+typedef struct
+{ /* we fold string and then call case sensitive
version */
+ char tp;
+ fnmatch_state *states;
+} fold_pattern;
+typedef struct
+{ /* we call our version and if succeed we check
by fnmatch fallback */
+ char tp;
+ fnmatch_state *states;
+ fnmatch_state *fallback;
+} checkneeded_pattern;
+
+ /* like star but we dont go beyond / */
+typedef star_pattern starslash_pattern;
+enum PATTERNS
+{
+ PATTERN_chr,
+ PATTERN_star,
+ PATTERN_starslash,
+ PATTERN_end,
+ PATTERN_endstar,
+ PATTERN_bracket,
+ PATTERN_begindot,
+ PATTERN_slashend,
+ PATTERN_fallback,
+ PATTERN_fold,
+ PATTERN_checkneeded
+};
+union _patterns
+{
+ chr_pattern chr;
+ star_pattern star;
+ starslash_pattern starslash;
+ end_pattern end;
+ endstar_pattern endstar;
+ bracket_pattern bracket;
+ begindot_pattern begindot;
+ fallback_pattern fallback;
+ fold_pattern fold;
+ checkneeded_pattern checkneeded;
+};
+typedef union _patterns patterns;
+static void freepatterns (patterns * p);
+
+struct states
+{
+ patterns *p;
+ struct states *next;
+};
+struct _fnmatch_state
+{
+ struct states *states; /* list of states of undrelying NFA. */
+ int flags;
+ int *refcount;
+ patterns *start;
+};
+
+int
+fnmatch_fallbacked (fnmatch_state * s)
+{
+ return (s->states->p->fallback.tp == PATTERN_fallback);
+}
+
+
+static void
+strfold (const char *str, char *buf)
+{
+ if (MB_CUR_MAX == 1)
+ {
+ while (*str)
+ *buf++ = tolower (*str++);
+ *buf = 0;
+ }
+ else
+ {
+#if WIDE_CHAR_SUPPORT
+ int i;
+ int len = 2 * strlen (str) + 1;
+ wchar_t *towc = ALLOCALIMIT (sizeof (wint_t) * (len));
+ towc[len - 1] = L'\0';
+ mbstowcs (towc, str, len);
+ if (towc[len - 1] != L'\0')
+ {
+ ALLOCAFREE (towc, sizeof (wint_t) * (len));
+ len = mbstowcs (NULL, str, 0) + 1;
+ towc = ALLOCALIMIT (sizeof (wint_t) * len);
+ mbstowcs (towc, str, len);
+ }
+ for (i = 0; towc[i] != L'\0'; i++)
+ {
+ towc[i] = towlower (towc[i]);
+ }
+ wcstombs (buf, towc, len);
+ ALLOCAFREE (towc, sizeof (wint_t) * (len));
+#endif
+ }
+}
+
+static fnmatch_state *
+initfnmatchstate ()
+{
+ fnmatch_state *st;
+ NOMEMTEST (st = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+ NOMEMTEST (st->states = Calloc (sizeof (struct states)));
+ st->states->next = NULL;
+ NOMEMTEST (st->refcount = Calloc (sizeof (int)));
+ *st->refcount = 1;
+ return st;
+err:
+ if (st)
+ {
+ free (st->states);
+ free (st->refcount);
+ }
+ free (st);
+ return NULL;
+}
+
+#define FALLBACK_INIT(type) \
+ fnmatch_state *st; \
+ NOMEMTEST (st = initfnmatchstate ()); \
+ NOMEMTEST (st->start = st->states->p = Calloc (sizeof (type##_pattern))); \
+ st->states->p->fallback.tp = PATTERN_##type;
+
+static fnmatch_state *
+fnmatch_fallback (const char *str, int flags)
+{
+ FALLBACK_INIT (fallback);
+ NOMEMTEST (st->states->p->fallback.ptr = strdup (str));
+ st->states->p->fallback.flags = flags;
+ NOMEMTEST (st->states->p->fallback.prefix = strdup (""));
+ return st;
+err:
+ fnmatch_free (st);
+ return NULL;
+}
+
+static fnmatch_state *
+fold_fallback (fnmatch_state * s)
+{
+ FALLBACK_INIT (fold);
+ NOMEMTEST (s);
+ st->states->p->fold.states = s;
+ return st;
+err:
+ fnmatch_free (s);
+ fnmatch_free (st);
+ return NULL;
+}
+
+static fnmatch_state *
+checkneeded_fallback (fnmatch_state * s, fnmatch_state * fb)
+{
+ FALLBACK_INIT (checkneeded);
+ NOMEMTEST (fb);
+ NOMEMTEST (s);
+ st->states->p->checkneeded.states = s;
+ st->states->p->checkneeded.fallback = fb;
+ return st;
+err:
+ fnmatch_free (s);
+ fnmatch_free (fb);
+ fnmatch_free (st);
+ return NULL;
+}
+
+static int
+parse_bracket (const char *str, patterns * pat, int flags)
+{
+ unsigned char firstbyte;
+ char *chr;
+ int mlen;
+ const char *s = str;
+ int i;
+ pat->bracket.tp = PATTERN_bracket;
+ if (*s == '!' || (getenv ("POSIXLY_CORRECT") != NULL && *s == '^'))
+ {
+ pat->bracket.flags = BRACKET_INVMATCH;
+ s++;
+ }
+ if (flags & FNM_PATHNAME)
+ pat->bracket.flags |= BRACKET_SLASH;
+ if (!pat->bracket.chars)
+ {
+ if (!(pat->bracket.chars = Calloc (2 * strlen (s) + 2)))
+ return -3;
+ }
+ chr = pat->bracket.chars;
+ do
+ {
+ if (*s == 0 || *s == '-')
+ pat->bracket.flags |= BRACKET_MUSTCHECK | BRACKET_INVMATCH;
+ if (*s == '[' && (s[1] == '=' || s[1] == '.' || s[1] == ':'))
+ return -1; /* could be multichar sequence */
+ mlen = mblen (s, MB_CUR_MAX);
+ if (mlen < 0)
+ return -3;
+ /* We dont have call mblen often because first character of multibyte
+ is often not allowed at other places. */
+ firstbyte = (unsigned char) *s;
+ pat->bracket.hash[firstbyte / 32] |= 1 << (firstbyte % 32);
+ memcpy (chr, s, mlen);
+ s += mlen;
+ chr += mlen + 1;
+ }
+ while (*s && *s != ']');
+ *chr = 0;
+ if (pat->bracket.flags & BRACKET_MUSTCHECK)
+ { /* bracket is too complicated to process here
+ we replace it with ? and when we match
entire pattern
+ we call fnmatch it wasn't false positive */
+ *pat->bracket.chars = 0;
+ for (i = 0; i < (UCHAR_MAX / 32) + 1; i++)
+ pat->bracket.hash[i] = 0;
+ }
+ return s - str;
+}
+
+#define NEXTPATTERN(type) pat->chr.tp = PATTERN_##type; pat = (patterns
*)(((void *) pat)+sizeof (type##_pattern));
+#define FALLBACK freepatterns(ret); free(ret);
+#define BRACTEST(x) if ((x) == -3) {goto err; }
+fnmatch_state *
+fnmatch_compile (const char *str, int flags)
+{
+ const char *s;
+ patterns *pat, *ret = NULL;
+ int i, pass;
+ int size = 0, patsize;
+ int checkneeded = 0;
+ char *codeset = nl_langinfo (CODESET);
+ char *buf;
+ if (MB_CUR_MAX == 1 || STREQ (codeset, "UTF-8"))
+ {
+ /* encoding can be handled as singlebyte */
+ }
+ else if (mblen (NULL, 0) == 0)
+ { /* stateless */
+ checkneeded = 1;
+ }
+ else
+ {
+ return fnmatch_fallback (str, flags);
+ }
+ if (flags & FNM_CASEFOLD)
+ {
+ NOMEMTEST (buf = Calloc (2 * strlen (str) + 1));
+ strfold (str, buf);
+ fnmatch_state *st =
+ fold_fallback (fnmatch_compile (buf, flags & (~FNM_CASEFOLD)));
+ free (buf);
+ NOMEMTEST (st);
+ return st;
+ }
+ ret = (patterns *) Calloc ((strlen (str) + 3) * 2 * MAX_PATTERN_SIZE);
+
+ for (pass = 0; pass < 2; pass++)
+ {
+ /* two passes in first we compute values we use in second to optimize
+ */
+ patsize = size;
+ size = 0; /* patsize-size is minimal size of rest of
string */
+ /* it makes loops tighter and we dont need to check if we went outsize
string. */
+ pat = ret;
+ if (flags & FNM_PERIOD && *str != '.')
+ {
+ NEXTPATTERN (begindot)}
+ for (s = str; *s;)
+ {
+ if (flags & FNM_EXTMATCH && *(s + 1) == '(' &&
+ (*s == '*' || *s == '+' || *s == '@' || *s == '!'))
+ {
+ FALLBACK;
+ return fnmatch_fallback (str, flags);
+ }
+ switch (*s)
+ {
+ case '*':
+ while (*(s + 1) == '?')
+ {
+ /* by specif ication [!]] matches anything except ] */
+ BRACTEST (parse_bracket ("!", pat, flags));
+ size++;
+ pat->bracket.sizerest = patsize - size;
+ NEXTPATTERN (bracket);
+ s++;
+ }
+ if (*(s + 1) || (flags & FNM_PATHNAME))
+ {
+ if (pass)
+ {
+ /* we will find ocurence of next characters fast. */
+ patterns *tmppat = pat;
+ pat->star.sizerest = patsize - size;
+ NEXTPATTERN (star);
+ for (i = 0; pat->chr.tp == PATTERN_chr && pat->chr.chr
+ && i < CHARSPERINT; i++)
+ {
+ tmppat->star.chars[i] = pat->chr.chr;
+ NEXTPATTERN (chr);
+ }
+ if (i == 3)
+ i = 2;
+ tmppat->star.charno = i;
+ pat = tmppat;
+ }
+ if (flags & FNM_PATHNAME)
+ {
+ NEXTPATTERN (starslash);
+ }
+ else
+ {
+ NEXTPATTERN (star);
+ }
+ }
+ else
+ {
+ NEXTPATTERN (endstar);
+ }
+ s++;
+ break;
+ case '?':
+ BRACTEST (parse_bracket ("!", pat, flags));
+ size++;
+ pat->bracket.sizerest = patsize - size;
+ NEXTPATTERN (bracket);
+ s++;
+ break;
+ case '[':
+ {
+ int siz = parse_bracket (s + 1, pat, flags);
+ BRACTEST (siz);
+ if (siz < 0)
+ {
+ FALLBACK;
+ return fnmatch_fallback (str, flags);
+ }
+ size++;
+ s += siz + 2;
+ pat->bracket.sizerest = patsize - size;
+ if (pat->bracket.flags & BRACKET_MUSTCHECK)
+ {
+ checkneeded = 1;
+ }
+ NEXTPATTERN (bracket);
+ }
+ break;
+ default:
+ if (*s == '\\' && (!(flags & FNM_NOESCAPE)))
+ s++;
+ {
+ int nolp = (*s == '/' && (flags & FNM_PERIOD)
+ && (flags & FNM_PATHNAME)
+ && *(s + 1) != '.'), mlen = mblen (s, MB_CUR_MAX);
+ if (mlen < 0)
+ goto err;
+ for (i = 0; i < mlen; i++, s++)
+ {
+ pat->chr.chr = *s;
+ size++;
+ NEXTPATTERN (chr);
+ }
+ if (nolp)
+ {
+ NEXTPATTERN (begindot)}
+ }
+ break;
+ }
+ }
+ }
+ if (flags & FNM_LEADING_DIR)
+ {
+ NEXTPATTERN (slashend);
+ NEXTPATTERN (endstar);
+ }
+ else
+ {
+ NEXTPATTERN (end);
+ }
+
+ fnmatch_state *st;
+ NOMEMTEST (st = initfnmatchstate ());
+ st->states->p = ret;
+ st->start = ret;
+ if (checkneeded)
+ st = checkneeded_fallback (st, fnmatch_fallback (str, flags));
+ return st;
+err:
+ freepatterns (ret);
+ return NULL;
+}
+
+#undef NEXTPATTERN
+#define NEXTPATTERN(type) p = (patterns *)(((void *) p)+sizeof
(type##_pattern));
+
+static inline int
+fnmatch2_internal (const patterns * p, const char *str, int len)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+int
+fnmatch_exec (const fnmatch_state * p, const char *str)
+{
+ const struct states *s;
+ int len = strlen (str);
+ for (s = p->states; s; s = s->next)
+ {
+ int ret = fnmatch2_internal (s->p, str, len);
+ if (ret <= 0)
+ return ret;
+ }
+ return FNM_NOMATCH;
+}
+
+#define FNMATCH_PREFIX
+static int
+fnmatch2_prefix_internal (patterns * p, const char *str, int len,
+ fnmatch_state * ret)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+fnmatch_state *
+fnmatch_prefix (const fnmatch_state * p, const char *str)
+{
+ struct states *s;
+ int len = strlen (str);
+ fnmatch_state *ret;
+ NOMEMTEST (ret = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+ memcpy ((void *) ret, (void *) p, sizeof (fnmatch_state));
+ ret->states = NULL;
+ (*ret->refcount)++;
+ for (s = p->states; s; s = s->next)
+ {
+ fnmatch2_prefix_internal (s->p, str, len, ret);
+ }
+ return ret;
+err:
+ return NULL;
+}
+
+static void
+freepatterns (patterns * p)
+{
+ if (!p)
+ return;
+#define SKIPPATTERN(type) case PATTERN_##type: NEXTPATTERN(type); break;
+ while (p->chr.tp != PATTERN_end && p->chr.tp != PATTERN_endstar)
+ {
+ switch (p->chr.tp)
+ {
+ case PATTERN_chr:
+ if (!p->chr.chr)
+ return;
+ NEXTPATTERN (chr);
+ break;
+ SKIPPATTERN (star);
+ SKIPPATTERN (starslash);
+ SKIPPATTERN (end);
+ SKIPPATTERN (endstar);
+ case PATTERN_bracket:
+ free (p->bracket.chars);
+ NEXTPATTERN (bracket);
+ break;
+ SKIPPATTERN (begindot);
+ SKIPPATTERN (slashend);
+ case PATTERN_fallback:
+ free (p->fallback.ptr);
+ return;
+ case PATTERN_fold:
+ return;
+ case PATTERN_checkneeded:
+ return;
+ }
+ }
+
+}
+
+void
+fnmatch_free (fnmatch_state * s)
+{ /* fnmatch_free must process incomplete
fnmatch_state too */
+ if (!s)
+ return;
+ struct states *st, *st2;
+ for (st = s->states; st;)
+ {
+ if (st->p)
+ {
+ switch (st->p->chr.tp)
+ {
+ case PATTERN_fallback:
+ free (st->p->fallback.prefix);
+ if (s->start != st->p)
+ free (st->p);
+ break;
+ case PATTERN_fold:
+ fnmatch_free (st->p->fold.states);
+ if (s->start != st->p)
+ free (st->p);
+ break;
+ case PATTERN_checkneeded:
+ fnmatch_free (st->p->checkneeded.states);
+ fnmatch_free (st->p->checkneeded.fallback);
+ if (s->start != st->p)
+ free (st->p);
+ break;
+ }
+ }
+ st2 = st->next;
+ free (st);
+ st = st2;
+ }
+ (*s->refcount)--;
+ if (!*s->refcount)
+ {
+ free (s->refcount);
+ freepatterns (s->start);
+ free (s->start);
+ }
+ free (s);
+}
diff --git a/lib/fnmatchcomp.h b/lib/fnmatchcomp.h
new file mode 100644
index 0000000..356fbf8
--- /dev/null
+++ b/lib/fnmatchcomp.h
@@ -0,0 +1,41 @@
+ /* Copyright (C) 2009
+ Ondrej Bilka < address@hidden >
+
+ This program is free software: you can redistribute it and/or modif y
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see < http:// www.gnu.org/licenses/
> . */
+
+struct _fnmatch_state;
+typedef struct _fnmatch_state fnmatch_state;
+
+ /* compile pattern */
+fnmatch_state *fnmatch_compile (const char *pattern, int flags);
+
+ /* add prefix to matched strings.
+ suppose you want match foo againist /home/foo, /home/bar/foo,
+ /home/bar/bar
+ fnmatch_state* s = fnmatch_compile("foo", 0);
+ fnmatch_state* home = fnmatch_prefix(s, "/home");
+ fnmatch_state* bar = fnmatch_prefix(home, "/bar");
+ fnmatch_exec(home, "/foo");
+ fnmatch_exec(bar, "/foo");
+ fnmatch_exec(bar, "/bar");
+ */
+fnmatch_state *fnmatch_prefix (const fnmatch_state * pattern,
+ const char *prefix);
+
+int fnmatch_exec (const fnmatch_state * pattern, const char *string);
+
+void fnmatch_free (fnmatch_state * pattern);
+
+ /* For some encodings we decide just call fnmatch. In that case s you can
gain some performance by checking fnmatch_fallbacked and if returns 1 call
fnmatch yourself */
+int fnmatch_fallbacked (fnmatch_state * pattern);
diff --git a/lib/fnmatchcomp_loop.h b/lib/fnmatchcomp_loop.h
new file mode 100644
index 0000000..f722ffb
--- /dev/null
+++ b/lib/fnmatchcomp_loop.h
@@ -0,0 +1,234 @@
+ /* Copyright (C) 2009
+ Ondrej Bilka < address@hidden >
+
+ This program is free software: you can redistribute it and/or modif y
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see < http:// www.gnu.org/licenses/
> . */
+
+const char *s = str, *chars;
+int i, buflen;
+char *buf;
+while (1)
+ {
+#ifndef FNMATCH_PREFIX
+ int retval;
+ int sizerest, charno;
+#undef ADDSTATE
+#define ADDSTATE
+#else
+ int statetwice;
+ patterns *pat, *pat2;
+ struct states *state;
+#undef ADDSTATE
+#define ADDSTATE \
+ statetwice = 0; \
+ for (state = ret->states; state; state = state->next)\
+ if (state->p == p) statetwice = 1; \
+ if (!statetwice) {\
+ state = calloc(1, sizeof (struct states)); \
+ state->next = ret->states; \
+ ret->states = state; \
+ state->p = p; \
+ }
+ if (!*s)
+ {
+ ADDSTATE;
+ return 0;
+ }
+#endif
+ switch (p->chr.tp)
+ {
+ case PATTERN_chr:
+ if (__builtin_expect ((*s != p->chr.chr), 1))
+ return FNM_NOMATCH;
+ NEXTPATTERN (chr);
+ s++;
+ break;
+ case PATTERN_starslash:
+#undef SLASHTEST
+#define SLASHTEST if (str[i] == '/') return FNM_NOMATCH;
+#include "fnmatchcomp_star.h"
+ break;
+ case PATTERN_star:
+#undef SLASHTEST
+#define SLASHTEST
+#include "fnmatchcomp_star.h"
+ break;
+
+ case PATTERN_end:
+ return (*s) ? FNM_NOMATCH : 0;
+ break;
+ case PATTERN_slashend:
+ if (*s == '/' || *s == 0)
+ {
+ NEXTPATTERN (slashend);
+ }
+ else
+ return FNM_NOMATCH;
+ break;
+ case PATTERN_endstar:
+ ADDSTATE;
+ return 0;
+ break;
+ case PATTERN_bracket:
+ {
+ int mat;
+ int mlen, mbuflen;
+ int firstbyte = (unsigned char) *s;
+ chars = p->bracket.chars;
+ if (MB_CUR_MAX == 1)
+ {
+ mlen = 1;
+ mat = (p->bracket.hash[firstbyte / 32] &
+ (1 << (firstbyte % 32))) ? 1 : 0;
+ }
+ else
+ {
+ mlen = 0;
+ mat = 1;
+ if (!
+ (p->bracket.hash[firstbyte / 32] & (1 << (firstbyte % 32))))
+ {
+ mat = 0;
+ goto match;
+ }
+ mlen = mblen (s, MB_CUR_MAX);
+ if (mlen < 0)
+ return FNM_NOMATCH;
+
+ while (*chars)
+ {
+ mbuflen = strlen (chars);
+ if (!strncmp (chars, s, mlen) && mlen == mbuflen)
+ goto match;
+ chars += mbuflen + 1;
+ }
+ mat = 0;
+ }
+ match:
+ if (mat ^ (p->bracket.flags & BRACKET_INVMATCH))
+ {
+ if (p->bracket.flags & BRACKET_SLASH && *s == '/')
+ return FNM_NOMATCH;
+ if (!mlen)
+ mlen = mblen (s, MB_CUR_MAX);
+ if (mlen < 0)
+ return FNM_NOMATCH;
+
+ s += mlen;
+ if (len - (str - s) < p->bracket.sizerest)
+ return FNM_NOMATCH;
+ }
+ else
+ {
+ return FNM_NOMATCH;
+ }
+ NEXTPATTERN (bracket);
+ }
+ break;
+ case PATTERN_begindot:
+ if (*s == '.')
+ return FNM_NOMATCH;
+ NEXTPATTERN (begindot);
+ break;
+ case PATTERN_fallback:
+
+ buflen = strlen (p->fallback.prefix) + len + 1;
+ NOMEMTEST (buf = ALLOCALIMIT (buflen));
+ strcpy (buf, p->fallback.prefix);
+ strcat (buf, s);
+#ifndef FNMATCH_PREFIX
+ retval = fnmatch (p->fallback.ptr, buf, p->fallback.flags);
+ ALLOCAFREE (buf, buflen);
+ return retval;
+#else
+ if (!(pat2 = malloc (sizeof (fallback_pattern))))
+ {
+ ALLOCAFREE (buf, buflen);
+ NOMEMTEST (NULL);
+ }
+ memcpy (pat2, p, sizeof (fallback_pattern));
+ if (!(pat2->fallback.prefix = strdup (buf)))
+ {
+ ALLOCAFREE (buf, buflen);
+ NOMEMTEST (NULL);
+ }
+ p = (patterns *) pat2;
+ ADDSTATE;
+ return FNM_NOMATCH;
+#endif
+ ALLOCAFREE (buf, buflen);
+
+ case PATTERN_fold:
+ buflen = 2 * len + 1;
+ NOMEMTEST (buf = (ALLOCALIMIT (buflen)));
+ strfold (str, buf);
+#ifndef FNMATCH_PREFIX
+ retval = fnmatch_exec (p->fold.states, buf);
+ ALLOCAFREE (buf, 2 * len + 1);
+ return retval;
+#else
+ if (!(pat2 = malloc (sizeof (fallback_pattern))))
+ {
+ ALLOCAFREE (buf, buflen);
+ NOMEMTEST (NULL);
+ }
+ pat2->fold.tp = PATTERN_fold;
+ if (!(pat2->fold.states =
+ (void *) fold_fallback (fnmatch_prefix (p->fold.states, buf))))
+ {
+ free (pat2);
+ ALLOCAFREE (buf, buflen);
+ NOMEMTEST (NULL);
+ }
+ p = (patterns *) pat2;
+ ADDSTATE;
+ ALLOCAFREE (buf, buflen);
+ return FNM_NOMATCH;
+#endif
+ break;
+ case PATTERN_checkneeded:
+#ifndef FNMATCH_PREFIX
+ {
+ int ret = fnmatch_exec (p->checkneeded.states, s);
+ if (ret > 0)
+ return FNM_NOMATCH;
+ if (ret < 0)
+ return ret;
+ return fnmatch_exec (p->checkneeded.fallback, s);
+ }
+#else
+ NOMEMTEST (pat2 = malloc (sizeof (checkneeded_pattern)));
+ pat2->checkneeded.tp = PATTERN_checkneeded;
+ if (!(pat2->checkneeded.states =
+ fnmatch_prefix (p->checkneeded.states, s)))
+ {
+ free (pat2);
+ NOMEMTEST (NULL);
+ }
+ if (!(pat2->checkneeded.fallback =
+ fnmatch_prefix (p->checkneeded.fallback, s)))
+ {
+ fnmatch_free (pat2->checkneeded.states);
+ free (pat2);
+ NOMEMTEST (NULL);
+ }
+ p = pat2;
+ ADDSTATE;
+ return FNM_NOMATCH;
+#endif
+ break;
+ }
+ }
+
+err:
+return -1;
diff --git a/lib/fnmatchcomp_star.h b/lib/fnmatchcomp_star.h
new file mode 100644
index 0000000..f542e3d
--- /dev/null
+++ b/lib/fnmatchcomp_star.h
@@ -0,0 +1,65 @@
+ /* Copyright (C) 2009
+ Ondrej Bilka < address@hidden >
+
+ This program is free software: you can redistribute it and/or modif y
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see < http:// www.gnu.org/licenses/
> . */
+
+#ifndef FNMATCH_PREFIX
+sizerest = p->star.sizerest;
+chars = p->star.chars;
+charno = p->star.charno;
+NEXTPATTERN (star);
+for (i = 0; i < charno; i++)
+ {
+ NEXTPATTERN (chr);
+ }
+
+switch (charno)
+ {
+ case 0:
+ for (i = s - str; i <= len - sizerest; i++)
+ {
+ if (!fnmatch2_internal (p, str + i, len - i))
+ return 0;
+ SLASHTEST;
+ }
+ break;
+#define CASEN(type, no) \
+ case no:\
+ for (i = s-str; i <= len-sizerest; i++) {\
+ if (__builtin_expect((*((type*)(str+i)) == *(type *) chars), 0)\
+ && !fnmatch2_internal(p, str+i+no, len-i-no)) return 0; \
+ SLASHTEST \
+ } \
+ break;
+ CASEN (char, 1);
+ CASEN (UINT2, 2);
+ case 3:
+ CASEN (UINT4, 4);
+ }
+
+return FNM_NOMATCH;
+#else
+pat = p;
+NEXTPATTERN (star);
+for (i = s - str; i < len; i++)
+ {
+ fnmatch2_prefix_internal (p, str + i, len - i, ret);
+ SLASHTEST;
+ }
+
+p = pat;
+ADDSTATE;
+return FNM_NOMATCH;
+#endif
+break;
diff --git a/m4/fnmatchcomp.m4 b/m4/fnmatchcomp.m4
new file mode 100644
index 0000000..c2d3812
--- /dev/null
+++ b/m4/fnmatchcomp.m4
@@ -0,0 +1,25 @@
+# fnmatchcomp.m4 serial 10
+dnl Copyright (C) 2005-2007 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+
+AC_DEFUN([gl_FNMATCHCOMP],
+[
+ gl_PREREQ_FNMATCHCOMP
+
+ FNMATCHCOMP_H=fnmatchcomp.h
+ AC_LIBOBJ([fnmatchcomp])
+ AC_SUBST([FNMATCHCOMP_H])
+])
+
+
+
+# Prerequisites of lib/fnmatchcomp.*.
+AC_DEFUN([gl_PREREQ_FNMATCHCOMP],
+[
+ AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])dnl
+ AC_CHECK_FUNCS_ONCE([mblen wcstombs])
+ AC_CHECK_HEADERS_ONCE([stdlib.h wctype.h])
+])
diff --git a/modules/fnmatchcomp b/modules/fnmatchcomp
new file mode 100644
index 0000000..5aaf80a
--- /dev/null
+++ b/modules/fnmatchcomp
@@ -0,0 +1,37 @@
+Description:
+ Allows you to compile fnmatch pattern. You can add to this pattern prefix
+ which will be added as prefix of any string you will match with this pattern
+
+Files:
+lib/fnmatchcomp.c
+lib/fnmatchcomp.h
+lib/fnmatchcomp_loop.h
+lib/fnmatchcomp_star.h
+m4/fnmatchcomp.m4
+
+Depends-on:
+fnmatch
+extensions
+alloca
+stdbool
+wchar
+wctype
+memchr
+memcmp
+mbsrtowcs
+mbsinit
+
+configure.ac:
+gl_FNMATCHCOMP
+
+Makefile.am:
+BUILT_SOURCES += $(FNMATCHCOMP_H)
+
+Include:
+<fnmatchcomp.h>
+
+License:
+LGPLv2+
+
+Maintainer:
+Ondrej Bilka
diff --git a/modules/fnmatchcomp-tests b/modules/fnmatchcomp-tests
new file mode 100644
index 0000000..2219093
--- /dev/null
+++ b/modules/fnmatchcomp-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-fnmatchcomp.c
+
+Depends-on:
+fnmatchcomp
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-fnmatchcomp
+check_PROGRAMS += test-fnmatchcomp
diff --git a/modules/glob b/modules/glob
index b765be7..ac07549 100644
--- a/modules/glob
+++ b/modules/glob
@@ -22,6 +22,7 @@ strdup
sys_stat
unistd
malloc-posix
+fnmatchcomp
configure.ac:
gl_GLOB
diff --git a/tests/test-fnmatchcomp.c b/tests/test-fnmatchcomp.c
new file mode 100644
index 0000000..5579924
--- /dev/null
+++ b/tests/test-fnmatchcomp.c
@@ -0,0 +1,132 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <nl_types.h>
+#include <langinfo.h>
+#include <locale.h>
+#define _GNU_SOURCE
+#include <fnmatch.h>
+#include <fnmatchcomp.h>
+int
+fnmatch2 (const char *ptr, const char *str, int flags)
+{
+ return fnmatch_exec (fnmatch_compile (ptr, flags), str);
+}
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+ fnmatch_state *x;
+ int l;
+ for (l = 0; l < 3; l++)
+ {
+ if (l == 0)
+ setlocale (LC_ALL, "C");
+ if (l == 1)
+ setlocale (LC_ALL, "en_US.UTF-8");
+ if (l == 2)
+ setlocale (LC_ALL, "zh_TW.BIG5");
+
+ int cnt = 0;
+ char *fooptr[20];
+#define TESTPREF(st, pat, str, fullstr, flag) if (fnmatch(pat, fullstr, flag)
!= fnmatch_exec(st, str)) { printf ("partial %s = ~ %s failed flags %i \n",
pat, fullstr, flag); }
+ int j;
+ fooptr[0] = "*?abc*";
+
+ fooptr[1] = "*a[a-z]*";
+
+ fooptr[2] = "*a[[:alnum:], ]*";
+ fooptr[3] = "*foo*";
+ fooptr[4] = "*[xf]*";
+ fooptr[5] = "*abcd*";
+ fooptr[6] = "*a[[ = alnum = ], ]*";
+ fooptr[7] = " /* o*";
+
+ for (j = 0; j < 8; j++)
+ {
+
+ for (i = 0; i < 32; i++)
+ {
+ fnmatch_state *s = fnmatch_compile (fooptr[j], i);
+ fnmatch_state *home = fnmatch_prefix (s, "/home");
+ fnmatch_state *bar = fnmatch_prefix (home, "/bar");
+ fnmatch_state *baz = fnmatch_prefix (bar, "/baz");
+ TESTPREF (home, fooptr[j], "/foo", "/home/foo", i);
+ TESTPREF (bar, fooptr[j], "/foo", "/home/bar/foo", i);
+ TESTPREF (bar, fooptr[j], "/bar", "/home/bar/bar", i);
+ TESTPREF (baz, fooptr[j], "/blabla", "/home/bar/baz/blabla", i);
+ fnmatch_free (home);
+ fnmatch_free (bar);
+ fnmatch_free (s);
+ fnmatch_free (baz);
+ }
+ }
+ int k;
+ for (k = 0; k < 3; k++)
+ {
+#define TESTP(pat, str, flag) x = fnmatch_compile(pat, flag); if
(fnmatch(pat, str, flag) != fnmatch_exec(x, str)) { printf ("%s = ~ %s failed
flags %i \n", pat, str, flag); } fnmatch_free(x);
+
+ for (i = 0; i < 32; i++)
+ {
+ TESTP (" */ aa*", "asdasd/aab", i);
+ TESTP (" */ [ab]a*", "as/asd/aab", i);
+ TESTP ("*ads*", ".asdvdf", i);
+ TESTP (" */ ?as", "asdasd/.asasd", i);
+ TESTP (" */ [a-z]as", "asdasd/.asasd", i);
+ TESTP (" */ [a-z]a*", "asdasd/aab", i);
+ }
+ /* m4 test of fnmatch */
+ char const *Apat = 'A' < '\\' ? "[A-\\\\]" : "[\\\\-A]";
+ char const *apat = 'a' < '\\' ? "[a-\\\\]" : "[\\\\-a]";
+ static char const A_1[] = { 'A' - 1, 0 };
+ static char const A01[] = { 'A' + 1, 0 };
+ static char const a_1[] = { 'a' - 1, 0 };
+ static char const a01[] = { 'a' + 1, 0 };
+ static char const bs_1[] = { '\\' - 1, 0 };
+ static char const bs01[] = { '\\' + 1, 0 };
+#define y TESTP
+#define n TESTP
+ n ("a*", "", 0)
+ y ("a*", "abc", 0)
+ n ("d* /* 1", "d/s/1", FNM_PATHNAME)
+ y ("a\\bc", "abc", 0)
+ n ("a\\bc", "abc", FNM_NOESCAPE)
+ y ("*x", ".x", 0)
+ n ("*x", ".x", FNM_PERIOD)
+ y (Apat, "\\", 0)
+ y (Apat, "A", 0)
+ y (apat, "\\", 0)
+ y (apat, "a", 0)
+ n (Apat, A_1, 0)
+ n (apat, a_1, 0)
+ y (Apat, A01, 0)
+ y (apat, a01, 0)
+ y (Apat, bs_1, 0)
+ y (apat, bs_1, 0)
+ n (Apat, bs01, 0)
+ n (apat, bs01, 0)
+ y ("xxXX", "xXxX", FNM_CASEFOLD)
+ y ("a++(x|yy)b", "a+xyyyyxb", FNM_EXTMATCH)
+ n ("d* /* 1", "d/s/1", FNM_FILE_NAME)
+ y ("*", "x", FNM_FILE_NAME | FNM_LEADING_DIR)
+ y ("x*", "x/y/z", FNM_FILE_NAME | FNM_LEADING_DIR)
+ y ("*c*", "c/x", FNM_FILE_NAME | FNM_LEADING_DIR);
+ }
+ }
+ setlocale (LC_ALL, "zh_TW.BIG5");
+ char tst[] = { 0xa5, 'a', 'b', 'c', 0 };
+ char ptr1[] = { '*', 0xa5, 'a', '*', 0 };
+ char ptr2[] = { '*', '[', 0xa5, 'b', ']', '*', 0 };
+ for (i = 0; i < 32; i++)
+ {
+ TESTP ("*a*", tst, i);
+ TESTP ("*b*", tst, i);
+ TESTP ("*c*", tst, i);
+ TESTP (ptr1, tst, i);
+ TESTP (ptr2, tst, i);
+ }
+
+
+ return (0);
+}