bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Patch] faster fnmatch


From: Ondrej Bilka
Subject: Re: [Patch] faster fnmatch
Date: Fri, 8 May 2009 12:23:31 +0200
User-agent: Mutt/1.5.18 (2008-05-17)

Hello 
 I finished writing this patch. I added test(and fnmatchcomp.m4 which I forgot 
in previous). I didnt change much, prettied and fixed bugs discovered by tests.
Now it should be stable. I hope this patch looks OK.

 Moreover I made two independent changes
first is preallocating buffer in fnmatch which should speed multibyte strings 
by third.
Second is adding compiled fnmatch in glob
diff --git a/lib/fnmatch.c b/lib/fnmatch.c
index 48bc8b5..6daf8b0 100644
--- a/lib/fnmatch.c
+++ b/lib/fnmatch.c
@@ -275,6 +348,10 @@ int
 fnmatch (const char *pattern, const char *string, int flags)
 {
 # if HANDLE_MULTIBYTE
+#define  ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n): 
malloc(n))
+#define  ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr);
+#define  NOMEMTEST(exp) if (__builtin_expect(!(exp), 0))  {errno = ENOMEM; 
goto err; }
+#define BE __builtin_expect
 #  define ALLOCA_LIMIT 2000
   if (__builtin_expect (MB_CUR_MAX, 1) != 1)
     {
@@ -285,7 +362,35 @@ fnmatch (const char *pattern, const char *string, int 
flags)
       wchar_t *wpattern;
       wchar_t *wstring;
       int res;
+      char *s = string, *p = pattern;
+      size_t plen = 2 * strlen (p) + 2, slen = 2 * strlen (s) + 2;
+      NOMEMTEST (wpattern =
+                 ALLOCALIMIT (sizeof (wchar_t) * (plen + slen + 1)));
+      memset (&ps, '\0', sizeof (ps));
+      patsize = mbsrtowcs (wpattern, &p, plen, &ps);
+      if (BE (patsize == plen, 0))
+        {
+          ALLOCAFREE (wpattern);
+          goto toolarge;
+        }
+      if (BE (patsize < 0, 0))
+        return -1;
+      wstring = wpattern + patsize;
+      assert (mbsinit (&ps));
+      strsize = mbsrtowcs (wstring, &s, slen, &ps);
+      if (BE (strsize < 0, 0))
+        return -1;
+      if (BE (strsize == slen, 0))
+        {
+          ALLOCAFREE (wpattern);
+          goto toolarge;
+        }
 
+      res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1,
+                               flags & FNM_PERIOD, flags);
+      ALLOCAFREE (wpattern);
+      return res;
+    toolarge:
       /* Calculate the size needed to convert the strings to
         wide characters.  */
       memset (&ps, '\0', sizeof (ps));
@@ -339,6 +445,8 @@ fnmatch (const char *pattern, const char *string, int flags)
 
   return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags);
+err:
+  return -1;
 }
 
 # ifdef _LIBC
diff --git a/lib/glob.c b/lib/glob.c
index 40cc9b3..1b61f67 100644
--- a/lib/glob.c
+++ b/lib/glob.c
@@ -152,7 +152,7 @@
 #endif /* _LIBC */
 
 #include <fnmatch.h>
-
+#include <fnmatchcomp.h>
 #ifdef _SC_GETPW_R_SIZE_MAX
 # define GETPW_R_SIZE_MAX()    sysconf (_SC_GETPW_R_SIZE_MAX)
 #else
@@ -1315,7 +1311,7 @@ glob_in_dir (const char *pattern, const char *directory, 
int flags,
   int meta;
   int save;
   int result;
-
+  fnmatch_state *fnm_pattern = NULL;
   init_names.next = NULL;
   init_names.count = INITIAL_COUNT;
 
@@ -1368,6 +1362,7 @@ glob_in_dir (const char *pattern, const char *directory, 
int flags,
                           | FNM_CASEFOLD
 #endif
                           );
+          fnm_pattern = fnmatch_compile (pattern, fnm_flags);
          flags |= GLOB_MAGCHAR;
 
          while (1)
@@ -1415,7 +1411,7 @@ glob_in_dir (const char *pattern, const char *directory, 
int flags,
 
              name = d->d_name;
 
-             if (fnmatch (pattern, name, fnm_flags) == 0)
+              if (fnmatch_exec (fnm_pattern, name) == 0)
                {
                  /* If the file we found is a symlink we have to
                     make sure the target file exists.  */
@@ -1544,5 +1538,6 @@ glob_in_dir (const char *pattern, const char *directory, 
int flags,
       __set_errno (save);
     }
 
+  fnmatch_free (fnm_pattern);
   return result;
 }

diff --git a/MODULES.html.sh b/MODULES.html.sh
index 06afa2d..03d308e 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -61,6 +61,7 @@ fenv
 float
 fmtmsg
 fnmatch
+fnmatchcomp
 ftw
 glob
 grp
@@ -418,6 +419,7 @@ fmodf
 fmodl
 fmtmsg
 fnmatch
+fnmatchcomp
 fopen
 fork
 fpathconf
diff --git a/lib/fnmatchcomp.c b/lib/fnmatchcomp.c
new file mode 100644
index 0000000..b374fec
--- /dev/null
+++ b/lib/fnmatchcomp.c
@@ -0,0 +1,632 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://     www.gnu.org/licenses/ 
> . */
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+ /* Enable GNU extensions in fnmatch.h. */
+#ifndef _GNU_SOURCE
+# define  _GNU_SOURCE 1
+#endif
+#include <fnmatch.h>
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <nl_types.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fnmatchcomp.h>
+
+#if !defined __builtin_expect && __GNUC__ < 3
+# define  __builtin_expect(expr, expected) (expr)
+#endif
+
+#define  WIDE_CHAR_SUPPORT \
+   ((HAVE_MBLEN && HAVE_WCSTOMBS) || _LIBC)
+
+ /* For platfor m which support the ISO C amendement 1 functionality we
+    support user defined character classes. */
+#if WIDE_CHAR_SUPPORT
+# include <wctype.h>
+# include <wchar.h>
+#else
+#define  mblen(x, y) 1
+#define  MB_CUR_MAX 1
+#endif
+
+#define  UINT4 uint32_t
+#define  UINT2 uint16_t
+#define  CHARSPERINT (sizeof (UINT4)/sizeof(char))
+#define  STREQ(a, b) (strcmp(a, b) == 0)
+#define  MAX_PATTERN_SIZE (sizeof (bracket_pattern))
+#define  NOMEMTEST(exp) if (__builtin_expect(!(exp), 0))  {errno = ENOMEM; 
goto err; }
+#define  Calloc(x) calloc(1, x)
+#define  ALLOCALIMIT(n) (__builtin_expect((n < 2000), 1) ? alloca(n): 
malloc(n))
+#define  ALLOCAFREE(ptr, n) if (__builtin_expect((n >= 2000), 0)) free(ptr);
+typedef struct
+{
+  char tp;
+  char chr;
+} chr_pattern;
+typedef struct
+{                               /* We seek chars following *. This provides 
major speedup.
+                                   For some encodings we could be wrong 
because leading char was part of multibyte character. Then after successfull 
match call fnmatch to eliminate false positives  */
+  char tp;
+  char chars[CHARSPERINT];
+  short sizerest;
+  short charno;
+} star_pattern;
+typedef struct
+{ /*string must end here but cant continue*/
+  char tp;
+} end_pattern;
+typedef struct
+{                               /* * at end of pattern. automatic success  */
+  char tp;
+} endstar_pattern;
+#define  BRACKET_INVMATCH 1
+#define  BRACKET_SLASH 2
+#define  BRACKET_MUSTCHECK 4
+typedef struct
+{
+  char tp;
+  char flags;
+  char *chars;                  /* multibyte characters separated by 0 */
+  int sizerest;
+  uint32_t hash[(UCHAR_MAX / 32) + 1];
+  /* We look at first char of multibyte character. We dont have to call mblen 
if this fails. */
+} bracket_pattern;
+typedef struct
+{                               /* if FNM_PERIOD was set this checks period 
after / If character is . it fails otherwise dont consume anything*/
+  char tp;
+} begindot_pattern;
+typedef struct
+{                               /* if FNM_LEADING_DIR was set we go after / or 
0 to endstar pattern which accept everything. */
+  char tp;
+} slashend_pattern;
+
+typedef struct
+{                               /* we must call fnmatch */
+  char tp;
+  char *ptr;
+  char *prefix;
+  int flags;
+} fallback_pattern;
+typedef struct
+{                               /* we fold string and then call case sensitive 
version */
+  char tp;
+  fnmatch_state *states;
+} fold_pattern;
+typedef struct
+{                               /* we call our version and if succeed we check 
by fnmatch fallback */
+  char tp;
+  fnmatch_state *states;
+  fnmatch_state *fallback;
+} checkneeded_pattern;
+
+  /* like star but we dont go beyond / */
+typedef star_pattern starslash_pattern;
+enum PATTERNS
+{
+  PATTERN_chr,
+  PATTERN_star,
+  PATTERN_starslash,
+  PATTERN_end,
+  PATTERN_endstar,
+  PATTERN_bracket,
+  PATTERN_begindot,
+  PATTERN_slashend,
+  PATTERN_fallback,
+  PATTERN_fold,
+  PATTERN_checkneeded
+};
+union _patterns
+{
+  chr_pattern chr;
+  star_pattern star;
+  starslash_pattern starslash;
+  end_pattern end;
+  endstar_pattern endstar;
+  bracket_pattern bracket;
+  begindot_pattern begindot;
+  fallback_pattern fallback;
+  fold_pattern fold;
+  checkneeded_pattern checkneeded;
+};
+typedef union _patterns patterns;
+static void freepatterns (patterns * p);
+
+struct states
+{
+  patterns *p;
+  struct states *next;
+};
+struct _fnmatch_state
+{
+  struct states *states;        /* list of states of undrelying NFA. */
+  int flags;
+  int *refcount;
+  patterns *start;
+};
+
+int
+fnmatch_fallbacked (fnmatch_state * s)
+{
+  return (s->states->p->fallback.tp == PATTERN_fallback);
+}
+
+
+static void
+strfold (const char *str, char *buf)
+{
+  if (MB_CUR_MAX == 1)
+    {
+      while (*str)
+        *buf++ = tolower (*str++);
+      *buf = 0;
+    }
+  else
+    {
+#if WIDE_CHAR_SUPPORT
+      int i;
+      int len = 2 * strlen (str) + 1;
+      wchar_t *towc = ALLOCALIMIT (sizeof (wint_t) * (len));
+      towc[len - 1] = L'\0';
+      mbstowcs (towc, str, len);
+      if (towc[len - 1] != L'\0')
+        {
+          ALLOCAFREE (towc, sizeof (wint_t) * (len));
+          len = mbstowcs (NULL, str, 0) + 1;
+          towc = ALLOCALIMIT (sizeof (wint_t) * len);
+          mbstowcs (towc, str, len);
+        }
+      for (i = 0; towc[i] != L'\0'; i++)
+        {
+          towc[i] = towlower (towc[i]);
+        }
+      wcstombs (buf, towc, len);
+      ALLOCAFREE (towc, sizeof (wint_t) * (len));
+#endif
+    }
+}
+
+static fnmatch_state *
+initfnmatchstate ()
+{
+  fnmatch_state *st;
+  NOMEMTEST (st = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+  NOMEMTEST (st->states = Calloc (sizeof (struct states)));
+  st->states->next = NULL;
+  NOMEMTEST (st->refcount = Calloc (sizeof (int)));
+  *st->refcount = 1;
+  return st;
+err:
+  if (st)
+    {
+      free (st->states);
+      free (st->refcount);
+    }
+  free (st);
+  return NULL;
+}
+
+#define  FALLBACK_INIT(type) \
+   fnmatch_state *st; \
+  NOMEMTEST (st = initfnmatchstate ()); \
+  NOMEMTEST (st->start = st->states->p = Calloc (sizeof (type##_pattern))); \
+  st->states->p->fallback.tp = PATTERN_##type;
+
+static fnmatch_state *
+fnmatch_fallback (const char *str, int flags)
+{
+  FALLBACK_INIT (fallback);
+  NOMEMTEST (st->states->p->fallback.ptr = strdup (str));
+  st->states->p->fallback.flags = flags;
+  NOMEMTEST (st->states->p->fallback.prefix = strdup (""));
+  return st;
+err:
+  fnmatch_free (st);
+  return NULL;
+}
+
+static fnmatch_state *
+fold_fallback (fnmatch_state * s)
+{
+  FALLBACK_INIT (fold);
+  NOMEMTEST (s);
+  st->states->p->fold.states = s;
+  return st;
+err:
+  fnmatch_free (s);
+  fnmatch_free (st);
+  return NULL;
+}
+
+static fnmatch_state *
+checkneeded_fallback (fnmatch_state * s, fnmatch_state * fb)
+{
+  FALLBACK_INIT (checkneeded);
+  NOMEMTEST (fb);
+  NOMEMTEST (s);
+  st->states->p->checkneeded.states = s;
+  st->states->p->checkneeded.fallback = fb;
+  return st;
+err:
+  fnmatch_free (s);
+  fnmatch_free (fb);
+  fnmatch_free (st);
+  return NULL;
+}
+
+static int
+parse_bracket (const char *str, patterns * pat, int flags)
+{
+  unsigned char firstbyte;
+  char *chr;
+  int mlen;
+  const char *s = str;
+  int i;
+  pat->bracket.tp = PATTERN_bracket;
+  if (*s == '!' || (getenv ("POSIXLY_CORRECT") != NULL && *s == '^'))
+    {
+      pat->bracket.flags = BRACKET_INVMATCH;
+      s++;
+    }
+  if (flags & FNM_PATHNAME)
+    pat->bracket.flags |= BRACKET_SLASH;
+  if (!pat->bracket.chars)
+    {
+      if (!(pat->bracket.chars = Calloc (2 * strlen (s) + 2)))
+        return -3;
+    }
+  chr = pat->bracket.chars;
+  do
+    {
+      if (*s == 0 || *s == '-')
+        pat->bracket.flags |= BRACKET_MUSTCHECK | BRACKET_INVMATCH;
+      if (*s == '[' && (s[1] == '=' || s[1] == '.' || s[1] == ':'))
+        return -1;              /* could be multichar sequence */
+      mlen = mblen (s, MB_CUR_MAX);
+      if (mlen < 0)
+        return -3;
+      /* We dont have call mblen often because first character of multibyte
+         is often not allowed at other places. */
+      firstbyte = (unsigned char) *s;
+      pat->bracket.hash[firstbyte / 32] |= 1 << (firstbyte % 32);
+      memcpy (chr, s, mlen);
+      s += mlen;
+      chr += mlen + 1;
+    }
+  while (*s && *s != ']');
+  *chr = 0;
+  if (pat->bracket.flags & BRACKET_MUSTCHECK)
+    {                           /* bracket is too complicated to process here
+                                   we replace it with ? and when we match 
entire pattern
+                                   we call fnmatch it wasn't false positive */
+      *pat->bracket.chars = 0;
+      for (i = 0; i < (UCHAR_MAX / 32) + 1; i++)
+        pat->bracket.hash[i] = 0;
+    }
+  return s - str;
+}
+
+#define  NEXTPATTERN(type) pat->chr.tp = PATTERN_##type; pat = (patterns 
*)(((void *) pat)+sizeof (type##_pattern));
+#define  FALLBACK freepatterns(ret); free(ret);
+#define  BRACTEST(x) if ((x) == -3)  {goto err; }
+fnmatch_state *
+fnmatch_compile (const char *str, int flags)
+{
+  const char *s;
+  patterns *pat, *ret = NULL;
+  int i, pass;
+  int size = 0, patsize;
+  int checkneeded = 0;
+  char *codeset = nl_langinfo (CODESET);
+  char *buf;
+  if (MB_CUR_MAX == 1 || STREQ (codeset, "UTF-8"))
+    {
+      /* encoding can be handled as singlebyte */
+    }
+  else if (mblen (NULL, 0) == 0)
+    {                           /* stateless */
+      checkneeded = 1;
+    }
+  else
+    {
+      return fnmatch_fallback (str, flags);
+    }
+  if (flags & FNM_CASEFOLD)
+    {
+      NOMEMTEST (buf = Calloc (2 * strlen (str) + 1));
+      strfold (str, buf);
+      fnmatch_state *st =
+        fold_fallback (fnmatch_compile (buf, flags & (~FNM_CASEFOLD)));
+      free (buf);
+      NOMEMTEST (st);
+      return st;
+    }
+  ret = (patterns *) Calloc ((strlen (str) + 3) * 2 * MAX_PATTERN_SIZE);
+
+  for (pass = 0; pass < 2; pass++)
+    {
+      /* two passes in first we compute values we use in second to optimize
+       */
+      patsize = size;
+      size = 0;                 /* patsize-size is minimal size of rest of 
string */
+      /* it makes loops tighter and we dont need to check if we went outsize 
string. */
+      pat = ret;
+      if (flags & FNM_PERIOD && *str != '.')
+        {
+        NEXTPATTERN (begindot)}
+      for (s = str; *s;)
+        {
+          if (flags & FNM_EXTMATCH && *(s + 1) == '(' &&
+              (*s == '*' || *s == '+' || *s == '@' || *s == '!'))
+            {
+              FALLBACK;
+              return fnmatch_fallback (str, flags);
+            }
+          switch (*s)
+            {
+            case '*':
+              while (*(s + 1) == '?')
+                {
+                  /* by specif ication [!]] matches anything except ] */
+                  BRACTEST (parse_bracket ("!", pat, flags));
+                  size++;
+                  pat->bracket.sizerest = patsize - size;
+                  NEXTPATTERN (bracket);
+                  s++;
+                }
+              if (*(s + 1) || (flags & FNM_PATHNAME))
+                {
+                  if (pass)
+                    {
+                      /* we will find ocurence of next characters fast. */
+                      patterns *tmppat = pat;
+                      pat->star.sizerest = patsize - size;
+                      NEXTPATTERN (star);
+                      for (i = 0; pat->chr.tp == PATTERN_chr && pat->chr.chr
+                           && i < CHARSPERINT; i++)
+                        {
+                          tmppat->star.chars[i] = pat->chr.chr;
+                          NEXTPATTERN (chr);
+                        }
+                      if (i == 3)
+                        i = 2;
+                      tmppat->star.charno = i;
+                      pat = tmppat;
+                    }
+                  if (flags & FNM_PATHNAME)
+                    {
+                      NEXTPATTERN (starslash);
+                    }
+                  else
+                    {
+                      NEXTPATTERN (star);
+                    }
+                }
+              else
+                {
+                  NEXTPATTERN (endstar);
+                }
+              s++;
+              break;
+            case '?':
+              BRACTEST (parse_bracket ("!", pat, flags));
+              size++;
+              pat->bracket.sizerest = patsize - size;
+              NEXTPATTERN (bracket);
+              s++;
+              break;
+            case '[':
+              {
+                int siz = parse_bracket (s + 1, pat, flags);
+                BRACTEST (siz);
+                if (siz < 0)
+                  {
+                    FALLBACK;
+                    return fnmatch_fallback (str, flags);
+                  }
+                size++;
+                s += siz + 2;
+                pat->bracket.sizerest = patsize - size;
+                if (pat->bracket.flags & BRACKET_MUSTCHECK)
+                  {
+                    checkneeded = 1;
+                  }
+                NEXTPATTERN (bracket);
+              }
+              break;
+            default:
+              if (*s == '\\' && (!(flags & FNM_NOESCAPE)))
+                s++;
+              {
+                int nolp = (*s == '/' && (flags & FNM_PERIOD)
+                            && (flags & FNM_PATHNAME)
+                            && *(s + 1) != '.'), mlen = mblen (s, MB_CUR_MAX);
+                if (mlen < 0)
+                  goto err;
+                for (i = 0; i < mlen; i++, s++)
+                  {
+                    pat->chr.chr = *s;
+                    size++;
+                    NEXTPATTERN (chr);
+                  }
+                if (nolp)
+                  {
+                  NEXTPATTERN (begindot)}
+              }
+              break;
+            }
+        }
+    }
+  if (flags & FNM_LEADING_DIR)
+    {
+      NEXTPATTERN (slashend);
+      NEXTPATTERN (endstar);
+    }
+  else
+    {
+      NEXTPATTERN (end);
+    }
+
+  fnmatch_state *st;
+  NOMEMTEST (st = initfnmatchstate ());
+  st->states->p = ret;
+  st->start = ret;
+  if (checkneeded)
+    st = checkneeded_fallback (st, fnmatch_fallback (str, flags));
+  return st;
+err:
+  freepatterns (ret);
+  return NULL;
+}
+
+#undef NEXTPATTERN
+#define  NEXTPATTERN(type) p = (patterns *)(((void *) p)+sizeof 
(type##_pattern));
+
+static inline int
+fnmatch2_internal (const patterns * p, const char *str, int len)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+int
+fnmatch_exec (const fnmatch_state * p, const char *str)
+{
+  const struct states *s;
+  int len = strlen (str);
+  for (s = p->states; s; s = s->next)
+    {
+      int ret = fnmatch2_internal (s->p, str, len);
+      if (ret <= 0)
+        return ret;
+    }
+  return FNM_NOMATCH;
+}
+
+#define  FNMATCH_PREFIX
+static int
+fnmatch2_prefix_internal (patterns * p, const char *str, int len,
+                          fnmatch_state * ret)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+fnmatch_state *
+fnmatch_prefix (const fnmatch_state * p, const char *str)
+{
+  struct states *s;
+  int len = strlen (str);
+  fnmatch_state *ret;
+  NOMEMTEST (ret = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+  memcpy ((void *) ret, (void *) p, sizeof (fnmatch_state));
+  ret->states = NULL;
+  (*ret->refcount)++;
+  for (s = p->states; s; s = s->next)
+    {
+      fnmatch2_prefix_internal (s->p, str, len, ret);
+    }
+  return ret;
+err:
+  return NULL;
+}
+
+static void
+freepatterns (patterns * p)
+{
+  if (!p)
+    return;
+#define  SKIPPATTERN(type) case PATTERN_##type: NEXTPATTERN(type); break;
+  while (p->chr.tp != PATTERN_end && p->chr.tp != PATTERN_endstar)
+    {
+      switch (p->chr.tp)
+        {
+        case PATTERN_chr:
+          if (!p->chr.chr)
+            return;
+          NEXTPATTERN (chr);
+          break;
+          SKIPPATTERN (star);
+          SKIPPATTERN (starslash);
+          SKIPPATTERN (end);
+          SKIPPATTERN (endstar);
+        case PATTERN_bracket:
+          free (p->bracket.chars);
+          NEXTPATTERN (bracket);
+          break;
+          SKIPPATTERN (begindot);
+          SKIPPATTERN (slashend);
+        case PATTERN_fallback:
+          free (p->fallback.ptr);
+          return;
+        case PATTERN_fold:
+          return;
+        case PATTERN_checkneeded:
+          return;
+        }
+    }
+
+}
+
+void
+fnmatch_free (fnmatch_state * s)
+{                               /* fnmatch_free must process incomplete 
fnmatch_state too */
+  if (!s)
+    return;
+  struct states *st, *st2;
+  for (st = s->states; st;)
+    {
+      if (st->p)
+        {
+          switch (st->p->chr.tp)
+            {
+            case PATTERN_fallback:
+              free (st->p->fallback.prefix);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            case PATTERN_fold:
+              fnmatch_free (st->p->fold.states);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            case PATTERN_checkneeded:
+              fnmatch_free (st->p->checkneeded.states);
+              fnmatch_free (st->p->checkneeded.fallback);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            }
+        }
+      st2 = st->next;
+      free (st);
+      st = st2;
+    }
+  (*s->refcount)--;
+  if (!*s->refcount)
+    {
+      free (s->refcount);
+      freepatterns (s->start);
+      free (s->start);
+    }
+  free (s);
+}
diff --git a/lib/fnmatchcomp.h b/lib/fnmatchcomp.h
new file mode 100644
index 0000000..356fbf8
--- /dev/null
+++ b/lib/fnmatchcomp.h
@@ -0,0 +1,41 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://      www.gnu.org/licenses/ 
> . */
+
+struct _fnmatch_state;
+typedef struct _fnmatch_state fnmatch_state;
+
+  /* compile pattern */
+fnmatch_state *fnmatch_compile (const char *pattern, int flags);
+
+  /* add prefix to matched strings.
+     suppose you want match foo againist /home/foo, /home/bar/foo,
+     /home/bar/bar
+     fnmatch_state* s = fnmatch_compile("foo", 0);
+     fnmatch_state* home = fnmatch_prefix(s, "/home");
+     fnmatch_state* bar = fnmatch_prefix(home, "/bar");
+     fnmatch_exec(home, "/foo");
+     fnmatch_exec(bar, "/foo");
+     fnmatch_exec(bar, "/bar");
+   */
+fnmatch_state *fnmatch_prefix (const fnmatch_state * pattern,
+                               const char *prefix);
+
+int fnmatch_exec (const fnmatch_state * pattern, const char *string);
+
+void fnmatch_free (fnmatch_state * pattern);
+
+  /* For some encodings we decide just call fnmatch. In that case s you can 
gain some performance by checking fnmatch_fallbacked and if returns 1 call 
fnmatch yourself */
+int fnmatch_fallbacked (fnmatch_state * pattern);
diff --git a/lib/fnmatchcomp_loop.h b/lib/fnmatchcomp_loop.h
new file mode 100644
index 0000000..f722ffb
--- /dev/null
+++ b/lib/fnmatchcomp_loop.h
@@ -0,0 +1,234 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://      www.gnu.org/licenses/ 
> . */
+
+const char *s = str, *chars;
+int i, buflen;
+char *buf;
+while (1)
+  {
+#ifndef FNMATCH_PREFIX
+    int retval;
+    int sizerest, charno;
+#undef ADDSTATE
+#define  ADDSTATE
+#else
+    int statetwice;
+    patterns *pat, *pat2;
+    struct states *state;
+#undef ADDSTATE
+#define  ADDSTATE \
+  statetwice = 0; \
+  for (state = ret->states; state; state = state->next)\
+   if (state->p == p) statetwice = 1; \
+   if (!statetwice)  {\
+   state = calloc(1, sizeof (struct states)); \
+   state->next = ret->states; \
+   ret->states = state; \
+   state->p = p; \
+   }
+    if (!*s)
+      {
+        ADDSTATE;
+        return 0;
+      }
+#endif
+    switch (p->chr.tp)
+      {
+      case PATTERN_chr:
+        if (__builtin_expect ((*s != p->chr.chr), 1))
+          return FNM_NOMATCH;
+        NEXTPATTERN (chr);
+        s++;
+        break;
+      case PATTERN_starslash:
+#undef SLASHTEST
+#define  SLASHTEST if (str[i] == '/') return FNM_NOMATCH;
+#include "fnmatchcomp_star.h"
+        break;
+      case PATTERN_star:
+#undef SLASHTEST
+#define  SLASHTEST
+#include "fnmatchcomp_star.h"
+        break;
+
+      case PATTERN_end:
+        return (*s) ? FNM_NOMATCH : 0;
+        break;
+      case PATTERN_slashend:
+        if (*s == '/' || *s == 0)
+          {
+            NEXTPATTERN (slashend);
+          }
+        else
+          return FNM_NOMATCH;
+        break;
+      case PATTERN_endstar:
+        ADDSTATE;
+        return 0;
+        break;
+      case PATTERN_bracket:
+        {
+          int mat;
+          int mlen, mbuflen;
+          int firstbyte = (unsigned char) *s;
+          chars = p->bracket.chars;
+          if (MB_CUR_MAX == 1)
+            {
+              mlen = 1;
+              mat = (p->bracket.hash[firstbyte / 32] &
+                     (1 << (firstbyte % 32))) ? 1 : 0;
+            }
+          else
+            {
+              mlen = 0;
+              mat = 1;
+              if (!
+                  (p->bracket.hash[firstbyte / 32] & (1 << (firstbyte % 32))))
+                {
+                  mat = 0;
+                  goto match;
+                }
+              mlen = mblen (s, MB_CUR_MAX);
+              if (mlen < 0)
+                return FNM_NOMATCH;
+
+              while (*chars)
+                {
+                  mbuflen = strlen (chars);
+                  if (!strncmp (chars, s, mlen) && mlen == mbuflen)
+                    goto match;
+                  chars += mbuflen + 1;
+                }
+              mat = 0;
+            }
+        match:
+          if (mat ^ (p->bracket.flags & BRACKET_INVMATCH))
+            {
+              if (p->bracket.flags & BRACKET_SLASH && *s == '/')
+                return FNM_NOMATCH;
+              if (!mlen)
+                mlen = mblen (s, MB_CUR_MAX);
+              if (mlen < 0)
+                return FNM_NOMATCH;
+
+              s += mlen;
+              if (len - (str - s) < p->bracket.sizerest)
+                return FNM_NOMATCH;
+            }
+          else
+            {
+              return FNM_NOMATCH;
+            }
+          NEXTPATTERN (bracket);
+        }
+        break;
+      case PATTERN_begindot:
+        if (*s == '.')
+          return FNM_NOMATCH;
+        NEXTPATTERN (begindot);
+        break;
+      case PATTERN_fallback:
+
+        buflen = strlen (p->fallback.prefix) + len + 1;
+        NOMEMTEST (buf = ALLOCALIMIT (buflen));
+        strcpy (buf, p->fallback.prefix);
+        strcat (buf, s);
+#ifndef FNMATCH_PREFIX
+        retval = fnmatch (p->fallback.ptr, buf, p->fallback.flags);
+        ALLOCAFREE (buf, buflen);
+        return retval;
+#else
+        if (!(pat2 = malloc (sizeof (fallback_pattern))))
+          {
+            ALLOCAFREE (buf, buflen);
+            NOMEMTEST (NULL);
+          }
+        memcpy (pat2, p, sizeof (fallback_pattern));
+        if (!(pat2->fallback.prefix = strdup (buf)))
+          {
+            ALLOCAFREE (buf, buflen);
+            NOMEMTEST (NULL);
+          }
+        p = (patterns *) pat2;
+        ADDSTATE;
+        return FNM_NOMATCH;
+#endif
+        ALLOCAFREE (buf, buflen);
+
+      case PATTERN_fold:
+        buflen = 2 * len + 1;
+        NOMEMTEST (buf = (ALLOCALIMIT (buflen)));
+        strfold (str, buf);
+#ifndef FNMATCH_PREFIX
+        retval = fnmatch_exec (p->fold.states, buf);
+        ALLOCAFREE (buf, 2 * len + 1);
+        return retval;
+#else
+        if (!(pat2 = malloc (sizeof (fallback_pattern))))
+          {
+            ALLOCAFREE (buf, buflen);
+            NOMEMTEST (NULL);
+          }
+        pat2->fold.tp = PATTERN_fold;
+        if (!(pat2->fold.states =
+              (void *) fold_fallback (fnmatch_prefix (p->fold.states, buf))))
+          {
+            free (pat2);
+            ALLOCAFREE (buf, buflen);
+            NOMEMTEST (NULL);
+          }
+        p = (patterns *) pat2;
+        ADDSTATE;
+        ALLOCAFREE (buf, buflen);
+        return FNM_NOMATCH;
+#endif
+        break;
+      case PATTERN_checkneeded:
+#ifndef FNMATCH_PREFIX
+        {
+          int ret = fnmatch_exec (p->checkneeded.states, s);
+          if (ret > 0)
+            return FNM_NOMATCH;
+          if (ret < 0)
+            return ret;
+          return fnmatch_exec (p->checkneeded.fallback, s);
+        }
+#else
+        NOMEMTEST (pat2 = malloc (sizeof (checkneeded_pattern)));
+        pat2->checkneeded.tp = PATTERN_checkneeded;
+        if (!(pat2->checkneeded.states =
+              fnmatch_prefix (p->checkneeded.states, s)))
+          {
+            free (pat2);
+            NOMEMTEST (NULL);
+          }
+        if (!(pat2->checkneeded.fallback =
+              fnmatch_prefix (p->checkneeded.fallback, s)))
+          {
+            fnmatch_free (pat2->checkneeded.states);
+            free (pat2);
+            NOMEMTEST (NULL);
+          }
+        p = pat2;
+        ADDSTATE;
+        return FNM_NOMATCH;
+#endif
+        break;
+      }
+  }
+
+err:
+return -1;
diff --git a/lib/fnmatchcomp_star.h b/lib/fnmatchcomp_star.h
new file mode 100644
index 0000000..f542e3d
--- /dev/null
+++ b/lib/fnmatchcomp_star.h
@@ -0,0 +1,65 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://      www.gnu.org/licenses/ 
> . */
+
+#ifndef FNMATCH_PREFIX
+sizerest = p->star.sizerest;
+chars = p->star.chars;
+charno = p->star.charno;
+NEXTPATTERN (star);
+for (i = 0; i < charno; i++)
+  {
+    NEXTPATTERN (chr);
+  }
+
+switch (charno)
+  {
+  case 0:
+    for (i = s - str; i <= len - sizerest; i++)
+      {
+        if (!fnmatch2_internal (p, str + i, len - i))
+          return 0;
+        SLASHTEST;
+      }
+    break;
+#define  CASEN(type, no) \
+  case no:\
+  for (i = s-str; i <= len-sizerest; i++)  {\
+  if (__builtin_expect((*((type*)(str+i)) == *(type *) chars), 0)\
+  && !fnmatch2_internal(p, str+i+no, len-i-no)) return 0; \
+  SLASHTEST \
+  } \
+  break;
+    CASEN (char, 1);
+    CASEN (UINT2, 2);
+  case 3:
+    CASEN (UINT4, 4);
+  }
+
+return FNM_NOMATCH;
+#else
+pat = p;
+NEXTPATTERN (star);
+for (i = s - str; i < len; i++)
+  {
+    fnmatch2_prefix_internal (p, str + i, len - i, ret);
+    SLASHTEST;
+  }
+
+p = pat;
+ADDSTATE;
+return FNM_NOMATCH;
+#endif
+break;
diff --git a/m4/fnmatchcomp.m4 b/m4/fnmatchcomp.m4
new file mode 100644
index 0000000..c2d3812
--- /dev/null
+++ b/m4/fnmatchcomp.m4
@@ -0,0 +1,25 @@
+# fnmatchcomp.m4 serial 10
+dnl Copyright (C) 2005-2007 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+
+AC_DEFUN([gl_FNMATCHCOMP],
+[
+  gl_PREREQ_FNMATCHCOMP
+
+  FNMATCHCOMP_H=fnmatchcomp.h
+  AC_LIBOBJ([fnmatchcomp])
+  AC_SUBST([FNMATCHCOMP_H])
+])
+
+
+
+# Prerequisites of lib/fnmatchcomp.*.
+AC_DEFUN([gl_PREREQ_FNMATCHCOMP],
+[
+  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])dnl
+  AC_CHECK_FUNCS_ONCE([mblen wcstombs])
+  AC_CHECK_HEADERS_ONCE([stdlib.h wctype.h])
+])
diff --git a/modules/fnmatchcomp b/modules/fnmatchcomp
new file mode 100644
index 0000000..5aaf80a
--- /dev/null
+++ b/modules/fnmatchcomp
@@ -0,0 +1,37 @@
+Description:
+ Allows you to compile fnmatch pattern. You can add to this pattern prefix
+ which will be added as prefix of any string you will match with this pattern
+
+Files:
+lib/fnmatchcomp.c
+lib/fnmatchcomp.h
+lib/fnmatchcomp_loop.h
+lib/fnmatchcomp_star.h
+m4/fnmatchcomp.m4
+
+Depends-on:
+fnmatch
+extensions
+alloca
+stdbool
+wchar
+wctype
+memchr
+memcmp
+mbsrtowcs
+mbsinit
+
+configure.ac:
+gl_FNMATCHCOMP
+
+Makefile.am:
+BUILT_SOURCES += $(FNMATCHCOMP_H)
+
+Include:
+<fnmatchcomp.h>
+
+License:
+LGPLv2+
+
+Maintainer:
+Ondrej Bilka
diff --git a/modules/fnmatchcomp-tests b/modules/fnmatchcomp-tests
new file mode 100644
index 0000000..2219093
--- /dev/null
+++ b/modules/fnmatchcomp-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-fnmatchcomp.c
+
+Depends-on:
+fnmatchcomp
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-fnmatchcomp
+check_PROGRAMS += test-fnmatchcomp
diff --git a/modules/glob b/modules/glob
index b765be7..ac07549 100644
--- a/modules/glob
+++ b/modules/glob
@@ -22,6 +22,7 @@ strdup
 sys_stat
 unistd
 malloc-posix
+fnmatchcomp
 
 configure.ac:
 gl_GLOB
diff --git a/tests/test-fnmatchcomp.c b/tests/test-fnmatchcomp.c
new file mode 100644
index 0000000..5579924
--- /dev/null
+++ b/tests/test-fnmatchcomp.c
@@ -0,0 +1,132 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <nl_types.h>
+#include <langinfo.h>
+#include <locale.h>
+#define  _GNU_SOURCE
+#include <fnmatch.h>
+#include <fnmatchcomp.h>
+int
+fnmatch2 (const char *ptr, const char *str, int flags)
+{
+  return fnmatch_exec (fnmatch_compile (ptr, flags), str);
+}
+
+int
+main (int argc, char *argv[])
+{
+  int i;
+  fnmatch_state *x;
+  int l;
+  for (l = 0; l < 3; l++)
+    {
+      if (l == 0)
+        setlocale (LC_ALL, "C");
+      if (l == 1)
+        setlocale (LC_ALL, "en_US.UTF-8");
+      if (l == 2)
+        setlocale (LC_ALL, "zh_TW.BIG5");
+
+      int cnt = 0;
+      char *fooptr[20];
+#define  TESTPREF(st, pat, str, fullstr, flag) if (fnmatch(pat, fullstr, flag) 
!= fnmatch_exec(st, str))  { printf ("partial %s = ~ %s failed flags %i \n", 
pat, fullstr, flag); }
+      int j;
+      fooptr[0] = "*?abc*";
+
+      fooptr[1] = "*a[a-z]*";
+
+      fooptr[2] = "*a[[:alnum:], ]*";
+      fooptr[3] = "*foo*";
+      fooptr[4] = "*[xf]*";
+      fooptr[5] = "*abcd*";
+      fooptr[6] = "*a[[ = alnum = ], ]*";
+      fooptr[7] = " /* o*";
+
+      for (j = 0; j < 8; j++)
+        {
+
+          for (i = 0; i < 32; i++)
+            {
+              fnmatch_state *s = fnmatch_compile (fooptr[j], i);
+              fnmatch_state *home = fnmatch_prefix (s, "/home");
+              fnmatch_state *bar = fnmatch_prefix (home, "/bar");
+              fnmatch_state *baz = fnmatch_prefix (bar, "/baz");
+              TESTPREF (home, fooptr[j], "/foo", "/home/foo", i);
+              TESTPREF (bar, fooptr[j], "/foo", "/home/bar/foo", i);
+              TESTPREF (bar, fooptr[j], "/bar", "/home/bar/bar", i);
+              TESTPREF (baz, fooptr[j], "/blabla", "/home/bar/baz/blabla", i);
+              fnmatch_free (home);
+              fnmatch_free (bar);
+              fnmatch_free (s);
+              fnmatch_free (baz);
+            }
+        }
+      int k;
+      for (k = 0; k < 3; k++)
+        {
+#define  TESTP(pat, str, flag) x = fnmatch_compile(pat, flag); if 
(fnmatch(pat, str, flag) != fnmatch_exec(x, str))  { printf ("%s = ~ %s failed 
flags %i \n", pat, str, flag); } fnmatch_free(x);
+
+          for (i = 0; i < 32; i++)
+            {
+              TESTP (" */ aa*", "asdasd/aab", i);
+              TESTP (" */ [ab]a*", "as/asd/aab", i);
+              TESTP ("*ads*", ".asdvdf", i);
+              TESTP (" */ ?as", "asdasd/.asasd", i);
+              TESTP (" */ [a-z]as", "asdasd/.asasd", i);
+              TESTP (" */ [a-z]a*", "asdasd/aab", i);
+            }
+          /* m4 test of fnmatch */
+          char const *Apat = 'A' < '\\' ? "[A-\\\\]" : "[\\\\-A]";
+          char const *apat = 'a' < '\\' ? "[a-\\\\]" : "[\\\\-a]";
+          static char const A_1[] = { 'A' - 1, 0 };
+          static char const A01[] = { 'A' + 1, 0 };
+          static char const a_1[] = { 'a' - 1, 0 };
+          static char const a01[] = { 'a' + 1, 0 };
+          static char const bs_1[] = { '\\' - 1, 0 };
+          static char const bs01[] = { '\\' + 1, 0 };
+#define  y TESTP
+#define  n TESTP
+          n ("a*", "", 0)
+            y ("a*", "abc", 0)
+            n ("d* /* 1", "d/s/1", FNM_PATHNAME)
+            y ("a\\bc", "abc", 0)
+            n ("a\\bc", "abc", FNM_NOESCAPE)
+            y ("*x", ".x", 0)
+            n ("*x", ".x", FNM_PERIOD)
+            y (Apat, "\\", 0)
+            y (Apat, "A", 0)
+            y (apat, "\\", 0)
+            y (apat, "a", 0)
+            n (Apat, A_1, 0)
+            n (apat, a_1, 0)
+            y (Apat, A01, 0)
+            y (apat, a01, 0)
+            y (Apat, bs_1, 0)
+            y (apat, bs_1, 0)
+            n (Apat, bs01, 0)
+            n (apat, bs01, 0)
+            y ("xxXX", "xXxX", FNM_CASEFOLD)
+            y ("a++(x|yy)b", "a+xyyyyxb", FNM_EXTMATCH)
+            n ("d* /* 1", "d/s/1", FNM_FILE_NAME)
+            y ("*", "x", FNM_FILE_NAME | FNM_LEADING_DIR)
+            y ("x*", "x/y/z", FNM_FILE_NAME | FNM_LEADING_DIR)
+            y ("*c*", "c/x", FNM_FILE_NAME | FNM_LEADING_DIR);
+        }
+    }
+  setlocale (LC_ALL, "zh_TW.BIG5");
+  char tst[] = { 0xa5, 'a', 'b', 'c', 0 };
+  char ptr1[] = { '*', 0xa5, 'a', '*', 0 };
+  char ptr2[] = { '*', '[', 0xa5, 'b', ']', '*', 0 };
+  for (i = 0; i < 32; i++)
+    {
+      TESTP ("*a*", tst, i);
+      TESTP ("*b*", tst, i);
+      TESTP ("*c*", tst, i);
+      TESTP (ptr1, tst, i);
+      TESTP (ptr2, tst, i);
+    }
+
+
+  return (0);
+}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]