bug-findutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Patch] Locate: Fold case once at most. (was: Re: [Patch] Locate: Read e


From: Bas van Gompel
Subject: [Patch] Locate: Fold case once at most. (was: Re: [Patch] Locate: Read each database only once.)
Date: Mon, 30 May 2005 21:11:46 +0200 (MET DST)
User-agent: slrn/0.9.8.1 (Win32) Hamster/2.0.6.0 KorrNews/4.2

Op Mon, 30 May 2005 09:47:34 +0100 schreef James Youngman
in <address@hidden>:
:  On Mon, May 30, 2005 at 04:31:52AM +0200, Buzz wrote:
:
: > * I really tried to get this patch as small as possible.
: > * The case-folding /can/ be done less often.
:
:  Yes.  The old code used to do the case-folding on only the new
:  characters read out of the database (that is, each filename character
:  read from the database would be case-folded at most once).  However,
:  when I refactored the code to use the Visitor pattern, I sacrificed
:  that optimisation.

That is not what I meant. I meant currently there are multiple copies
of the case-folded string kept. (one per pattern).
I include a patch for this issue.

:  If you can actually measure the performance impact of the current
:  slightly stupid implementation of visit_substring_match_casefold(),
:  I'll certainly consider applying an optimisation patch.

The following patch does not address this. Testing for it might be
difficult and time-consuming, considering the --basename option.

: > * These changes prepare a path towards a ``--and'' (``-a'') option.
:
:  Are you envisaging this option as a single-use option (indicating that
:  all patterns must match) or as a conjunction as used in "find"?  For
:  example are you suggesting...
:
:     locate foo -a bar -o baz

The former.

Example:
locate -iaE foo bar 'b?z'

:  I suppose you can already achieve something similar with
:     
:     ( locate 'foo.*bar' 'bar.*foo' ; locate baz  ) | sort -u

The above would be harder to code, and definitely slower...

:  What does the rest of the list think about "-a"?
:
: > * It is possible to allow reading a db from stdin.
: > * Much of the ``main'' loop can be moved into ``visitor''s.
:
:  If you're talking about refactoring locate(), then yes, it is a bit
:  too long.  I'm not sure the current "Visitor" pattern is appropriate
:  for this though.

I am. The "Visistor" pattern seems fine to me. (Except maybe the
{munged,original}_filename might be entered into a structure,
providing space for more parameters. (This should improve speed
for fastcall-compiled code))


Now for the casefolding-patch:


Suggested ChangeLog entry:

2005-05-30  Bas van Gompel  <address@hidden>

        * locate/locate.c: Fold case once, only when needed.


diff --exclude='*~' -Ndrup curr/findutils/locate/locate.c 
mine2/findutils/locate/locate.c
--- findutils/locate/locate.c   2005-05-30 15:13:34.000000000 +0200
+++ findutils/locate/locate.c   2005-05-30 20:08:10.000000000 +0200
@@ -266,11 +266,20 @@ struct locate_stats
 static struct locate_stats statistics;
 
 
-struct casefolder
+struct stringbuf
 {
-  const char *pattern;
   char *buffer;
   size_t buffersize;
+  size_t *soffs;
+  size_t *preqlen;
+};
+static struct stringbuf casebuf;
+
+
+struct casefolder
+{
+  const char *pattern;
+  struct stringbuf *pbuf;
 };
 
 struct regular_expression
@@ -381,6 +390,24 @@ visit_justprint(const char *munged_filen
   return VISIT_CONTINUE;
 }
 
+static int
+visit_casefold(const char *munged_filename,
+              const char *original_filename,
+              void *context)
+{
+  struct stringbuf *b = context;
+  (void) original_filename;
+  
+  if (*b->preqlen+1 > b->buffersize)
+    {
+      b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using 
extendbuf(). */
+      b->buffersize = *b->preqlen+1;
+    }
+  lc_strcpy(b->buffer, munged_filename);
+
+  return VISIT_CONTINUE;
+}
+  
 /* visit_existing_follow implements -L -e */
 static int
 visit_existing_follow(const char *munged_filename,
@@ -500,19 +527,12 @@ visit_substring_match_casefold(const cha
                               const char *original_filename,
                               void *context)
 {
-  struct casefolder * p = context;
-  size_t len = strlen(munged_filename);
-
+  const struct casefolder * p = context;
+  const struct stringbuf * b = p->pbuf;
+  (void) munged_filename;
   (void) original_filename;
-  if (len+1 > p->buffersize)
-    {
-      p->buffer = xrealloc(p->buffer, len+1); /* XXX: consider using 
extendbuf(). */
-      p->buffersize = len+1;
-    }
-  lc_strcpy(p->buffer, munged_filename);
-  
-  
-  if (NULL != strstr(p->buffer, p->pattern))
+
+  if (NULL != strstr(b->buffer, p->pattern))
     return VISIT_ACCEPTED;
   else
     return VISIT_REJECTED;
@@ -656,6 +676,7 @@ locate (int argc,
 {
   char *pathpart;              /* A pattern to consider. */
   int argn;                    /* Index to current pattern in argv. */
+  int need_fold;       /* Set when folding and any pattern is non-glob. */
   FILE *fp;                    /* The pathname database.  */
   int c;                       /* An input byte.  */
   int nread;                /* number of bytes read from an entry. */
@@ -681,6 +702,25 @@ locate (int argc,
   past_pat_inspector = NULL;
 
 
+  /* See if we need fold. */
+  if (ignore_case && !regex)
+    for ( argn = 0; argn < argc; argn++ )
+      {
+        pathpart = argv[argn];
+        if (!contains_metacharacter(pathpart))
+         {
+           need_fold = 1;
+           break;
+         }
+      }
+
+  if (need_fold)
+    {
+      add_visitor(visit_casefold, &casebuf);
+      casebuf.preqlen = &pathsize;
+      casebuf.soffs = &count;
+    }
+  
   /* Add an inspector for each pattern we're looking for. */
   for ( argn = 0; argn < argc; argn++ )
     {
@@ -719,8 +759,7 @@ locate (int argc,
            {
              struct casefolder * cf = xmalloc(sizeof(*cf));
              cf->pattern = pathpart;
-             cf->buffer = NULL;
-             cf->buffersize = 0;
+             cf->pbuf = &casebuf;
              add_visitor(visit_substring_match_casefold, cf);
              /* If we ignore case, convert it to lower now so we don't have to
               * do it every time


L8r,

Buzz.
-- 
  ) |  | ---/ ---/  Yes, this | This message consists of true | I do not
--  |  |   /    /   really is |   and false bits entirely.    | mail for
  ) |  |  /    /    a 72 by 4 +-------------------------------+ any1 but
--  \--| /--- /---  .sigfile. |   |perl -pe "s.u(z)\1.as."    | me. 4^re




reply via email to

[Prev in Thread] Current Thread [Next in Thread]