bug-findutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Significant performance improvements to locate, struct order opt


From: Devin Hussey
Subject: [PATCH] Significant performance improvements to locate, struct order optimizations
Date: Sat, 12 May 2018 19:57:38 -0400

Hello, I have added some optimizations for struct ordering and made
some significant
performance improvements to the locate utility, thanks to
Instruments.app on macOS.

I did include a third party strstr implementation, if it is okay. It
is under the BSD
3-clause. If you are not okay with that, feel free to not add it, but
it is really, really fast.

I don't feel like trying to reexplain it, so read the patch message for details.

before: locate -c file: 3.99s
after: locate -c file: 2.19s

----------

>From 095894c3c0392a0a4119926ec75562b20719d40e Mon Sep 17 00:00:00 2001
From: Devin Hussey <address@hidden>
Date: Sat, 12 May 2018 19:15:07 -0400
Subject: [PATCH] Significant performance improvements (at least on macOS).

find and locate:
 -> Reordered struct members to reduce padding and slightly reduce
    memory usage.

locate:
 -> visit_locate02_format
   1. Forced Gnulib to use its implementation, it is much faster,
      especially when compared to macOS.
   2. The previous implementation would cause getdelim() to realloc
      and then have that memory immediately freed once per file.
      - The buffer to be passed to getdelim() is now in a top-level
        static, with a respective size field.
      - getdelim() takes a minimum size by ref and will automatically
        resize the buffer when needed, updating that value with its
        new size.
      - By storing this and merely memsetting it to 0, we can
        recycle it.
  - visit_substring_match_nocasefold_buffer
   1. By default, I replaced the strstr implementation with the
      fast_strstr implementation by Raphael Javaux (BSD 3-Clause,
      https://github.com/RaphaelJ/fast_strstr).
      - It can be toggled in the configure script. I couldn't
        submodule it because it was not compatible with c90
        (it had // comments and mixed variable declarations),
        so I had to edit it slightly.
      - I also used "restrict" on the declaration, which recommends
        optimizations. Autoconf should gracefully no-op it on
        incompatible compilers.
      - Feel free to remove this, but from my tests, this can shave
        a good half second off the test below.
   2. I also forced enabling the Gnulib memchr(), as it is also
      much faster than the system implementation if you are using
      Gnulib's strstr.

before: locate -c file: 3.99s
after: locate -c file: 2.19s

Signed-off-by: Devin Hussey <address@hidden>
---
 bootstrap.conf       |   1 +
 configure.ac         |  19 ++++++-
 find/defs.h          | 130 ++++++++++++++++++++++---------------------
 locate/Makefile.am   |   4 ++
 locate/fast_strstr.c | 114 +++++++++++++++++++++++++++++++++++++
 locate/fast_strstr.h |  12 ++++
 locate/locate.c      |  39 +++++++++++--
 7 files changed, 246 insertions(+), 73 deletions(-)
 create mode 100644 locate/fast_strstr.c
 create mode 100644 locate/fast_strstr.h

diff --git a/bootstrap.conf b/bootstrap.conf
index cfa839a1..100f0b29 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -125,6 +125,7 @@ gnulib_modules="
     mbscasestr
     mbswidth
     mbsstr
+    memchr
     mktime
     modechange
     modf
diff --git a/configure.ac b/configure.ac
index 2acf54c7..02837ab5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -70,7 +70,6 @@ AC_ARG_ENABLE(d_type-optimisation,
  AS_HELP_STRING(--enable-d_type-optimisation,Synonym for
--enable-d_type-optimization),
  [ac_cv_d_type=$enableval],[])

-
 AC_MSG_CHECKING([for leaf optimisation])
 if test x$ac_cv_leaf_optimisation = xno; then
    AC_MSG_RESULT([no])
@@ -86,13 +85,15 @@ if test -n "$DEFAULT_ARG_SIZE"; then
      [If defined, the default argument size used in child processes])
 fi

-
-
 dnl Checks for programs.
 AC_PROG_CC
 AC_PROG_CPP

 dnl for gnulib
+dnl memchr and getdelim are much more efficient in gnulib
+gl_cv_func_memchr_works=no
+gl_cv_func_working_getdelim=no
+
 gl_EARLY
 AC_PROG_LN_S
 AC_PROG_INSTALL
@@ -105,6 +106,18 @@ AC_SYS_LARGEFILE

 gl_INIT

+
+AC_ARG_WITH(fast-strstr,
+  AS_HELP_STRING([--without-fast-strstr], [Use the system/Gnulib
strstr instead of fast_strstr]),
+  [fast_strstr=no],
+  [fast_strstr=yes])
+
+AM_CONDITIONAL(FAST_STRSTR, [[test x$fast_strstr = xyes]])
+
+if test x$fast_strstr = xyes; then
+  AC_DEFINE([FAST_STRSTR], 1, [Use fast_strstr instead of the system
implementation])
+fi
+
 AC_ARG_ENABLE(compiler-warnings,
   AS_HELP_STRING(--enable-compiler-warnings,Enable many compiler warnings),
   [
diff --git a/find/defs.h b/find/defs.h
index 63f3b2a3..47a3f35f 100644
--- a/find/defs.h
+++ b/find/defs.h
@@ -224,11 +224,11 @@ enum SegmentKind

 struct segment
 {
-  enum SegmentKind segkind;     /* KIND_FORMAT, KIND_PLAIN, KIND_STOP */
-  char format_char[2]; /* Format chars if kind is KIND_FORMAT */
   char *text; /* Plain text or `%' format string. */
-  int text_len; /* Length of `text'. */
   struct segment *next; /* Next segment for this predicate. */
+  int text_len; /* Length of `text'. */
+  enum SegmentKind segkind;     /* KIND_FORMAT, KIND_PLAIN, KIND_STOP */
+  char format_char[2]; /* Format chars if kind is KIND_FORMAT */
 };

 struct format_val
@@ -297,17 +297,17 @@ struct predicate
   /* True if this predicate node requires knowledge of the inode number. */
   bool need_inum;

-  enum EvaluationCost p_cost;
-
-  /* est_success_rate is a number between 0.0 and 1.0 */
-  float est_success_rate;
-
   /* True if this predicate should display control characters literally */
   bool literal_control_chars;

   /* True if this predicate didn't originate from the user. */
   bool artificial;

+  enum EvaluationCost p_cost;
+
+  /* est_success_rate is a number between 0.0 and 1.0 */
+  float est_success_rate;
+
   /* The raw text of the argument of this predicate. */
   const char *arg_text;

@@ -553,12 +553,24 @@ enum DebugOption

 struct options
 {
-  /* If true, process directory before contents.  True unless -depth given. */
-  bool do_dir_first;
-  /* If true, -depth was EXPLICITLY set (as opposed to having been turned
-   * on by -delete, for example).
-   */
-   bool explicit_depth;
+
+  struct timespec      start_time; /* Time at start of execution.  */
+
+  /* Either one day before now (the default), or the start of today
(if -daystart is given). */
+  struct timespec      cur_day_start;
+
+  /* Pointer to the function used to stat files. */
+  int (*xstat) (const char *name, struct stat *statbuf);
+
+  /* function used to get file context */
+  int (*x_getfilecon) (int, const char *, security_context_t *);
+
+  /* bitmask for debug options */
+  unsigned long debug_options;
+
+  int output_block_size; /* Output block size.  */
+
+  enum SymlinkOption symlink_handling;

   /* If >=0, don't descend more than this many levels of subdirectories. */
   int maxdepth;
@@ -566,6 +578,29 @@ struct options
   /* If >=0, don't process files above this level. */
   int mindepth;

+  /* The variety of regular expression that we support.
+   * The default is POSIX Basic Regular Expressions, but this
+   * can be changed with the positional option, -regextype.
+   */
+  int regex_options;
+
+  /* How should we quote filenames in error messages and so forth?
+   */
+  enum quoting_style err_quoting_style;
+
+  /* Optimisation level.  One is the default.
+   */
+  unsigned short optimisation_level;
+
+
+  /* If true, process directory before contents.  True unless -depth given. */
+  bool do_dir_first;
+
+  /* If true, -depth was EXPLICITLY set (as opposed to having been turned
+   * on by -delete, for example).
+   */
+  bool explicit_depth;
+
   /* If true, do not assume that files in directories with nlink == 2
      are non-directories. */
   bool no_leaf_check;
@@ -593,67 +628,29 @@ struct options
    */
   bool posixly_correct;

-  struct timespec      start_time; /* Time at start of execution.  */
-
-  /* Either one day before now (the default), or the start of today
(if -daystart is given). */
-  struct timespec      cur_day_start;
-
   /* If true, cur_day_start has been adjusted to the start of the day. */
   bool full_days;

-  int output_block_size; /* Output block size.  */
-
-  /* bitmask for debug options */
-  unsigned long debug_options;
-
-  enum SymlinkOption symlink_handling;
-
-
-  /* Pointer to the function used to stat files. */
-  int (*xstat) (const char *name, struct stat *statbuf);
-
-
   /* Indicate if we can implement safely_chdir() using the O_NOFOLLOW
    * flag to open(2).
    */
   bool open_nofollow_available;
-
-  /* The variety of regular expression that we support.
-   * The default is POSIX Basic Regular Expressions, but this
-   * can be changed with the positional option, -regextype.
-   */
-  int regex_options;
-
-  /* function used to get file context */
-  int (*x_getfilecon) (int, const char *, security_context_t *);
-
-  /* Optimisation level.  One is the default.
-   */
-  unsigned short optimisation_level;
-
-
-  /* How should we quote filenames in error messages and so forth?
-   */
-  enum quoting_style err_quoting_style;
 };


 struct state
 {
-  /* Current depth; 0 means current path is a command line arg. */
-  int curdepth;
-
-  /* If true, we have called stat on the current path. */
-  bool have_stat;
-
-  /* If true, we know the type of the current path. */
-  bool have_type;
-  mode_t type; /* this is the actual type */

   /* The file being operated on, relative to the current directory.
      Used for stat, readlink, remove, and opendir.  */
   const char *rel_pathname;

+  /* Shared files, opened via the interface in sharefile.h. */
+  sharefile_handle shared_files;
+
+  /* Current depth; 0 means current path is a command line arg. */
+  int curdepth;
+
   /* The directory fd to which rel_pathname is relative.  This is relevant
    * when we're navigating the hierarchy with fts() and using FTS_CWDFD.
    */
@@ -662,24 +659,29 @@ struct state
   /* Length of starting path. */
   int starting_path_length;

+  /* Status value to return to system. */
+  int exit_status;
+
+  mode_t type; /* this is the actual type */
+  /* If true, we know the type of the current path. */
+  bool have_type;
+
   /* If true, don't descend past current directory.
      Can be set by -prune, -maxdepth, and -xdev/-mount. */
   bool stop_at_current_level;

-  /* Status value to return to system. */
-  int exit_status;
-
   /* True if there are any execdirs.  This saves us a pair of fchdir()
    * calls for every directory we leave if it is false.  This is just
    * an optimisation.  Set to true if you want to be conservative.
    */
   bool execdirs_outstanding;

-  /* Shared files, opened via the interface in sharefile.h. */
-  sharefile_handle shared_files;
-
   /* Avoid multiple error messages for the same file. */
   bool already_issued_stat_error_msg;
+
+
+  /* If true, we have called stat on the current path. */
+  bool have_stat;
 };

 /* exec.c */
diff --git a/locate/Makefile.am b/locate/Makefile.am
index 09de3801..9d55d297 100644
--- a/locate/Makefile.am
+++ b/locate/Makefile.am
@@ -33,6 +33,10 @@ DISTCLEANFILES = dblocation.texi
 locate_SOURCES = locate.c word_io.c
 nodist_locate_TEXINFOS = dblocation.texi

+if FAST_STRSTR
+  locate_SOURCES += fast_strstr.c
+endif
+
 AM_CPPFLAGS = -I$(top_srcdir)/lib -I../gl/lib -I$(top_srcdir)/gl/lib
-DLOCATE_DB=\"$(LOCATE_DB)\" -DLOCALEDIR=\"$(localedir)\"

 LDADD = ../lib/libfind.a ../gl/lib/libgnulib.a $(LIB_CLOSE) $(LIBINTL)
diff --git a/locate/fast_strstr.c b/locate/fast_strstr.c
new file mode 100644
index 00000000..6221e4f3
--- /dev/null
+++ b/locate/fast_strstr.c
@@ -0,0 +1,114 @@
+/*
+ * This algorithm is licensed under the open-source BSD3 license
+ *
+ * Copyright (c) 2014, Raphael Javaux
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
contributors
+ * may be used to endorse or promote products derived from this
software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * A faster strstr implementation.
+ * Written by: Raphael Javaux
+ * Original source: https://github.com/RaphaelJ/fast_strstr
+ *
+ * Modified by Devin Hussey <address@hidden> for ANSI C support, and
+ * to use restrict.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Finds the first occurrence of the sub-string needle in the string haystack.
+ * Returns NULL if needle was not found.
+ */
+char *fast_strstr(const char * restrict haystack, const char * restrict needle)
+{
+    const char    needle_first  = *needle;
+    const char   *i_haystack;
+    const char   *i_needle;
+    unsigned int  sums_diff;
+    bool          identical = true;
+    size_t        needle_len;
+    size_t        needle_len_1;
+    const char   *sub_start;
+
+    if (!*needle) /* Empty needle. */
+        return (char *) haystack;
+
+    /* Runs strchr() on the first section of the haystack as it has a lower
+     * algorithmic complexity for discarding the first non-matching
characters.*/
+    haystack = strchr(haystack, needle_first);
+    if (!haystack) /* First character of needle is not in the haystack. */
+        return NULL;
+
+    /* First characters of haystack and needle are the same now. Both are
+     * guaranteed to be at least one character long.
+     * Now computes the sum of the first needle_len characters of haystack
+     * minus the sum of characters values of needle. */
+
+    i_haystack   = haystack + 1;
+    i_needle     = needle   + 1;
+
+    sums_diff    = *haystack;
+
+    while (*i_haystack && *i_needle) {
+        sums_diff += *i_haystack;
+        sums_diff -= *i_needle;
+        identical &= *i_haystack++ == *i_needle++;
+    }
+
+    /* i_haystack now references the (needle_len + 1)-th character. */
+
+    if (*i_needle) /* haystack is smaller than needle. */
+        return NULL;
+    else if (identical)
+        return (char *) haystack;
+
+    needle_len    = i_needle - needle;
+    needle_len_1  = needle_len - 1;
+
+    /* Loops for the remaining of the haystack, updating the sum
iteratively. */
+    for (sub_start = haystack; *i_haystack; i_haystack++) {
+        sums_diff -= *sub_start++;
+        sums_diff += *i_haystack;
+
+        /* Since the sum of the characters is already known to be equal at that
+         * point, it is enough to check just needle_len-1 characters for
+         * equality. */
+        if (
+               sums_diff == 0
+            && needle_first == *sub_start /* Avoids some calls to memcmp. */
+            && memcmp(sub_start, needle, needle_len_1) == 0
+        )
+            return (char *) sub_start;
+    }
+
+    return NULL;
+}
diff --git a/locate/fast_strstr.h b/locate/fast_strstr.h
new file mode 100644
index 00000000..06fbfb1b
--- /dev/null
+++ b/locate/fast_strstr.h
@@ -0,0 +1,12 @@
+
+
+#ifndef FAST_STRSTR_H
+#define FAST_STRSTR_H
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+char *fast_strstr(const char * restrict haystack, const char *
restrict needle);
+
+#endif
diff --git a/locate/locate.c b/locate/locate.c
index a10d86d2..41e2a5f7 100644
--- a/locate/locate.c
+++ b/locate/locate.c
@@ -101,6 +101,9 @@
 #include "printquoted.h"
 #include "splitstring.h"

+#ifdef FAST_STRSTR
+#  include "fast_strstr.h"
+#endif

 /* Warn if a database is older than this.  8 days allows for a weekly
    update that takes up to a day to perform.  */
@@ -215,6 +218,12 @@ contains_metacharacter (const char *s)
     return 1;
 }

+/* A reusable buffer for locate_read_str() that will automatically increase
+ * from the getdelim() call itself.
+ */
+static char * locate_buf  = NULL;
+static size_t locate_size = 0;
+
 /* locate_read_str()
  *
  * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
@@ -223,6 +232,13 @@ contains_metacharacter (const char *s)
  * is made regarding the content of the data (i.e. the implementation is
  * 8-bit clean, the only delimiter is DELIMITER).
  *
+ * locate_buf is a dynamically expanding buffer specifically for getdelim().
+ * getdelim() will automatically realloc a buffer that is too small and return
+ * the new size, which becomes our minimum size the next time this is called.
+ *
+ * Without this buffer, getdelim() would repeatedly call realloc() on an empty
+ * buffer, which is a performance bottleneck.
+ *
  * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
  * has been removed from gnulib.
  *
@@ -232,15 +248,23 @@ contains_metacharacter (const char *s)
 static int
 locate_read_str (char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
 {
-  char * p = NULL;
-  size_t sz = 0;
+  size_t sz = locate_size;
   int nread;
   size_t needed;

-  nread = getdelim (&p, &sz, delimiter, fp);
+  /* Clean out locate_buf without freeing or reallocating. */
+  if (locate_buf != NULL)
+    memset (locate_buf, '0', locate_size * sizeof (char));
+
+  nread = getdelim (&locate_buf, &sz, delimiter, fp);
+
+  /* If getdelim increased the size of locate_buf, update locate_size. */
+  if (sz > locate_size)
+      locate_size = sz;
+
   if (nread >= 0)
     {
-      assert (p != NULL);
+      assert (locate_buf != NULL);

       needed = offs + nread + 1u;
       if (needed > (*siz))
@@ -256,8 +280,7 @@ locate_read_str (char **buf, size_t *siz, FILE
*fp, int delimiter, int offs)
               *buf = pnew;
             }
         }
-      memcpy((*buf)+offs, p, nread + 1);
-      free(p);
+      memcpy((*buf)+offs, locate_buf, nread + 1);
     }
   return nread;
 }
@@ -686,7 +709,11 @@ visit_substring_match_nocasefold_narrow (struct
process_data *procdata, void *co
 {
   const char *pattern = context;
   assert (MB_CUR_MAX == 1);
+#ifdef FAST_STRSTR
+  if (NULL != fast_strstr (procdata->munged_filename, pattern))
+#else
   if (NULL != strstr (procdata->munged_filename, pattern))
+#endif
     return VISIT_ACCEPTED;
   else
     return VISIT_REJECTED;
-- 
2.17.0



reply via email to

[Prev in Thread] Current Thread [Next in Thread]