bug-make
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/5] optimize checking for globs


From: Paolo Bonzini
Subject: [PATCH 1/5] optimize checking for globs
Date: Wed, 2 Nov 2016 17:24:14 +0100

This removes the hotspot in parse_file_seq's call to strpbrk, by using
SSE2 vector instructions.  The resulting speedup on QEMU's noop build
is around 6% (15.4 seconds to 14.5).

The code is roughly based on GCC's similar optimizations in the lexer.

* read-opt.c: New.
* read.c (parse_file_seq): Use needs_glob instead of strpbrk.
* Makefile.am (make_SOURCES): Add read-opt.c.
* Makefile.in: Regenerate.
---
        (I also had an SSE4.2 version that gave another 1-2% improvement,
        but it fails some tests and I also don't feel like adding a lot
        of code to detect the instruction set.  All x86-64 machines have
        SSE2, so this provides the biggest bang for the buck).

 Makefile.am |  6 ++---
 Makefile.in | 19 ++++++-------
 makeint.h   |  1 +
 read-opt.c  | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 read.c      |  2 +-
 5 files changed, 103 insertions(+), 13 deletions(-)
 create mode 100644 read-opt.c

diff --git a/Makefile.am b/Makefile.am
index c88c465..ef5e1f9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -44,9 +44,9 @@ endif
 
 make_SOURCES = ar.c arscan.c commands.c default.c dir.c expand.c file.c \
                function.c getopt.c getopt1.c guile.c implicit.c job.c load.c \
-               loadapi.c main.c misc.c $(ossrc) output.c read.c remake.c \
-               rule.c signame.c strcache.c variable.c version.c vpath.c \
-               hash.c $(remote)
+               loadapi.c main.c misc.c $(ossrc) output.c read.c read-opt.c \
+               remake.c rule.c signame.c strcache.c variable.c version.c \
+               vpath.c hash.c $(remote)
 
 EXTRA_make_SOURCES = vmsjobs.c remote-stub.c remote-cstms.c
 
diff --git a/Makefile.in b/Makefile.in
index 67b7616..52c854c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -144,8 +144,8 @@ loadavg_DEPENDENCIES =
 am__make_SOURCES_DIST = ar.c arscan.c commands.c default.c dir.c \
        expand.c file.c function.c getopt.c getopt1.c guile.c \
        implicit.c job.c load.c loadapi.c main.c misc.c posixos.c \
-       output.c read.c remake.c rule.c signame.c strcache.c \
-       variable.c version.c vpath.c hash.c remote-stub.c \
+       output.c read.c read-opt.c remake.c rule.c signame.c \
+       strcache.c variable.c version.c vpath.c hash.c remote-stub.c \
        remote-cstms.c
 @address@hidden = posixos.$(OBJEXT)
 @address@hidden = remote-stub.$(OBJEXT)
@@ -156,10 +156,10 @@ am_make_OBJECTS = ar.$(OBJEXT) arscan.$(OBJEXT) commands.$(OBJEXT) \
        getopt1.$(OBJEXT) guile.$(OBJEXT) implicit.$(OBJEXT) \
        job.$(OBJEXT) load.$(OBJEXT) loadapi.$(OBJEXT) main.$(OBJEXT) \
        misc.$(OBJEXT) $(am__objects_1) output.$(OBJEXT) \
-       read.$(OBJEXT) remake.$(OBJEXT) rule.$(OBJEXT) \
-       signame.$(OBJEXT) strcache.$(OBJEXT) variable.$(OBJEXT) \
-       version.$(OBJEXT) vpath.$(OBJEXT) hash.$(OBJEXT) \
-       $(am__objects_2)
+       read.$(OBJEXT) read-opt.$(OBJEXT) remake.$(OBJEXT) \
+       rule.$(OBJEXT) signame.$(OBJEXT) strcache.$(OBJEXT) \
+       variable.$(OBJEXT) version.$(OBJEXT) vpath.$(OBJEXT) \
+       hash.$(OBJEXT) $(am__objects_2)
 make_OBJECTS = $(am_make_OBJECTS)
 am__DEPENDENCIES_1 =
 @address@hidden = $(am__DEPENDENCIES_1)
@@ -473,9 +473,9 @@ include_HEADERS = gnumake.h
 @address@hidden = remote-cstms.c
 make_SOURCES = ar.c arscan.c commands.c default.c dir.c expand.c file.c \
                function.c getopt.c getopt1.c guile.c implicit.c job.c load.c \
-               loadapi.c main.c misc.c $(ossrc) output.c read.c remake.c \
-               rule.c signame.c strcache.c variable.c version.c vpath.c \
-               hash.c $(remote)
+               loadapi.c main.c misc.c $(ossrc) output.c read.c read-opt.c \
+               remake.c rule.c signame.c strcache.c variable.c version.c \
+               vpath.c hash.c $(remote)
 
 EXTRA_make_SOURCES = vmsjobs.c remote-stub.c remote-cstms.c
 noinst_HEADERS = commands.h dep.h filedef.h job.h makeint.h rule.h variable.h \
@@ -684,6 +684,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
+@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
diff --git a/makeint.h b/makeint.h
index 8e0ae6c..ef66312 100644
--- a/makeint.h
+++ b/makeint.h
@@ -712,6 +712,7 @@ void unblock_remote_children (void);
 int remote_kill (int id, int sig);
 void print_variable_data_base (void);
 void print_vpath_data_base (void);
+int needs_glob(const char *);
 
 extern char *starting_directory;
 extern unsigned int makelevel;
diff --git a/read-opt.c b/read-opt.c
new file mode 100644
index 0000000..6deb446
--- /dev/null
+++ b/read-opt.c
@@ -0,0 +1,88 @@
+/* Vectorized function for fast parsing of filenames for GNU Make.
+Copyright (C) 2016 Free Software Foundation, Inc.
+This file is part of GNU Make.
+
+GNU Make is free software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "makeint.h"
+
+#ifdef __SSE2__
+/* Return nonzero if S has a '?', '*' or '[' before its terminating NUL.
+   Loads whole aligned 16-byte blocks, so it also reads bytes around S.  */
+int needs_glob(const char *s)
+{
+  static const char repl_chars[4][16] __attribute__((aligned(16))) = {
+      { '?', '?', '?', '?', '?', '?', '?', '?',
+        '?', '?', '?', '?', '?', '?', '?', '?' },
+      { '*', '*', '*', '*', '*', '*', '*', '*',
+        '*', '*', '*', '*', '*', '*', '*', '*' },
+      { '[', '[', '[', '[', '[', '[', '[', '[',
+        '[', '[', '[', '[', '[', '[', '[', '[' },
+  };  /* The omitted fourth row is all zeros; it is used to match NUL.  */
+  typedef char v16qi __attribute__ ((__vector_size__ (16)));
+
+  const v16qi repl_qm = *(const v16qi *)repl_chars[0];
+  const v16qi repl_st = *(const v16qi *)repl_chars[1];
+  const v16qi repl_br = *(const v16qi *)repl_chars[2];
+  const v16qi repl_nul = *(const v16qi *)repl_chars[3];
+
+  unsigned int misalign, found, mask, done;
+  const v16qi *p;
+  v16qi data, t, n;
+
+  /* Align the source pointer.  */
+  misalign = (uintptr_t)s & 15;
+  p = (const v16qi *)((uintptr_t)s & -16);
+  data = *p;
+
+  /* Create a mask for the bytes that are valid within the first
+     16-byte block.  The idea here is that the AND with the mask
+     within the loop is "free", since we need some AND or TEST
+     insn in order to set the flags for the branch anyway.  */
+  mask = -1u << misalign;
+
+  /* Main loop processing 16 bytes at a time.  */
+  goto start;
+  do
+    {
+      data = *++p;
+      mask = -1;
+    start:
+      n  = __builtin_ia32_pcmpeqb128(data, repl_nul);
+      t  = __builtin_ia32_pcmpeqb128(data, repl_qm);
+      t |= __builtin_ia32_pcmpeqb128(data, repl_st);
+      t |= __builtin_ia32_pcmpeqb128(data, repl_br);
+      t |= n;
+      found = __builtin_ia32_pmovmskb128 (t);
+      found &= mask;
+    }
+  while (!found);
+
+  /* FOUND has a 1 for each glob character or NUL matched; DONE has a 1
+     for each NUL.  DONE needs MASK too: a NUL located before S in the
+     first block must not be mistaken for the end of the string.  */
+  done = __builtin_ia32_pmovmskb128 (n) & mask;
+
+  /* Set to 1 the bit of the first NUL and all bits above it, i.e. the
+     terminator and every byte that follows it in this block.  */
+  done |= -done;
+  found &= ~done;
+  return found > 0;
+}
+
+#else
+int needs_glob(const char *s)
+{
+  return strpbrk (s, "?*[") != NULL;  /* Nonzero iff S has '?', '*', '['.  */
+}
+#endif
diff --git a/read.c b/read.c
index b870aa8..0883100 100644
--- a/read.c
+++ b/read.c
@@ -3268,7 +3268,7 @@ parse_file_seq (char **stringp, unsigned int size, int stopmap,
 #endif /* !NO_ARCHIVES */
 
       /* glob() is expensive: don't call it unless we need to.  */
-      if (NONE_SET (flags, PARSEFS_EXISTS) && strpbrk (name, "?*[") == NULL)
+      if (NONE_SET (flags, PARSEFS_EXISTS) && !needs_glob (name))
         {
           globme = 0;
           i = 1;
-- 
2.7.4





reply via email to

[Prev in Thread] Current Thread [Next in Thread]