bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] md5: accepts a new --threads option


From: Giuseppe Scrivano
Subject: [PATCH] md5: accepts a new --threads option
Date: Sun, 18 Oct 2009 00:49:21 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.1.50 (gnu/linux)

Hello,

inspired by the attempt to make `sort' multi-threaded, I added thread
support to md5sum and the sha*sum family of programs.  It has an effect
only when multiple files are specified.

Any comments?

Cheers,
Giuseppe



From 1e4ed081f41ac0955542d3a0f1ad143047b8ac25 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <address@hidden>
Date: Sun, 18 Oct 2009 00:19:25 +0200
Subject: [PATCH] md5: accepts a new --threads option

* NEWS: Mention it.
* bootstrap.conf: Use the `nproc' and `pthread' modules from gnulib.
* doc/coreutils.texi: Document the new feature.
* src/Makefile.am (md5sum_LDADD, sha1sum_LDADD, sha224sum_LDADD)
(sha256sum_LDADD, sha384sum_LDADD, sha512sum_LDADD): Link to the
pthread library.
* src/md5sum.c (main): Add --threads and move some code into new
functions.
(long_options, usage): Add --threads.
(do_file): New function.
(thread_start): New function.
(check_files): New function.
* tests/misc/md5sum: Test the new --threads option.
* tests/misc/sha1sum: Ditto.
* tests/misc/sha224sum: Ditto.
* tests/misc/sha256sum: Ditto.
* tests/misc/sha384sum: Ditto.
* tests/misc/sha512sum: Ditto.
---
 NEWS                 |    3 +
 bootstrap.conf       |    2 +
 doc/coreutils.texi   |    8 ++
 src/Makefile.am      |   12 ++--
 src/md5sum.c         |  234 +++++++++++++++++++++++++++++++++++++-------------
 tests/misc/md5sum    |    6 ++
 tests/misc/sha1sum   |    6 ++
 tests/misc/sha224sum |    6 ++
 tests/misc/sha256sum |    6 ++
 tests/misc/sha384sum |    6 ++
 tests/misc/sha512sum |    6 ++
 11 files changed, 230 insertions(+), 65 deletions(-)

diff --git a/NEWS b/NEWS
index f8269fc..70af0b3 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   md5sum --check now also accepts openssl-style checksums.
   So do sha1sum, sha224sum, sha384sum and sha512sum.
 
+  md5sum, sha1sum, sha224sum, sha256sum, sha384sum and sha512sum accept a new
+  option --threads to improve parallelism when multiple files are specified.
+
 
 * Noteworthy changes in release 8.0 (2009-10-06) [beta]
 
diff --git a/bootstrap.conf b/bootstrap.conf
index e9b198c..fb3304d 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -155,6 +155,7 @@ gnulib_modules="
   mktime
   modechange
   mountlist
+  nproc
   mpsort
   obstack
   pathmax
@@ -166,6 +167,7 @@ gnulib_modules="
   priv-set
   progname
   propername
+  pthread
   putenv
   quote
   quotearg
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5026e76..b81cb81 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3496,6 +3496,14 @@ distinguish between binary and text files.  On other systems, it is
 the default for reading standard input when standard input is a
 terminal.
 
+@item --threads[=@var{n}]
+@opindex --threads
+@cindex verifying MD5 checksums
+Use up to @var{n} threads when multiple files are specified.  If
+@var{n} is not specified, the number of processors is used.  The
+number of threads is also limited by the number of specified files,
+so no more threads than files are ever created.
+
 @item -w
 @itemx --warn
 @opindex -w
diff --git a/src/Makefile.am b/src/Makefile.am
index 915ea81..33d2563 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -220,7 +220,7 @@ link_LDADD = $(LDADD)
 ln_LDADD = $(LDADD)
 logname_LDADD = $(LDADD)
 ls_LDADD = $(LDADD)
-md5sum_LDADD = $(LDADD)
+md5sum_LDADD = $(LDADD) $(LIB_PTHREAD)
 mkdir_LDADD = $(LDADD)
 mkfifo_LDADD = $(LDADD)
 mknod_LDADD = $(LDADD)
@@ -244,11 +244,11 @@ rmdir_LDADD = $(LDADD)
 runcon_LDADD = $(LDADD)
 seq_LDADD = $(LDADD)
 setuidgid_LDADD = $(LDADD)
-sha1sum_LDADD = $(LDADD)
-sha224sum_LDADD = $(LDADD)
-sha256sum_LDADD = $(LDADD)
-sha384sum_LDADD = $(LDADD)
-sha512sum_LDADD = $(LDADD)
+sha1sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha224sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha256sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha384sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha512sum_LDADD = $(LDADD) $(LIB_PTHREAD)
 shred_LDADD = $(LDADD)
 shuf_LDADD = $(LDADD)
 sleep_LDADD = $(LDADD)
diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..161f1a6 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -20,8 +20,11 @@
 
 #include <getopt.h>
 #include <sys/types.h>
+#include <pthread.h>
 
 #include "system.h"
+#include "nproc.h"
+#include "xstrtol.h"
 
 #if HASH_ALGO_MD5
 # include "md5.h"
@@ -126,7 +129,8 @@ static bool quiet = false;
 enum
 {
   STATUS_OPTION = CHAR_MAX + 1,
-  QUIET_OPTION
+  QUIET_OPTION,
+  THREADS_OPTION
 };
 
 static struct option const long_options[] =
@@ -136,12 +140,28 @@ static struct option const long_options[] =
   { "quiet", no_argument, NULL, QUIET_OPTION },
   { "status", no_argument, NULL, STATUS_OPTION },
   { "text", no_argument, NULL, 't' },
+  { "threads", optional_argument, NULL, THREADS_OPTION},
   { "warn", no_argument, NULL, 'w' },
   { GETOPT_HELP_OPTION_DECL },
   { GETOPT_VERSION_OPTION_DECL },
   { NULL, 0, NULL, 0 }
 };
 
+/* Work description shared by all the threads running thread_start.  */
+struct thread_arg
+{
+  char **files;                /* Names of the files to process.  */
+  int n_files;                 /* Number of entries in FILES.  */
+  unsigned char **bin_buffer;  /* Per-file buffer handed to digest_file.  */
+  bool *res;                   /* Per-file result of digest_check/digest_file.  */
+  int *file_is_binary;         /* Per-file binary/text mode flag.  */
+  bool do_check;               /* Use digest_check instead of digest_file.  */
+  bool *busy;                  /* Per-file flag: claimed by some thread.  */
+
+  /* Protect BUSY.  */
+  pthread_mutex_t mutex;
+};
+
 void
 usage (int status)
 {
@@ -179,6 +199,8 @@ With no FILE, or when FILE is -, read standard input.\n\
   -t, --text              read in text mode (default)\n\
 "), stdout);
       fputs (_("\
+      --threads=N           use up to N threads\n"), stdout);
+      fputs (_("\
 \n\
 The following three options are useful only when verifying checksums:\n\
       --quiet             don't print OK for each successfully verified file\n\
@@ -599,16 +621,154 @@ digest_check (const char *checkfile_name)
           && n_open_or_read_failures == 0);
 }
 
+static void
+do_file (struct thread_arg *ts, int j)
+{
+  char *file = ts->files[j];
+  if (ts->do_check)
+    ts->res[j] = digest_check (file);
+  else
+    ts->res[j] = digest_file (file, &ts->file_is_binary[j],
+                              ts->bin_buffer[j]);
+}
+/* Thread entry point: claim and process files of ARG until none is left.  */
+static void*
+thread_start (void *arg)
+{
+  struct thread_arg *ts = arg;
+  int current = 0;   /* Persists, so the scan only moves forward.  */
+  while (1)
+    {
+      pthread_mutex_lock (&ts->mutex);
+
+      while (current < ts->n_files && ts->busy[current])
+        current++;   /* Skip the files already claimed by a thread.  */
+
+      if (current < ts->n_files)
+        ts->busy[current] = true;   /* Claim this file.  */
+
+      pthread_mutex_unlock (&ts->mutex);
+
+      /* Every file has already been claimed: exit from the thread.  */
+      if (ts->n_files <= current)
+        return NULL;
+
+      do_file (ts, current++);   /* Done with the mutex released.  */
+    }
+
+  return NULL;   /* Not reached: the loop above never terminates.  */
+}
+/* Run N_FILES FILES through N_THREADS threads; true iff all succeeded.  */
+static bool
+check_files (char **files, unsigned long n_threads, int n_files, bool do_check,
+             int binary)
+{
+  int j;
+  bool ok = true;
+  unsigned char *bin_buffer_unaligned = xnmalloc (DIGEST_BIN_BYTES
+                                                  + DIGEST_ALIGN, n_files);
+  unsigned char *bin_buffer[n_files];
+  bool res[n_files];
+  bool busy[n_files];
+  int file_is_binary[n_files];
+  pthread_t tids[n_threads > 1 ? n_threads - 1 : 1];   /* Never empty.  */
+
+  struct thread_arg ts =
+    {
+      .bin_buffer = bin_buffer,
+      .busy = busy,
+      .do_check = do_check,
+      .files = files,
+      .file_is_binary = file_is_binary,
+      .mutex = PTHREAD_MUTEX_INITIALIZER,
+      .n_files = n_files,
+      .res = res,
+    };
+
+  for (j = 0; j < n_files; j++)
+    {
+      /* Make sure bin_buffer is properly aligned. */
+      unsigned char *tmp = &bin_buffer_unaligned[j * (DIGEST_BIN_BYTES
+                                                      + DIGEST_ALIGN)];
+      bin_buffer[j] = ptr_align (tmp, DIGEST_ALIGN);
+      file_is_binary[j] = binary;
+      ts.busy[j] = false;
+    }
+  /* pthread_create returns its error number instead of setting errno.  */
+  for (j = 0; j + 1 < n_threads; j++)   /* No wraparound if N_THREADS is 0.  */
+    if ((errno = pthread_create (&tids[j], NULL, thread_start, &ts)) != 0)
+      error (EXIT_FAILURE, errno, _("cannot spawn a new thread"));
+
+  /* Use the main thread as a regular thread.  */
+  thread_start (&ts);
+
+  for (j = 0; j + 1 < n_threads; j++)
+    pthread_join (tids[j], NULL);
+
+  for (j = 0; j < n_files; j++)   /* Report in command-line order.  */
+    {
+      char *file = files[j];
+
+      if (do_check)
+        ok &= res[j];
+      else
+        {
+          if (! res[j])
+            ok = false;
+          else
+            {
+              size_t i;
+
+              /* Output a leading backslash if the file name contains
+                 a newline or backslash.  */
+              if (strchr (file, '\n') || strchr (file, '\\'))
+                putchar ('\\');
+
+              for (i = 0; i < (digest_hex_bytes / 2); ++i)
+                printf ("%02x", bin_buffer[j][i]);
+
+              putchar (' ');
+              if (file_is_binary[j])
+                putchar ('*');
+              else
+                putchar (' ');
+
+              /* Translate each NEWLINE byte to the string, "\\n",
+                 and each backslash to "\\\\".  */
+              for (i = 0; file[i] != '\0'; ++i)
+                {
+                  switch (file[i])
+                    {
+                    case '\n':
+                      fputs ("\\n", stdout);
+                      break;
+
+                    case '\\':
+                      fputs ("\\\\", stdout);
+                      break;
+
+                    default:
+                      putchar (file[i]);
+                      break;
+                    }
+                }
+              putchar ('\n');
+            }
+        }
+    }
+  free (bin_buffer_unaligned);
+
+  return ok;
+}
+
 int
 main (int argc, char **argv)
 {
-  unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
-  /* Make sure bin_buffer is properly aligned. */
-  unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
-  bool do_check = false;
   int opt;
-  bool ok = true;
+  bool ok;
   int binary = -1;
+  bool do_check = false;
+  unsigned long n_threads = 1;
 
   /* Setting values of global variables.  */
   initialize_main (&argc, &argv);
@@ -646,6 +806,12 @@ main (int argc, char **argv)
         warn = false;
         quiet = true;
         break;
+      case THREADS_OPTION:
+        if (optarg)
+          xstrtoul (optarg, NULL, 10, &n_threads, "");
+        else
+          n_threads = num_processors ();
+        break;
       case_GETOPT_HELP_CHAR;
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
       default:
@@ -689,59 +855,9 @@ main (int argc, char **argv)
   if (optind == argc)
     argv[argc++] = bad_cast ("-");
 
-  for (; optind < argc; ++optind)
-    {
-      char *file = argv[optind];
-
-      if (do_check)
-        ok &= digest_check (file);
-      else
-        {
-          int file_is_binary = binary;
-
-          if (! digest_file (file, &file_is_binary, bin_buffer))
-            ok = false;
-          else
-            {
-              size_t i;
-
-              /* Output a leading backslash if the file name contains
-                 a newline or backslash.  */
-              if (strchr (file, '\n') || strchr (file, '\\'))
-                putchar ('\\');
-
-              for (i = 0; i < (digest_hex_bytes / 2); ++i)
-                printf ("%02x", bin_buffer[i]);
-
-              putchar (' ');
-              if (file_is_binary)
-                putchar ('*');
-              else
-                putchar (' ');
-
-              /* Translate each NEWLINE byte to the string, "\\n",
-                 and each backslash to "\\\\".  */
-              for (i = 0; i < strlen (file); ++i)
-                {
-                  switch (file[i])
-                    {
-                    case '\n':
-                      fputs ("\\n", stdout);
-                      break;
-
-                    case '\\':
-                      fputs ("\\\\", stdout);
-                      break;
-
-                    default:
-                      putchar (file[i]);
-                      break;
-                    }
-                }
-              putchar ('\n');
-            }
-        }
-    }
+  size_t n_files = argc - optind;
+  ok = check_files (&argv[optind], MIN (n_threads, n_files), n_files, do_check,
+                    binary);
 
   if (have_read_stdin && fclose (stdin) == EOF)
     error (EXIT_FAILURE, errno, _("standard input"));
diff --git a/tests/misc/md5sum b/tests/misc/md5sum
index 30edd9e..ae49954 100755
--- a/tests/misc/md5sum
+++ b/tests/misc/md5sum
@@ -96,6 +96,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha1sum b/tests/misc/sha1sum
index d084204..2a3ca6a 100755
--- a/tests/misc/sha1sum
+++ b/tests/misc/sha1sum
@@ -82,6 +82,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha224sum b/tests/misc/sha224sum
index aace96c..0405510 100755
--- a/tests/misc/sha224sum
+++ b/tests/misc/sha224sum
@@ -41,6 +41,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha256sum b/tests/misc/sha256sum
index d85f248..e376155 100755
--- a/tests/misc/sha256sum
+++ b/tests/misc/sha256sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha384sum b/tests/misc/sha384sum
index c5818e2..0c60824 100755
--- a/tests/misc/sha384sum
+++ b/tests/misc/sha384sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha512sum b/tests/misc/sha512sum
index 9a45602..7e19cb9 100755
--- a/tests/misc/sha512sum
+++ b/tests/misc/sha512sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test not using --check.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
-- 
1.6.3.3





reply via email to

[Prev in Thread] Current Thread [Next in Thread]