[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] md5: accepts a new --threads option
From: |
Giuseppe Scrivano |
Subject: |
[PATCH] md5: accepts a new --threads option |
Date: |
Sun, 18 Oct 2009 00:49:21 +0200 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/23.1.50 (gnu/linux) |
Hello,
inspired by the attempt to make `sort' multi-threaded, I added thread
support to md5sum and the sha* family of programs. It only has an effect
when multiple files are specified.
Any comments?
Cheers,
Giuseppe
>From 1e4ed081f41ac0955542d3a0f1ad143047b8ac25 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <address@hidden>
Date: Sun, 18 Oct 2009 00:19:25 +0200
Subject: [PATCH] md5: accepts a new --threads option
* NEWS: Mention it.
* bootstrap.conf: Use the `nproc' and `pthread' modules from gnulib.
* doc/coreutils.texi: Document the new feature.
* src/Makefile.am (md5sum, sha1sum, sha224, sha256, sha384, sha512):
Link to the pthread library.
* src/md5sum.c (main): Add --threads and move some code into new
functions.
(long_options, usage): Add --threads.
(do_file): New function.
(thread_start): New function.
(check_files): New function.
* tests/misc/md5sum: Test the new --threads option.
* tests/misc/sha1sum: Ditto.
* tests/misc/sha224sum: Ditto.
* tests/misc/sha256sum: Ditto.
* tests/misc/sha384sum: Ditto.
* tests/misc/sha512sum: Ditto.
---
NEWS | 3 +
bootstrap.conf | 2 +
doc/coreutils.texi | 8 ++
src/Makefile.am | 12 ++--
src/md5sum.c | 234 +++++++++++++++++++++++++++++++++++++-------------
tests/misc/md5sum | 6 ++
tests/misc/sha1sum | 6 ++
tests/misc/sha224sum | 6 ++
tests/misc/sha256sum | 6 ++
tests/misc/sha384sum | 6 ++
tests/misc/sha512sum | 6 ++
11 files changed, 230 insertions(+), 65 deletions(-)
diff --git a/NEWS b/NEWS
index f8269fc..70af0b3 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ GNU coreutils NEWS -*-
outline -*-
md5sum --check now also accepts openssl-style checksums.
So do sha1sum, sha224sum, sha384sum and sha512sum.
+ md5sum, sha1sum, sha224sum, sha256sum, sha384sum and sha512sum accept a new
+ option --threads to improve parallelism when multiple files are specified.
+
* Noteworthy changes in release 8.0 (2009-10-06) [beta]
diff --git a/bootstrap.conf b/bootstrap.conf
index e9b198c..fb3304d 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -155,6 +155,7 @@ gnulib_modules="
mktime
modechange
mountlist
+ nproc
mpsort
obstack
pathmax
@@ -166,6 +167,7 @@ gnulib_modules="
priv-set
progname
propername
+ pthread
putenv
quote
quotearg
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5026e76..b81cb81 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3496,6 +3496,14 @@ distinguish between binary and text files. On other
systems, it is
the default for reading standard input when standard input is a
terminal.
+@item --threads[=@var{n}]
+@opindex --threads
+@cindex verifying MD5 checksums
+Use up to @var{n} threads when multiple files are specified. If no
+value is specified, the number of processors is used. The number of
+threads is also limited by the number of specified files, so no more
+threads than files are ever created.
+
@item -w
@itemx --warn
@opindex -w
diff --git a/src/Makefile.am b/src/Makefile.am
index 915ea81..33d2563 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -220,7 +220,7 @@ link_LDADD = $(LDADD)
ln_LDADD = $(LDADD)
logname_LDADD = $(LDADD)
ls_LDADD = $(LDADD)
-md5sum_LDADD = $(LDADD)
+md5sum_LDADD = $(LDADD) $(LIB_PTHREAD)
mkdir_LDADD = $(LDADD)
mkfifo_LDADD = $(LDADD)
mknod_LDADD = $(LDADD)
@@ -244,11 +244,11 @@ rmdir_LDADD = $(LDADD)
runcon_LDADD = $(LDADD)
seq_LDADD = $(LDADD)
setuidgid_LDADD = $(LDADD)
-sha1sum_LDADD = $(LDADD)
-sha224sum_LDADD = $(LDADD)
-sha256sum_LDADD = $(LDADD)
-sha384sum_LDADD = $(LDADD)
-sha512sum_LDADD = $(LDADD)
+sha1sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha224sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha256sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha384sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha512sum_LDADD = $(LDADD) $(LIB_PTHREAD)
shred_LDADD = $(LDADD)
shuf_LDADD = $(LDADD)
sleep_LDADD = $(LDADD)
diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..161f1a6 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -20,8 +20,11 @@
#include <getopt.h>
#include <sys/types.h>
+#include <pthread.h>
#include "system.h"
+#include "nproc.h"
+#include "xstrtol.h"
#if HASH_ALGO_MD5
# include "md5.h"
@@ -126,7 +129,8 @@ static bool quiet = false;
enum
{
STATUS_OPTION = CHAR_MAX + 1,
- QUIET_OPTION
+ QUIET_OPTION,
+ THREADS_OPTION
};
static struct option const long_options[] =
@@ -136,12 +140,28 @@ static struct option const long_options[] =
{ "quiet", no_argument, NULL, QUIET_OPTION },
{ "status", no_argument, NULL, STATUS_OPTION },
{ "text", no_argument, NULL, 't' },
+ { "threads", optional_argument, NULL, THREADS_OPTION},
{ "warn", no_argument, NULL, 'w' },
{ GETOPT_HELP_OPTION_DECL },
{ GETOPT_VERSION_OPTION_DECL },
{ NULL, 0, NULL, 0 }
};
+
+struct thread_arg /* Work list and result tables shared by all threads of one check_files call. */
+{
+ char **files; /* File names to process, indexed by file number. */
+ int n_files; /* Number of entries in FILES and in each per-file table below. */
+ unsigned char **bin_buffer; /* Per-file buffer handed to digest_file for the binary digest. */
+ bool *res; /* Per-file result of digest_check or digest_file. */
+ int *file_is_binary; /* Per-file binary flag, passed by address to digest_file. */
+ bool do_check; /* True: run digest_check on each file; false: run digest_file. */
+ bool *busy; /* Per-file flag, set once a thread has claimed that file. */
+
+ /* Protect BUSY. */
+ pthread_mutex_t mutex;
+};
+
void
usage (int status)
{
@@ -179,6 +199,8 @@ With no FILE, or when FILE is -, read standard input.\n\
-t, --text read in text mode (default)\n\
"), stdout);
fputs (_("\
+ --threads=N use up to N threads\n"), stdout);
+ fputs (_("\
\n\
The following three options are useful only when verifying checksums:\n\
--quiet don't print OK for each successfully verified file\n\
@@ -599,16 +621,154 @@ digest_check (const char *checkfile_name)
&& n_open_or_read_failures == 0);
}
+/* Process file number J of the shared work list TS and record the outcome
+   in TS->res[J].  When TS->do_check is set the file is handed to
+   digest_check; otherwise its digest is computed by digest_file into
+   TS->bin_buffer[J], with TS->file_is_binary[J] passed by address.  */
+static void
+do_file (struct thread_arg *ts, int j)
+{
+  bool outcome;
+
+  if (! ts->do_check)
+    outcome = digest_file (ts->files[j], &ts->file_is_binary[j],
+                           ts->bin_buffer[j]);
+  else
+    outcome = digest_check (ts->files[j]);
+
+  ts->res[j] = outcome;
+}
+
+/* Worker loop executed by every thread, the calling thread included.
+   ARG points to the shared struct thread_arg.  Repeatedly claim the first
+   file whose BUSY flag is still clear and process it with do_file.
+   Return NULL as soon as no unclaimed file is left.  */
+static void *
+thread_start (void *arg)
+{
+  struct thread_arg *shared = arg;
+  int idx = 0;
+
+  for (;;)
+    {
+      bool claimed;
+
+      /* BUSY is shared between threads: scan and mark it under the lock.
+         This function only ever sets flags, so the scan resumes from the
+         position reached on the previous iteration.  */
+      pthread_mutex_lock (&shared->mutex);
+      for (; idx < shared->n_files; idx++)
+        if (! shared->busy[idx])
+          break;
+      claimed = idx < shared->n_files;
+      if (claimed)
+        shared->busy[idx] = true;
+      pthread_mutex_unlock (&shared->mutex);
+
+      /* No other files, exit from the thread.  */
+      if (! claimed)
+        return NULL;
+
+      do_file (shared, idx);
+      idx++;
+    }
+}
+
+/* Process the N_FILES names in FILES with up to N_THREADS threads; the
+   calling thread counts as one of them and takes part in the work.
+
+   If DO_CHECK is true each file is handed to digest_check.  Otherwise the
+   digest of each file is computed by digest_file, starting from the
+   binary/text mode BINARY, and once all threads have finished one line per
+   successfully digested file is written to stdout in argument order.
+
+   Return true only if every file was processed successfully.  Exit with
+   EXIT_FAILURE if a thread cannot be created.
+
+   NOTE(review): with DO_CHECK, digest_check runs concurrently in several
+   threads; confirm that it (and any output it produces) is safe to run
+   that way.  */
+static bool
+check_files (char **files, unsigned long n_threads, int n_files, bool do_check,
+             int binary)
+{
+  bool ok = true;
+  size_t count = n_files < 0 ? 0 : n_files;
+  size_t slot_size = DIGEST_BIN_BYTES + DIGEST_ALIGN;
+
+  /* The calling thread always participates, so treat a request for zero
+     threads (e.g. --threads=0) as one; otherwise N_THREADS - 1 would wrap
+     around to ULONG_MAX.  */
+  if (n_threads == 0)
+    n_threads = 1;
+  unsigned long n_workers = n_threads - 1;
+
+  /* Allocate the tables on the heap: their sizes depend on the command
+     line, and a variable-length array of zero elements (the default,
+     single-threaded case for TIDS) is undefined behavior.  */
+  unsigned char *bin_buffer_unaligned = xnmalloc (slot_size, count);
+  unsigned char **bin_buffer = xnmalloc (count, sizeof *bin_buffer);
+  bool *res = xnmalloc (count, sizeof *res);
+  bool *busy = xnmalloc (count, sizeof *busy);
+  int *file_is_binary = xnmalloc (count, sizeof *file_is_binary);
+  pthread_t *tids = xnmalloc (n_workers, sizeof *tids);
+
+  struct thread_arg ts =
+    {
+      .bin_buffer = bin_buffer,
+      .busy = busy,
+      .do_check = do_check,
+      .files = files,
+      .file_is_binary = file_is_binary,
+      .mutex = PTHREAD_MUTEX_INITIALIZER,
+      .n_files = n_files,
+      .res = res,
+    };
+
+  for (size_t j = 0; j < count; j++)
+    {
+      /* Make sure bin_buffer is properly aligned.  */
+      bin_buffer[j] = ptr_align (bin_buffer_unaligned + j * slot_size,
+                                 DIGEST_ALIGN);
+      file_is_binary[j] = binary;
+      busy[j] = false;
+    }
+
+  for (unsigned long t = 0; t < n_workers; t++)
+    {
+      /* pthread_create reports failure through its return value and does
+         not set errno.  */
+      int err = pthread_create (&tids[t], NULL, thread_start, &ts);
+      if (err != 0)
+        error (EXIT_FAILURE, err, _("cannot spawn a new thread"));
+    }
+
+  /* Use the main thread as a regular thread.  */
+  thread_start (&ts);
+
+  for (unsigned long t = 0; t < n_workers; t++)
+    pthread_join (tids[t], NULL);
+
+  /* All results are in; report them sequentially so that the output
+     order matches the order of FILES.  */
+  for (size_t j = 0; j < count; j++)
+    {
+      char const *file = files[j];
+
+      if (! res[j])
+        {
+          ok = false;
+          continue;
+        }
+
+      /* In check mode there is nothing to print here.  */
+      if (do_check)
+        continue;
+
+      /* Output a leading backslash if the file name contains
+         a newline or backslash.  */
+      if (strchr (file, '\n') || strchr (file, '\\'))
+        putchar ('\\');
+
+      for (size_t i = 0; i < (digest_hex_bytes / 2); ++i)
+        printf ("%02x", bin_buffer[j][i]);
+
+      putchar (' ');
+      putchar (file_is_binary[j] ? '*' : ' ');
+
+      /* Translate each NEWLINE byte to the string, "\\n",
+         and each backslash to "\\\\".  */
+      for (char const *p = file; *p; p++)
+        {
+          switch (*p)
+            {
+            case '\n':
+              fputs ("\\n", stdout);
+              break;
+
+            case '\\':
+              fputs ("\\\\", stdout);
+              break;
+
+            default:
+              putchar (*p);
+              break;
+            }
+        }
+      putchar ('\n');
+    }
+
+  free (tids);
+  free (file_is_binary);
+  free (busy);
+  free (res);
+  free (bin_buffer);
+  free (bin_buffer_unaligned);
+
+  return ok;
+}
+
int
main (int argc, char **argv)
{
- unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
- /* Make sure bin_buffer is properly aligned. */
- unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
- bool do_check = false;
int opt;
- bool ok = true;
+ bool ok;
int binary = -1;
+ bool do_check = false;
+ unsigned long n_threads = 1;
/* Setting values of global variables. */
initialize_main (&argc, &argv);
@@ -646,6 +806,12 @@ main (int argc, char **argv)
warn = false;
quiet = true;
break;
+ case THREADS_OPTION:
+ if (optarg)
+ xstrtoul (optarg, NULL, 10, &n_threads, "");
+ else
+ n_threads = num_processors ();
+ break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
@@ -689,59 +855,9 @@ main (int argc, char **argv)
if (optind == argc)
argv[argc++] = bad_cast ("-");
- for (; optind < argc; ++optind)
- {
- char *file = argv[optind];
-
- if (do_check)
- ok &= digest_check (file);
- else
- {
- int file_is_binary = binary;
-
- if (! digest_file (file, &file_is_binary, bin_buffer))
- ok = false;
- else
- {
- size_t i;
-
- /* Output a leading backslash if the file name contains
- a newline or backslash. */
- if (strchr (file, '\n') || strchr (file, '\\'))
- putchar ('\\');
-
- for (i = 0; i < (digest_hex_bytes / 2); ++i)
- printf ("%02x", bin_buffer[i]);
-
- putchar (' ');
- if (file_is_binary)
- putchar ('*');
- else
- putchar (' ');
-
- /* Translate each NEWLINE byte to the string, "\\n",
- and each backslash to "\\\\". */
- for (i = 0; i < strlen (file); ++i)
- {
- switch (file[i])
- {
- case '\n':
- fputs ("\\n", stdout);
- break;
-
- case '\\':
- fputs ("\\\\", stdout);
- break;
-
- default:
- putchar (file[i]);
- break;
- }
- }
- putchar ('\n');
- }
- }
- }
+ size_t n_files = argc - optind;
+ ok = check_files (&argv[optind], MIN (n_threads, n_files), n_files, do_check,
+ binary);
if (have_read_stdin && fclose (stdin) == EOF)
error (EXIT_FAILURE, errno, _("standard input"));
diff --git a/tests/misc/md5sum b/tests/misc/md5sum
index 30edd9e..ae49954 100755
--- a/tests/misc/md5sum
+++ b/tests/misc/md5sum
@@ -96,6 +96,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
diff --git a/tests/misc/sha1sum b/tests/misc/sha1sum
index d084204..2a3ca6a 100755
--- a/tests/misc/sha1sum
+++ b/tests/misc/sha1sum
@@ -82,6 +82,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
diff --git a/tests/misc/sha224sum b/tests/misc/sha224sum
index aace96c..0405510 100755
--- a/tests/misc/sha224sum
+++ b/tests/misc/sha224sum
@@ -41,6 +41,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
diff --git a/tests/misc/sha256sum b/tests/misc/sha256sum
index d85f248..e376155 100755
--- a/tests/misc/sha256sum
+++ b/tests/misc/sha256sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
diff --git a/tests/misc/sha384sum b/tests/misc/sha384sum
index c5818e2..0c60824 100755
--- a/tests/misc/sha384sum
+++ b/tests/misc/sha384sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
diff --git a/tests/misc/sha512sum b/tests/misc/sha512sum
index 9a45602..7e19cb9 100755
--- a/tests/misc/sha512sum
+++ b/tests/misc/sha512sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
}
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+ {
+ splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+ }
+
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
--
1.6.3.3
- [PATCH] md5: accepts a new --threads option,
Giuseppe Scrivano <=
- Re: [PATCH] md5: accepts a new --threads option, Pádraig Brady, 2009/10/17
- Re: [PATCH] md5: accepts a new --threads option, Pádraig Brady, 2009/10/17
- Re: [PATCH] md5: accepts a new --threads option, Jim Meyering, 2009/10/18
- Re: [PATCH] md5: accepts a new --threads option, Giuseppe Scrivano, 2009/10/18
- Re: [PATCH] md5: accepts a new --threads option, Jim Meyering, 2009/10/18
- Re: [PATCH] md5: accepts a new --threads option, Giuseppe Scrivano, 2009/10/18
- Re: [PATCH] md5: accepts a new --threads option, Jim Meyering, 2009/10/18
- Re: [PATCH] md5: accepts a new --threads option, Giuseppe Scrivano, 2009/10/25
- Re: [PATCH] md5: accepts a new --threads option, Jim Meyering, 2009/10/25
- Re: [PATCH] md5: accepts a new --threads option, Jim Meyering, 2009/10/25