bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils patch to multithread md5sum for parallel hashing (ala the HP-U


From: Brett L. Trotter
Subject: coreutils patch to multithread md5sum for parallel hashing (ala the HP-UX days)
Date: Thu, 25 Mar 2010 07:25:02 -0500
User-agent: Thunderbird 2.0.0.24 (X11/20100311)

Hello, this is my first post to the list, so I'll say in advance here
I'm pleased to meet you all.

I've been out of C/C++ land for a while due to the economy, but found
myself hashing a bunch of 46GB blu ray images and discs for verification
lately and wanted a simple way to cut down the time involved without
starting separate terminals, running screen, etc. HP-UX's md5sum
had/has(?) a -n option for parallelizing the hashing. I did a quick
implementation today, and it's probably nothing like the sort of code
you folks write and likely can be optimized quite a bit, but I was
sincerely hoping that the feature could make it into coreutils, either
based on my code or someone else's.

Attached is a patch against the version in
coreutils-5.97-23.el5_4.2.src.rpm on RHEL 5.4. It's been tested lightly,
shows a performance -decrease- for small numbers of small files, but an
increase for larger files or larger numbers of files. I haven't yet
gotten around to making the patch apply to the Makefile.am, so I was
manually adding -lpthread to the link lines for the *sum programs in the
generated makefile. I've also attached the fully patched md5sum.c.

Again, this is not anywhere near a production ready patch- and I'm aware
that output ordering will be potentially out of order when N > 1 is
used, but I'd love any thoughts, improvements, or reasons why md5sum
shouldn't be able to parallel process like the old days.

-Brett


P.S. I posted a very slightly earlier version of this code yesterday,
but my membership may not have been complete and it does not seem to
have made it to the list, please forgive me if it re-appears later on.
/* Compute MD5, SHA1, SHA224, SHA256, SHA384 or SHA512 checksum of files or strings
   Copyright (C) 1995-2005 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/* Written by Ulrich Drepper <address@hidden>.  */

#include <config.h>

#include <getopt.h>
#include <sys/types.h>

#include "system.h"

#if HASH_ALGO_MD5
# include "md5.h"
#endif
#if HASH_ALGO_SHA1
# include "sha1.h"
#endif
#if HASH_ALGO_SHA256 || HASH_ALGO_SHA224
# include "sha256.h"
#endif
#if HASH_ALGO_SHA512 || HASH_ALGO_SHA384
# include "sha512.h"
#endif
#include "getline.h"
#include "error.h"
#include "quote.h"
#include "stdio--.h"

/* Per-algorithm configuration.  The first HASH_ALGO_* macro in the chain
   below that evaluates nonzero selects the following set of definitions:

     PROGRAM_NAME        the official name of this program (e.g., no `g'
                         prefix); passed to the version-option handler
     DIGEST_TYPE_STRING  the algorithm name used in messages and matched
                         against the prefix of BSD-style checksum lines
     DIGEST_STREAM       the routine digest_file calls on an open stream
     DIGEST_BUFFER       the companion buffer routine (not referenced by
                         the code in this file)
     DIGEST_BITS         the digest size in bits
     DIGEST_REFERENCE    the specification named in the --help text
     DIGEST_ALIGN        the alignment passed to ptr_align for the
                         buffer that receives the binary digest

   If no HASH_ALGO_* macro is set, compilation stops with an error.  */
#if HASH_ALGO_MD5
# define PROGRAM_NAME "md5sum"
# define DIGEST_TYPE_STRING "MD5"
# define DIGEST_STREAM md5_stream
# define DIGEST_BUFFER md5_buffer
# define DIGEST_BITS 128
# define DIGEST_REFERENCE "RFC 1321"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA1
# define PROGRAM_NAME "sha1sum"
# define DIGEST_TYPE_STRING "SHA1"
# define DIGEST_STREAM sha1_stream
# define DIGEST_BUFFER sha1_buffer
# define DIGEST_BITS 160
# define DIGEST_REFERENCE "FIPS-180-1"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA256
# define PROGRAM_NAME "sha256sum"
# define DIGEST_TYPE_STRING "SHA256"
# define DIGEST_STREAM sha256_stream
# define DIGEST_BUFFER sha256_buffer
# define DIGEST_BITS 256
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA224
# define PROGRAM_NAME "sha224sum"
# define DIGEST_TYPE_STRING "SHA224"
# define DIGEST_STREAM sha224_stream
# define DIGEST_BUFFER sha224_buffer
# define DIGEST_BITS 224
# define DIGEST_REFERENCE "RFC 3874"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA512
# define PROGRAM_NAME "sha512sum"
# define DIGEST_TYPE_STRING "SHA512"
# define DIGEST_STREAM sha512_stream
# define DIGEST_BUFFER sha512_buffer
# define DIGEST_BITS 512
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 8
#elif HASH_ALGO_SHA384
# define PROGRAM_NAME "sha384sum"
# define DIGEST_TYPE_STRING "SHA384"
# define DIGEST_STREAM sha384_stream
# define DIGEST_BUFFER sha384_buffer
# define DIGEST_BITS 384
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 8
#else
# error "Can't decide which hash algorithm to compile."
#endif

/* Length of one digest written as hexadecimal text (one character per
   4 bits) and as raw binary (one byte per 8 bits).  */
#define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
#define DIGEST_BIN_BYTES (DIGEST_BITS / 8)

/* Author list handed to the version-option handler in main.  */
#define AUTHORS "Ulrich Drepper", "Scott Miller", "David Madore"

/* The minimum length of a valid digest line.  This length does
   not include any newline character at the end of a line.  */
#define MIN_DIGEST_LINE_LENGTH \
  (DIGEST_HEX_BYTES /* length of hexadecimal message digest */ \
   + 2 /* blank and binary indicator */ \
   + 1 /* minimum filename length */ )

/* True if any of the files read were the standard input.
   Set by digest_file and digest_check; main closes stdin before
   exiting when it is set.  */
static bool have_read_stdin;

/* The minimum length of a valid checksum line for the selected algorithm.
   Assigned in main from MIN_DIGEST_LINE_LENGTH and read by split_3.  */
static size_t min_digest_line_length;

/* Set to the length of a digest hex string for the selected algorithm.
   Assigned in main from DIGEST_HEX_BYTES.  */
static size_t digest_hex_bytes;

/* With --check, don't generate any output.
   The exit code indicates success or failure.
   Turned on by --status and turned off again by a later -w.  */
static bool status_only = false;

/* With --check, print a message to standard error warning about each
   improperly formatted checksum line.
   Turned on by -w and turned off again by a later --status.  */
static bool warn = false;

/* The name this program was run with.  Assigned from argv[0] in main.  */
char *program_name;

/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  STATUS_OPTION = CHAR_MAX + 1
};

/* Long-option table passed to getopt_long in main.  The entries spelled
   out here take no argument and return either their short-option letter
   or, for --status, STATUS_OPTION.  The help and version entries come
   from the GETOPT_*_OPTION_DECL macros; the all-zero entry terminates
   the table.  */
static const struct option long_options[] =
{
  { "binary", no_argument, NULL, 'b' },
  { "check", no_argument, NULL, 'c' },
  { "status", no_argument, NULL, STATUS_OPTION },
  { "text", no_argument, NULL, 't' },
  { "warn", no_argument, NULL, 'w' },
  { GETOPT_HELP_OPTION_DECL },
  { GETOPT_VERSION_OPTION_DECL },
  { NULL, 0, NULL, 0 }
};

void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    fprintf (stderr, _("Try `%s --help' for more information.\n"),
             program_name);
  else
    {
      printf (_("\
Usage: %s [OPTION] [FILE]...\n\
Print or check %s (%d-bit) checksums.\n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
"),
              program_name,
              DIGEST_TYPE_STRING,
              DIGEST_BITS);
      if (O_BINARY)
        fputs (_("\
  -b, --binary            read in binary mode (default unless reading tty 
stdin)\n\
"), stdout);
      else
        fputs (_("\
  -b, --binary            read in binary mode\n\
"), stdout);
      printf (_("\
  -c, --check             read %s sums from the FILEs and check them\n"),
              DIGEST_TYPE_STRING);
      if (O_BINARY)
        fputs (_("\
  -t, --text              read in text mode (default if reading tty stdin)\n\
"), stdout);
      else
        fputs (_("\
  -t, --text              read in text mode (default)\n\
"), stdout);
      fputs (_("\
\n\
The following two options are useful only when verifying checksums:\n\
      --status            don't output anything, status code shows success\n\
  -w, --warn              warn about improperly formatted checksum lines\n\
\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      printf (_("\
\n\
The sums are computed as described in %s.  When checking, the input\n\
should be a former output of this program.  The default mode is to print\n\
a line with checksum, a character indicating type (`*' for binary, ` ' for\n\
text), and name for each FILE.\n"),
              DIGEST_REFERENCE);
      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
    }

  exit (status);
}

/* True if C is a blank or a horizontal tab.  */
#define ISWHITE(c) ((c) == ' ' || (c) == '\t')

/* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
   'sha1' command into two parts: a hexadecimal digest, and the file
   name.  S must point just past the opening "ALGO (" of the line; the
   whitespace scans below rely on S being NUL-terminated.

   S is modified: the last ')' is overwritten with a NUL so that
   *FILE_NAME (which is set to S itself) becomes a proper string.
   *HEX_DIGEST is set to the first non-blank character after the '='
   that follows the closing parenthesis.

   Return true if successful, false if S is empty, contains no ')',
   or the ')' is not followed by optional blanks and an '='.  */

static bool
bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
             char **file_name)
{
  size_t i;

  *file_name = s;

  /* Nothing follows the "ALGO (" prefix.  Bail out here: computing
     S_LEN - 1 below would otherwise wrap around to SIZE_MAX and the
     backward search would start far outside the buffer.  */
  if (s_len == 0)
    return false;

  /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
     filenames, so search backwards for the last ')'. */
  i = s_len - 1;
  while (i && s[i] != ')')
    i--;

  if (s[i] != ')')
    return false;

  /* Terminate the file name where the closing parenthesis was.  */
  s[i++] = '\0';

  while (ISWHITE (s[i]))
    i++;

  if (s[i] != '=')
    return false;

  i++;

  while (ISWHITE (s[i]))
    i++;

  *hex_digest = (unsigned char *) &s[i];
  return true;
}

/* Parse one checksum line S (S_LEN bytes, newline already removed).

   Two layouts are understood.  A line that, after leading blanks,
   starts with DIGEST_TYPE_STRING followed by " (" is handed to
   bsd_split_3 and *BINARY is cleared.  Any other line must consist of
   an optional backslash, digest_hex_bytes digest characters, one blank
   or tab, a type indicator (` ' or `*') and the file name.

   On success *HEX_DIGEST and *FILE_NAME point into S, which has been
   modified in place (NUL inserted after the digest, escapes in the
   file name expanded), and true is returned.  */

static bool
split_3 (char *s, size_t s_len,
         unsigned char **hex_digest, int *binary, char **file_name)
{
  size_t const tag_len = strlen (DIGEST_TYPE_STRING);
  size_t pos = 0;
  bool unescape;
  char *out;

  /* Skip leading blanks and tabs.  */
  while (ISWHITE (s[pos]))
    pos++;

  /* BSD-style line: "ALGO (name) = digest".  */
  if (strncmp (s + pos, DIGEST_TYPE_STRING, tag_len) == 0
      && strncmp (s + pos + tag_len, " (", 2) == 0)
    {
      size_t const skip = pos + tag_len + 2;

      *binary = 0;
      return bsd_split_3 (s + skip, s_len - skip, hex_digest, file_name);
    }

  /* A leading backslash announces an escaped file name and makes the
     shortest acceptable line one character longer.  */
  unescape = (s[pos] == '\\');

  /* Reject lines too short to hold a digest, the two separator
     characters and at least one file name character.  */
  if (s_len - pos < min_digest_line_length + unescape)
    return false;

  if (unescape)
    pos++;

  /* The digest field: exactly digest_hex_bytes characters which must be
     followed by a blank or tab; that separator becomes the terminator.  */
  *hex_digest = (unsigned char *) &s[pos];
  pos += digest_hex_bytes;
  if (!ISWHITE (s[pos]))
    return false;
  s[pos++] = '\0';

  /* The type indicator.  */
  switch (s[pos])
    {
    case '*':
      *binary = 1;
      break;
    case ' ':
      *binary = 0;
      break;
    default:
      return false;
    }
  pos++;

  /* Everything after the type indicator belongs to the file name,
     leading and trailing white space included.  */
  *file_name = &s[pos];

  if (!unescape)
    return true;

  /* Expand `\n' to a newline and `\\' to a backslash, compacting the
     name in place.  The write position never overtakes the read
     position.  */
  out = *file_name;
  while (pos < s_len)
    {
      char c = s[pos++];

      /* The file name may not contain a NUL.  */
      if (c == '\0')
        return false;

      if (c == '\\')
        {
          /* A valid line does not end with a backslash.  */
          if (pos == s_len)
            return false;

          c = s[pos++];
          if (c == 'n')
            c = '\n';
          else if (c != '\\')
            {
              /* Only `\' or `n' may follow a backslash.  */
              return false;
            }
        }

      *out++ = c;
    }
  *out = '\0';

  return true;
}

/* Return true if every character of the NUL-terminated string S
   satisfies ISXDIGIT.  An empty string yields true.  */
static bool
hex_digits (unsigned char const *s)
{
  unsigned char const *p;

  for (p = s; *p != '\0'; p++)
    if (!ISXDIGIT (*p))
      return false;

  return true;
}

/* Run DIGEST_STREAM over the file named FILENAME, where "-" stands for
   standard input.

   *BINARY says how to open the file: nonzero requests binary mode when
   O_BINARY is nonzero.  For standard input a negative *BINARY means
   "decide here": it is replaced by whether stdin is not a terminal, and
   stdin is reopened in binary mode if the result is nonzero.

   The digest is stored in *BIN_RESULT, which must be properly aligned.
   On any open, read or close failure a diagnostic naming FILENAME is
   issued and false is returned; otherwise true is returned.  */

static bool
digest_file (const char *filename, int *binary, unsigned char *bin_result)
{
  bool const from_stdin = STREQ (filename, "-");
  FILE *stream;

  if (!from_stdin)
    {
      const char *mode = (O_BINARY && *binary) ? "rb" : "r";

      stream = fopen (filename, mode);
      if (stream == NULL)
        {
          error (0, errno, "%s", filename);
          return false;
        }
    }
  else
    {
      have_read_stdin = true;
      stream = stdin;
      if (O_BINARY && *binary)
        {
          /* Undecided: treat stdin as binary unless it is a terminal.  */
          if (*binary < 0)
            *binary = ! isatty (STDIN_FILENO);
          if (*binary)
            freopen (NULL, "rb", stdin);
        }
    }

  if (DIGEST_STREAM (stream, bin_result) != 0)
    {
      error (0, errno, "%s", filename);
      /* stdin is left open; main closes it once at the end.  */
      if (stream != stdin)
        fclose (stream);
      return false;
    }

  if (from_stdin)
    return true;

  if (fclose (stream) != 0)
    {
      error (0, errno, "%s", filename);
      return false;
    }

  return true;
}

/* Verify the checksums listed in the file CHECKFILE_NAME ("-" means
   standard input).

   Each line is read with getline.  Lines starting with '#' are skipped.
   Every other line is parsed with split_3; for each well-formed line the
   named file is digested with digest_file and the result is compared,
   ignoring the case of hex digits, with the digest given on the line.
   Unless status_only is set, one "NAME: OK", "NAME: FAILED" or
   "NAME: FAILED open or read" line is printed per entry, followed by
   summary warnings for unreadable files and mismatches.

   Return true only if at least one properly formatted line was found and
   every listed file could be read and matched its checksum.  Return false
   as well when the check file cannot be opened, read or closed.  */
static bool
digest_check (const char *checkfile_name)
{
  FILE *checkfile_stream;
  uintmax_t n_properly_formatted_lines = 0;
  uintmax_t n_mismatched_checksums = 0;
  uintmax_t n_open_or_read_failures = 0;
  unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
  /* Make sure bin_buffer is properly aligned. */
  unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
  uintmax_t line_number;
  char *line;
  size_t line_chars_allocated;
  bool is_stdin = STREQ (checkfile_name, "-");

  if (is_stdin)
    {
      have_read_stdin = true;
      /* From here on the name is only used in diagnostics.  */
      checkfile_name = _("standard input");
      checkfile_stream = stdin;
    }
  else
    {
      checkfile_stream = fopen (checkfile_name, "r");
      if (checkfile_stream == NULL)
        {
          error (0, errno, "%s", checkfile_name);
          return false;
        }
    }

  line_number = 0;
  /* Let getline allocate and grow the line buffer; it is freed after
     the loop.  */
  line = NULL;
  line_chars_allocated = 0;
  do
    {
      char *filename;
      int binary;
      unsigned char *hex_digest IF_LINT (= NULL);
      ssize_t line_length;

      /* The counter is unsigned, so reaching 0 again means it wrapped.  */
      ++line_number;
      if (line_number == 0)
        error (EXIT_FAILURE, 0, _("%s: too many checksum lines"),
               checkfile_name);

      line_length = getline (&line, &line_chars_allocated, checkfile_stream);
      if (line_length <= 0)
        break;

      /* Ignore comment lines, which begin with a '#' character.  */
      if (line[0] == '#')
        continue;

      /* Remove any trailing newline.  */
      if (line[line_length - 1] == '\n')
        line[--line_length] = '\0';

      /* A line is usable only if it parses, does not name standard input
         while the list itself is being read from standard input, and its
         digest field consists solely of hex digits.  */
      if (! (split_3 (line, line_length, &hex_digest, &binary, &filename)
             && ! (is_stdin && STREQ (filename, "-"))
             && hex_digits (hex_digest)))
        {
          if (warn)
            {
              error (0, 0,
                     _("%s: %" PRIuMAX
                       ": improperly formatted %s checksum line"),
                     checkfile_name, line_number,
                     DIGEST_TYPE_STRING);
            }
        }
      else
        {
          /* Lower-case hex digit for each 4-bit value.  */
          static const char bin2hex[] = { '0', '1', '2', '3',
                                          '4', '5', '6', '7',
                                          '8', '9', 'a', 'b',
                                          'c', 'd', 'e', 'f' };
          bool ok;

          ++n_properly_formatted_lines;

          ok = digest_file (filename, &binary, bin_buffer);

          if (!ok)
            {
              ++n_open_or_read_failures;
              if (!status_only)
                {
                  printf (_("%s: FAILED open or read\n"), filename);
                  fflush (stdout);
                }
            }
          else
            {
              size_t digest_bin_bytes = digest_hex_bytes / 2;
              size_t cnt;
              /* Compare generated binary number with text representation
                 in check file.  Ignore case of hex digits.  */
              for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
                {
                  if (TOLOWER (hex_digest[2 * cnt])
                      != bin2hex[bin_buffer[cnt] >> 4]
                      || (TOLOWER (hex_digest[2 * cnt + 1])
                          != (bin2hex[bin_buffer[cnt] & 0xf])))
                    break;
                }
              /* The loop ran to completion only if every byte matched.  */
              if (cnt != digest_bin_bytes)
                ++n_mismatched_checksums;

              if (!status_only)
                {
                  printf ("%s: %s\n", filename,
                          (cnt != digest_bin_bytes ? _("FAILED") : _("OK")));
                  fflush (stdout);
                }
            }
        }
    }
  while (!feof (checkfile_stream) && !ferror (checkfile_stream));

  free (line);

  if (ferror (checkfile_stream))
    {
      error (0, 0, _("%s: read error"), checkfile_name);
      return false;
    }

  /* stdin is not closed here; main closes it once before exiting.  */
  if (!is_stdin && fclose (checkfile_stream) != 0)
    {
      error (0, errno, "%s", checkfile_name);
      return false;
    }

  if (n_properly_formatted_lines == 0)
    {
      /* Warn if no tests are found.  */
      error (0, 0, _("%s: no properly formatted %s checksum lines found"),
             checkfile_name, DIGEST_TYPE_STRING);
    }
  else
    {
      if (!status_only)
        {
          if (n_open_or_read_failures != 0)
            error (0, 0,
                   ngettext ("WARNING: %" PRIuMAX " of %" PRIuMAX
                             " listed file could not be read",
                             "WARNING: %" PRIuMAX " of %" PRIuMAX
                             " listed files could not be read",
                             n_properly_formatted_lines),
                   n_open_or_read_failures, n_properly_formatted_lines);

          if (n_mismatched_checksums != 0)
            {
              /* Only files that could actually be read were compared.  */
              uintmax_t n_computed_checksums =
                (n_properly_formatted_lines - n_open_or_read_failures);
              error (0, 0,
                     ngettext ("WARNING: %" PRIuMAX " of %" PRIuMAX
                               " computed checksum did NOT match",
                               "WARNING: %" PRIuMAX " of %" PRIuMAX
                               " computed checksums did NOT match",
                               n_computed_checksums),
                     n_mismatched_checksums, n_computed_checksums);
            }
        }
    }

  return (n_properly_formatted_lines != 0
          && n_mismatched_checksums == 0
          && n_open_or_read_failures == 0);
}

/* Program entry point.

   Parses the command line, rejects option combinations that only make
   sense together with --check, and then handles every FILE operand (or
   standard input when none is given): with --check the operand is a
   checksum list verified by digest_check, otherwise its digest is
   computed and printed as "DIGEST TYPE-CHAR NAME".

   Exits with EXIT_SUCCESS if every operand was processed successfully
   and EXIT_FAILURE otherwise.  */
int
main (int argc, char **argv)
{
  unsigned char digest_storage[DIGEST_BIN_BYTES+DIGEST_ALIGN];
  /* The hashing routine needs an aligned destination.  */
  unsigned char *digest = ptr_align (digest_storage, DIGEST_ALIGN);
  bool check_mode = false;
  bool ok = true;
  int binary = -1;
  int opt;

  /* Setting values of global variables.  */
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  for (;;)
    {
      opt = getopt_long (argc, argv, "bctw", long_options, NULL);
      if (opt == -1)
        break;

      switch (opt)
        {
        case 'b':
          binary = 1;
          break;
        case 't':
          binary = 0;
          break;
        case 'c':
          check_mode = true;
          break;
        case 'w':
          /* --warn and --status override each other; the last one wins.  */
          status_only = false;
          warn = true;
          break;
        case STATUS_OPTION:
          status_only = true;
          warn = false;
          break;
        case_GETOPT_HELP_CHAR;
        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
        default:
          usage (EXIT_FAILURE);
        }
    }

  min_digest_line_length = MIN_DIGEST_LINE_LENGTH;
  digest_hex_bytes = DIGEST_HEX_BYTES;

  /* An explicit -b or -t conflicts with --check.  */
  if (check_mode && binary >= 0)
    {
      error (0, 0, _("the --binary and --text options are meaningless when "
                     "verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if (!check_mode && status_only)
    {
      error (0, 0,
       _("the --status option is meaningful only when verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if (!check_mode && warn)
    {
      error (0, 0,
       _("the --warn option is meaningful only when verifying checksums"));
      usage (EXIT_FAILURE);
    }

  /* Where binary mode makes no difference, an undecided mode is text.  */
  if (binary < 0 && !O_BINARY)
    binary = 0;

  /* No operands: process standard input.  */
  if (optind == argc)
    argv[argc++] = "-";

  while (optind < argc)
    {
      char *file = argv[optind];

      if (check_mode)
        {
          if (!digest_check (file))
            ok = false;
        }
      else
        {
          int file_is_binary = binary;

          if (digest_file (file, &file_is_binary, digest))
            {
              size_t const n_bytes = digest_hex_bytes / 2;
              size_t k;
              const char *p;

              /* A leading backslash marks a line whose file name
                 contains an escaped newline or backslash.  */
              if (strpbrk (file, "\n\\") != NULL)
                putchar ('\\');

              for (k = 0; k < n_bytes; ++k)
                printf ("%02x", digest[k]);

              putchar (' ');
              putchar (file_is_binary ? '*' : ' ');

              /* Write the name, turning each newline into "\\n" and
                 each backslash into "\\\\".  */
              for (p = file; *p != '\0'; ++p)
                {
                  if (*p == '\n')
                    fputs ("\\n", stdout);
                  else if (*p == '\\')
                    fputs ("\\\\", stdout);
                  else
                    putchar (*p);
                }
              putchar ('\n');
            }
          else
            ok = false;
        }

      ++optind;
    }

  if (have_read_stdin && fclose (stdin) == EOF)
    error (EXIT_FAILURE, errno, _("standard input"));

  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
*** coreutils-5.97/src/md5sum.c 2010-03-25 07:22:01.000000000 -0500
--- coreutils-5.97/src/md5sum.c.threaded        2010-03-25 07:22:25.000000000 
-0500
***************
*** 40,45 ****
--- 40,46 ----
  #include "error.h"
  #include "quote.h"
  #include "stdio--.h"
+ #include <pthread.h>
  
  /* The official name of this program (e.g., no `g' prefix).  */
  #if HASH_ALGO_MD5
***************
*** 139,150 ****
--- 140,165 ----
    { "check", no_argument, NULL, 'c' },
    { "status", no_argument, NULL, STATUS_OPTION },
    { "text", no_argument, NULL, 't' },
+   { "threads", no_argument, NULL, 'n' },
    { "warn", no_argument, NULL, 'w' },
    { GETOPT_HELP_OPTION_DECL },
    { GETOPT_VERSION_OPTION_DECL },
    { NULL, 0, NULL, 0 }
  };
  
+ struct thread_node
+ {
+   bool do_check;
+   int binary;
+   pthread_t thread;
+   char *filename;
+   struct thread_node *next;
+ };
+ 
+ static pthread_mutex_t m_thread_pool;
+ static struct thread_node *thread_pool = NULL;
+ static bool ok = true;
+ 
  void
  usage (int status)
  {
***************
*** 181,186 ****
--- 196,204 ----
        fputs (_("\
    -t, --text              read in text mode (default)\n\
  "), stdout);
+       fputs (_("\
+   -nX, --threads=X        process X (1 - 16) files in parallel\n\
+ "), stdout);
        fputs (_("\
  \n\
  The following two options are useful only when verifying checksums:\n\
***************
*** 592,607 ****
          && n_open_or_read_failures == 0);
  }
  
! int
! main (int argc, char **argv)
  {
    unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES+DIGEST_ALIGN];
    /* Make sure bin_buffer is properly aligned. */
    unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
    bool do_check = false;
    int opt;
-   bool ok = true;
    int binary = -1;
  
    /* Setting values of global variables.  */
    initialize_main (&argc, &argv);
--- 610,718 ----
          && n_open_or_read_failures == 0);
  }
  
! void
! *thread_start(void *node)
  {
    unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES+DIGEST_ALIGN];
    /* Make sure bin_buffer is properly aligned. */
    unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
+   struct thread_node *tn = (struct thread_node*) node;
+   struct thread_node *tmp = NULL;
+ 
+   if ((node == NULL) || (thread_pool == NULL)) return NULL;
+ 
+   if (tn->filename != NULL)  {
+       if (tn->do_check)
+       ok &= digest_check (tn->filename);
+       else
+       {
+         int file_is_binary = tn->binary;
+ 
+         if (! digest_file (tn->filename, &file_is_binary, bin_buffer))
+           ok = false;
+         else
+           {
+             size_t i;
+ 
+               pthread_mutex_lock(&m_thread_pool);
+             /* Output a leading backslash if the file name contains
+                a newline or backslash.  */
+             if (strchr (tn->filename, '\n') || strchr (tn->filename, '\\'))
+               putchar ('\\');
+ 
+             for (i = 0; i < (digest_hex_bytes / 2); ++i)
+               printf ("%02x", bin_buffer[i]);
+ 
+             putchar (' ');
+             if (file_is_binary)
+               putchar ('*');
+             else
+               putchar (' ');
+ 
+             /* Translate each NEWLINE byte to the string, "\\n",
+                and each backslash to "\\\\".  */
+             for (i = 0; i < strlen (tn->filename); ++i)
+               {
+                 switch (tn->filename[i])
+                   {
+                   case '\n':
+                     fputs ("\\n", stdout);
+                     break;
+ 
+                   case '\\':
+                     fputs ("\\\\", stdout);
+                     break;
+ 
+                   default:
+                     putchar (tn->filename[i]);
+                     break;
+                   }
+               }
+             putchar ('\n');
+           }
+             pthread_mutex_unlock(&m_thread_pool);
+       }
+   }
+ 
+   /* thread cleanup */
+   pthread_mutex_lock(&m_thread_pool);
+   if (thread_pool != NULL) {
+     if (thread_pool == tn) {
+       /* we're the first thread, set root to next (if any) */
+       thread_pool = thread_pool->next;
+ 
+     } else {
+       tmp = thread_pool;
+       while (tmp->next != NULL)
+       {
+          if (tmp->next == tn) {
+            /* the next node is us, set next node to our next */
+            tmp->next = tn->next;
+            break;
+          }
+          tmp = tmp->next;
+       }
+     }
+     
+     free(tn->filename);
+     tn->filename = NULL;
+     tn->next = NULL;
+     free(tn);
+     tn = NULL;
+   }
+   pthread_mutex_unlock(&m_thread_pool);
+ }
+ 
+ int
+ main (int argc, char **argv)
+ {
    bool do_check = false;
    int opt;
    int binary = -1;
+   int threads_max = -1;
+   int threads_running = 0;
+   struct thread_node *tmpnode = NULL;
+   pthread_attr_t attr;
  
    /* Setting values of global variables.  */
    initialize_main (&argc, &argv);
***************
*** 612,618 ****
  
    atexit (close_stdout);
  
!   while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
      switch (opt)
        {
        case 'b':
--- 723,729 ----
  
    atexit (close_stdout);
  
!   while ((opt = getopt_long (argc, argv, "bctwn:", long_options, NULL)) != -1)
      switch (opt)
        {
        case 'b':
***************
*** 632,637 ****
--- 743,751 ----
        status_only = false;
        warn = true;
        break;
+       case 'n':
+       threads_max = atoi(optarg);
+       break;
        case_GETOPT_HELP_CHAR;
        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
        default:
***************
*** 662,725 ****
        usage (EXIT_FAILURE);
      }
  
    if (!O_BINARY && binary < 0)
      binary = 0;
  
    if (optind == argc)
      argv[argc++] = "-";
  
    for (; optind < argc; ++optind)
      {
!       char *file = argv[optind];
! 
!       if (do_check)
!       ok &= digest_check (file);
!       else
!       {
!         int file_is_binary = binary;
! 
!         if (! digest_file (file, &file_is_binary, bin_buffer))
!           ok = false;
!         else
!           {
!             size_t i;
! 
!             /* Output a leading backslash if the file name contains
!                a newline or backslash.  */
!             if (strchr (file, '\n') || strchr (file, '\\'))
!               putchar ('\\');
  
!             for (i = 0; i < (digest_hex_bytes / 2); ++i)
!               printf ("%02x", bin_buffer[i]);
  
!             putchar (' ');
!             if (file_is_binary)
!               putchar ('*');
!             else
!               putchar (' ');
  
!             /* Translate each NEWLINE byte to the string, "\\n",
!                and each backslash to "\\\\".  */
!             for (i = 0; i < strlen (file); ++i)
!               {
!                 switch (file[i])
!                   {
!                   case '\n':
!                     fputs ("\\n", stdout);
!                     break;
  
!                   case '\\':
!                     fputs ("\\\\", stdout);
!                     break;
  
!                   default:
!                     putchar (file[i]);
!                     break;
!                   }
!               }
!             putchar ('\n');
!           }
!       }
      }
  
    if (have_read_stdin && fclose (stdin) == EOF)
--- 776,860 ----
        usage (EXIT_FAILURE);
      }
  
+   if ((threads_max != -1) && do_check)
+     {
+       error (0, 0, _("the --threads option is meaningless when "
+                    "verifying checksums"));
+       usage (EXIT_FAILURE);
+     }
+ 
+   if (threads_max == -1) threads_max = 1;
+ 
+   if (((threads_max < 1) || (threads_max > 16)) && !do_check)
+     {
+       error (0, 0, _("the --threads argument must be between 1 and 16"));
+       usage (EXIT_FAILURE);
+     }
+ 
    if (!O_BINARY && binary < 0)
      binary = 0;
  
    if (optind == argc)
      argv[argc++] = "-";
  
+   /* initialize thread apparatus */
+   thread_pool = NULL;
+   pthread_mutex_init(&m_thread_pool, NULL);
+ 
    for (; optind < argc; ++optind)
      {
!       while (1)
!       {
!       pthread_mutex_lock(&m_thread_pool);
  
!       threads_running = 0;
!       tmpnode = thread_pool;
!       while (tmpnode != NULL) {
!         threads_running++;
!         if (tmpnode->next != NULL) tmpnode = tmpnode->next;
!         else break;
!       }
  
!       /* see if we need to start a new thread */
!       if (threads_running < threads_max)
!         {
!           if (thread_pool == NULL) 
!             {
!               thread_pool = (struct thread_node*)xmalloc(sizeof(struct 
thread_node));
!               tmpnode = thread_pool;
!             } else {
!               tmpnode->next = (struct thread_node*)xmalloc(sizeof(struct 
thread_node));
!               tmpnode = tmpnode->next;
!             }
! 
!           if (tmpnode != NULL)
!             {
!               tmpnode->do_check = do_check;
!               tmpnode->binary = binary;
!               tmpnode->filename = strdup(argv[optind]);
!               tmpnode->next = NULL;
!               tmpnode->thread = NULL;
! 
!               pthread_attr_init(&attr);
!               pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
!               pthread_create( &(tmpnode->thread), &attr, thread_start, (void 
*) tmpnode);
!             }
! 
!           pthread_mutex_unlock(&m_thread_pool);
!           break;
!         }
  
!       pthread_mutex_unlock(&m_thread_pool);
!       }
  
!     }
  
!     /* wait for all threads */
!     while (1)
!     {
!       pthread_mutex_lock(&m_thread_pool);
!       if (thread_pool == NULL) break;
!       pthread_mutex_unlock(&m_thread_pool);
      }
  
    if (have_read_stdin && fclose (stdin) == EOF)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]