bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

sort --compress-program option


From: Paul Eggert
Subject: sort --compress-program option
Date: Mon, 19 Feb 2007 11:39:35 -0800
User-agent: Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux)

Here's the patch I promised to have a --compress-program option to sort.
It doesn't address the other issues we have about compression, just the
user interface, but that's the most pressing issue we have right now.

2007-02-19  Paul Eggert  <address@hidden>

        * NEWS: sort now uses a --compress-program option rather than
        an environment variable.
        * doc/coreutils.texi (sort invocation): Document this.
        * src/sort.c (usage): Likewise.
        (COMPRESS_PROGRAM_OPTION): New const.
        (long_options, create_temp, main): Support new option.
        * tests/misc/sort-compress: Test it.

diff --git a/NEWS b/NEWS
index e0b8298..7519496 100644
--- a/NEWS
+++ b/NEWS
@@ -45,8 +45,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 ** New features

   By default, sort usually compresses each temporary file it writes.
-  When sorting very large inputs, this can result in sort using far
-  less temporary disk space and in improved performance.
+  This can help save both time and disk space when sorting large inputs.
+  The default compression program is gzip, but this can be overridden
+  with sort's new --compress-program=PROG option.

 ** New features

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index f738d83..1a2dba4 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3467,20 +3467,6 @@ value as the directory for temporary files instead of @file{/tmp}.  The
 @option{--temporary-directory} (@option{-T}) option in turn overrides
 the environment variable.

-@vindex GNUSORT_COMPRESSOR
-To improve performance when sorting very large files, GNU sort will,
-by default, try to compress temporary files with the program
-@command{gzip}.  The environment variable @env{GNUSORT_COMPRESSOR} can be
-set to the name of another program to be used.  The program specified
-must compress standard input to standard output when no arguments are
-given to it, and it must decompress standard input to standard output
-when the @option{-d} argument is given to it.  If the program exits
-with nonzero status, sort will terminate with an error.  To disable
-compression of temporary files, set the variable to the empty string.
-Whitespace and the backslash character should not appear in the
-program name.  They are reserved for future use.
-
-
 The following options affect the ordering of output lines.  They may be
 specified globally or as part of a specific key field.  If no key
 fields are specified, global options apply to comparison of entire
@@ -3647,6 +3633,23 @@ Other options are:

 @table @samp

+@item --compress-program=@var{prog}
+If @var{prog} is not the empty string, compress any temporary files
+with the program @var{prog} rather than with the default compression
+method.  The default is currently @command{gzip} but this may change.
+
+With no arguments, @var{prog} must compress standard input to standard
+output, and when given the @option{-d} option it must decompress
+standard input to standard output.
+
+Terminate with an error if @var{prog} exits with nonzero status.
+
+Whitespace and the backslash character should not appear in
+@var{prog}; they are reserved for future use.
+
+If @var{prog} is the empty string, do not compress temporary
+files.
+
 @item -k @var{pos1}[,@var{pos2}]
 @itemx --key=@var{pos1}[,@var{pos2}]
 @opindex -k
diff --git a/src/sort.c b/src/sort.c
index c7ae0c8..6a7de9c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -281,7 +281,7 @@ static bool have_read_stdin;
 static struct keyfield *keylist;

 /* Program used to (de)compress temp files.  Must accept -d.  */
-static const char *compress_program;
+static char const *compress_program;

 static void sortlines_temp (struct line *, size_t, struct line *);

@@ -339,6 +339,8 @@ Other options:\n\
 \n\
   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
+      --compress-program=PROG  compress temporaries with PROG;\n\
+                              decompress them with PROG -d\n\
   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
   -m, --merge               merge already sorted files; do not sort\n\
 "), stdout);
@@ -390,6 +392,7 @@ native byte values.\n\
 enum
 {
   CHECK_OPTION = CHAR_MAX + 1,
+  COMPRESS_PROGRAM_OPTION,
   RANDOM_SOURCE_OPTION
 };

@@ -399,6 +402,7 @@ static struct option const long_options[] =
 {
   {"ignore-leading-blanks", no_argument, NULL, 'b'},
   {"check", optional_argument, NULL, CHECK_OPTION},
+  {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
   {"dictionary-order", no_argument, NULL, 'd'},
   {"ignore-case", no_argument, NULL, 'f'},
   {"general-numeric-sort", no_argument, NULL, 'g'},
@@ -839,29 +843,18 @@ pipe_fork (int pipefds[2], size_t tries)
 static char *
 create_temp (FILE **pfp, pid_t *ppid)
 {
-  static bool compress_program_known;
   int tempfd;
   struct tempnode *node = create_temp_file (&tempfd);
   char *name = node->name;

-  if (! compress_program_known)
+  if (! compress_program)
     {
-      compress_program = getenv ("GNUSORT_COMPRESSOR");
-      if (compress_program == NULL)
-       {
-         static const char *default_program = "gzip";
-         const char *path_program = find_in_path (default_program);
-
-         if (path_program != default_program)
-           compress_program = path_program;
-       }
-      else if (*compress_program == '\0')
-       compress_program = NULL;
-
-      compress_program_known = true;
+      static char const default_compress_program[] = "gzip";
+      char const *prog = find_in_path (default_compress_program);
+      compress_program = (prog == default_compress_program ? "" : prog);
     }

-  if (compress_program)
+  if (*compress_program)
     {
       int pipefds[2];

@@ -2946,6 +2939,12 @@ main (int argc, char **argv)
          checkonly = c;
          break;

+       case COMPRESS_PROGRAM_OPTION:
+         if (compress_program && strcmp (compress_program, optarg) != 0)
+           error (SORT_FAILURE, 0, _("multiple compress programs specified"));
+         compress_program = optarg;
+         break;
+
        case 'k':
          key = key_init (&key_buf);

diff --git a/tests/misc/sort-compress b/tests/misc/sort-compress
index af961d2..b0f4dd7 100755
--- a/tests/misc/sort-compress
+++ b/tests/misc/sort-compress
@@ -64,21 +64,21 @@ test -f ok || fail=1
 rm -f ok

 # This is to make sure we can disable compression
-PATH=.:$PATH GNUSORT_COMPRESSOR= sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program= -S 1k in > out || fail=1
 cmp exp out || fail=1
 test $fail = 1 && diff out exp 2> /dev/null
 test -f ok && fail=1

 # This is to make sure we can use something other than gzip
 mv gzip dzip || fail=1
-GNUSORT_COMPRESSOR=./dzip sort -S 1k in > out || fail=1
+sort --compress-program=./dzip -S 1k in > out || fail=1
 cmp exp out || fail=1
 test $fail = 1 && diff out exp 2> /dev/null
 test -f ok || fail=1
 rm -f ok

 # Make sure it can find other programs in PATH correctly
-PATH=.:$PATH GNUSORT_COMPRESSOR=dzip sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program=dzip -S 1k in > out || fail=1
 cmp exp out || fail=1
 test $fail = 1 && diff out exp 2> /dev/null
 test -f ok || fail=1
M ChangeLog
M NEWS
M doc/coreutils.texi
M src/sort.c
M tests/misc/sort-compress
Committed as 58f7a426046fb5e4dd0cd258b5674fa14e38045b




reply via email to

[Prev in Thread] Current Thread [Next in Thread]