[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
sort --compress-program option
From: |
Paul Eggert |
Subject: |
sort --compress-program option |
Date: |
Mon, 19 Feb 2007 11:39:35 -0800 |
User-agent: |
Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux) |
Here's the patch I promised, which adds a --compress-program option to sort.
It doesn't address the other issues we have about compression, just the
user interface, but that's the most pressing issue we have right now.
2007-02-19 Paul Eggert <address@hidden>
* NEWS: sort now uses a --compress-program option rather than
an environment variable.
* doc/coreutils.texi (sort invocation): Document this.
* src/sort.c (usage): Likewise.
(COMPRESS_PROGRAM_OPTION): New const.
(long_options, create_temp, main): Support new option.
* tests/misc/sort-compress: Test it.
diff --git a/NEWS b/NEWS
index e0b8298..7519496 100644
--- a/NEWS
+++ b/NEWS
@@ -45,8 +45,9 @@ GNU coreutils NEWS -*- outline -*-
** New features
By default, sort usually compresses each temporary file it writes.
- When sorting very large inputs, this can result in sort using far
- less temporary disk space and in improved performance.
+ This can help save both time and disk space when sorting large inputs.
+ The default compression program is gzip, but this can be overridden
+ with sort's new --compress-program=PROG option.
** New features
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index f738d83..1a2dba4 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3467,20 +3467,6 @@ value as the directory for temporary files instead of @file{/tmp}. The
@option{--temporary-directory} (@option{-T}) option in turn overrides
the environment variable.
-@vindex GNUSORT_COMPRESSOR
-To improve performance when sorting very large files, GNU sort will,
-by default, try to compress temporary files with the program
-@command{gzip}. The environment variable @env{GNUSORT_COMPRESSOR} can be
-set to the name of another program to be used. The program specified
-must compress standard input to standard output when no arguments are
-given to it, and it must decompress standard input to standard output
-when the @option{-d} argument is given to it. If the program exits
-with nonzero status, sort will terminate with an error. To disable
-compression of temporary files, set the variable to the empty string.
-Whitespace and the backslash character should not appear in the
-program name. They are reserved for future use.
-
-
The following options affect the ordering of output lines. They may be
specified globally or as part of a specific key field. If no key
fields are specified, global options apply to comparison of entire
@@ -3647,6 +3633,23 @@ Other options are:
@table @samp
+@item --compress-program=@var{prog}
+If @var{prog} is not the empty string, compress any temporary files
+with the program @var{prog} rather than with the default compression
+method. The default is currently @command{gzip} but this may change.
+
+With no arguments, @var{prog} must compress standard input to standard
+output, and when given the @option{-d} option it must decompress
+standard input to standard output.
+
+Terminate with an error if @var{prog} exits with nonzero status.
+
+Whitespace and the backslash character should not appear in
+@var{prog}; they are reserved for future use.
+
+If @var{prog} is the empty string, do not compress temporary
+files.
+
@item -k @var{pos1}[,@var{pos2}]
@itemx --key=@var{pos1}[,@var{pos2}]
@opindex -k
diff --git a/src/sort.c b/src/sort.c
index c7ae0c8..6a7de9c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -281,7 +281,7 @@ static bool have_read_stdin;
static struct keyfield *keylist;
/* Program used to (de)compress temp files. Must accept -d. */
-static const char *compress_program;
+static char const *compress_program;
static void sortlines_temp (struct line *, size_t, struct line *);
@@ -339,6 +339,8 @@ Other options:\n\
\n\
-c, --check, --check=diagnose-first check for sorted input; do not sort\n\
-C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
+ --compress-program=PROG compress temporaries with PROG;\n\
+ decompress them with PROG -d\n\
-k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
-m, --merge merge already sorted files; do not sort\n\
"), stdout);
@@ -390,6 +392,7 @@ native byte values.\n\
enum
{
CHECK_OPTION = CHAR_MAX + 1,
+ COMPRESS_PROGRAM_OPTION,
RANDOM_SOURCE_OPTION
};
@@ -399,6 +402,7 @@ static struct option const long_options[] =
{
{"ignore-leading-blanks", no_argument, NULL, 'b'},
{"check", optional_argument, NULL, CHECK_OPTION},
+ {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
{"general-numeric-sort", no_argument, NULL, 'g'},
@@ -839,29 +843,18 @@ pipe_fork (int pipefds[2], size_t tries)
static char *
create_temp (FILE **pfp, pid_t *ppid)
{
- static bool compress_program_known;
int tempfd;
struct tempnode *node = create_temp_file (&tempfd);
char *name = node->name;
- if (! compress_program_known)
+ if (! compress_program)
{
- compress_program = getenv ("GNUSORT_COMPRESSOR");
- if (compress_program == NULL)
- {
- static const char *default_program = "gzip";
- const char *path_program = find_in_path (default_program);
-
- if (path_program != default_program)
- compress_program = path_program;
- }
- else if (*compress_program == '\0')
- compress_program = NULL;
-
- compress_program_known = true;
+ static char const default_compress_program[] = "gzip";
+ char const *prog = find_in_path (default_compress_program);
+ compress_program = (prog == default_compress_program ? "" : prog);
}
- if (compress_program)
+ if (*compress_program)
{
int pipefds[2];
@@ -2946,6 +2939,12 @@ main (int argc, char **argv)
checkonly = c;
break;
+ case COMPRESS_PROGRAM_OPTION:
+ if (compress_program && strcmp (compress_program, optarg) != 0)
+ error (SORT_FAILURE, 0, _("multiple compress programs specified"));
+ compress_program = optarg;
+ break;
+
case 'k':
key = key_init (&key_buf);
diff --git a/tests/misc/sort-compress b/tests/misc/sort-compress
index af961d2..b0f4dd7 100755
--- a/tests/misc/sort-compress
+++ b/tests/misc/sort-compress
@@ -64,21 +64,21 @@ test -f ok || fail=1
rm -f ok
# This is to make sure we can disable compression
-PATH=.:$PATH GNUSORT_COMPRESSOR= sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program= -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok && fail=1
# This is to make sure we can use something other than gzip
mv gzip dzip || fail=1
-GNUSORT_COMPRESSOR=./dzip sort -S 1k in > out || fail=1
+sort --compress-program=./dzip -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok || fail=1
rm -f ok
# Make sure it can find other programs in PATH correctly
-PATH=.:$PATH GNUSORT_COMPRESSOR=dzip sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program=dzip -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok || fail=1
M ChangeLog
M NEWS
M doc/coreutils.texi
M src/sort.c
M tests/misc/sort-compress
Committed as 58f7a426046fb5e4dd0cd258b5674fa14e38045b
- sort --compress-program option,
Paul Eggert <=