>From f5601f0e6d0be43a8976839482abe4726f75b4a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Sun, 1 Jul 2012 01:14:42 +0100 Subject: [PATCH] sort: avoid redundant processing with inaccessible inputs or output * src/sort.c (check_inputs): A new function to verify all inputs are accessible before further processing. (check_output): A new function to open or create a specified output file, before futher processing. (stream_open): Adjust to truncating the previously opened output file rather than opening directly. (avoid_trashing_input): Optimize to stat the output file descriptor, rather than the file name. (main): Call the new functions to check accessability of inputs and output, before processing starts. * tests/misc/sort: Adjust to the changed error message. * tests/misc/sort-merge-fdlimit: Account for the earlier opened file descriptor of the specified output file. * tests/misc/sort-exit-early: A new test to exercise the improvements. * tests/Makefile.am: Reference the new test. * NEWS: Mention the improvement. Suggested-by: Bernhard Voelker --- NEWS | 6 ++++ src/sort.c | 65 ++++++++++++++++++++++++++++++++++++++-- tests/Makefile.am | 1 + tests/misc/sort | 2 +- tests/misc/sort-exit-early | 34 +++++++++++++++++++++ tests/misc/sort-merge-fdlimit | 2 +- 6 files changed, 104 insertions(+), 6 deletions(-) create mode 100755 tests/misc/sort-exit-early diff --git a/NEWS b/NEWS index 8c75a32..c51fb78 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,12 @@ GNU coreutils NEWS -*- outline -*- patches as well as enough support to build on the Hurd, we no longer have any reason to include it here. +** Improvements + + sort will avoid redundant processing in the presence of inaccessible inputs, + or unwritable output. Immediate errors are now given before any potentially + expensive processing is initiated. + * Noteworthy changes in release 8.17 (2012-05-10) [stable] diff --git a/src/sort.c b/src/sort.c index 5a48ce6..40f094b 100644 --- a/src/sort.c +++ b/src/sort.c @@ -357,6 +357,9 @@ static bool unique; /* Nonzero if any of the input files are the standard input. */ static bool have_read_stdin; +/* File descriptor associated with -o. */ +static int outfd = -1; + /* List of key field comparisons to be tried. */ static struct keyfield *keylist; @@ -911,11 +914,12 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion) static FILE * stream_open (char const *file, char const *how) { + FILE *fp; + if (!file) return stdout; if (*how == 'r') { - FILE *fp; if (STREQ (file, "-")) { have_read_stdin = true; @@ -924,9 +928,18 @@ stream_open (char const *file, char const *how) else fp = fopen (file, how); fadvise (fp, FADVISE_SEQUENTIAL); - return fp; } - return fopen (file, how); + else if (*how == 'w') + { + assert (outfd != -1); + if (ftruncate (outfd, 0) != 0) + error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (file)); + fp = fdopen (outfd, how); + } + else + assert (!"unexpected mode passed to stream_open"); + + return fp; } /* Same as stream_open, except always return a non-null value; die on @@ -3637,7 +3650,7 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps, if (! got_outstat) { if ((outfile - ? stat (outfile, &outstat) + ? fstat (outfd, &outstat) : fstat (STDOUT_FILENO, &outstat)) != 0) break; @@ -3666,6 +3679,44 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps, } } +/* Scan the input files to ensure all are accessible. + Otherwise exit with a diagnostic. + + Note this will catch common issues with permissions etc. + but will fail to notice issues where you can open() but not read(), + like when a directory is specified on some systems. + Catching these obscure cases could slow down performance in + common cases. */ + +static void +check_inputs (char * const *files, size_t nfiles) +{ + size_t i; + for (i = 0; i < nfiles; i++) + { + if (STREQ (files[i], "-")) + continue; + + if (euidaccess (files[i], R_OK) != 0) + die (_("cannot read"), files[i]); + } +} + +/* Ensure a specified output file can be created or written to, + and cache the returned descriptor in the global OUTFD variable. + Otherwise exit with a diagnostic. */ + +static void +check_output (char const *outfile) +{ + if (outfile) + { + outfd = open (outfile, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO); + if (outfd < 0) + die (_("open failed"), outfile); + } +} + /* Merge the input FILES. NTEMPS is the number of files at the start of FILES that are temporary; it is zero at the top level. NFILES is the total number of files. Put the output in @@ -4620,6 +4671,12 @@ main (int argc, char **argv) exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); } + /* Check all inputs are accessible, or exit immediately. */ + check_inputs (files, nfiles); + + /* Check output is writable, or exit immediately. */ + check_output (outfile); + if (mergeonly) { struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); diff --git a/tests/Makefile.am b/tests/Makefile.am index 2155cee..dfac9a0 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -251,6 +251,7 @@ TESTS = \ misc/sort-merge \ misc/sort-merge-fdlimit \ misc/sort-month \ + misc/sort-exit-early \ misc/sort-rand \ misc/sort-spinlock-abuse \ misc/sort-stale-thread-mem \ diff --git a/tests/misc/sort b/tests/misc/sort index 5be00a0..5d15d75 100755 --- a/tests/misc/sort +++ b/tests/misc/sort @@ -33,7 +33,7 @@ my $mb_locale = $ENV{LOCALE_FR_UTF8}; # Normalize each diagnostic to use '-'. my $normalize_filename = {ERR_SUBST => 's/^$prog: .*?:/$prog: -:/'}; -my $no_file = "$prog: open failed: no-file: No such file or directory\n"; +my $no_file = "$prog: cannot read: no-file: No such file or directory\n"; my @Tests = ( diff --git a/tests/misc/sort-exit-early b/tests/misc/sort-exit-early new file mode 100755 index 0000000..4cdcee6 --- /dev/null +++ b/tests/misc/sort-exit-early @@ -0,0 +1,34 @@ +#!/bin/sh +# Test 'sort' exits early on inaccessible inputs or output + +# Copyright (C) 2012 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ sort + + +timeout 10 sort -o . && fail=1 +test $? == 124 && fail=1 + +# Check all inputs are readable before starting to sort +# Also ensure the output isn't created in this case +touch output +chmod a-r output +timeout 10 sort -o typo - output && fail=1 +test $? == 124 && fail=1 +test -e typo && fail=1 + +Exit $fail diff --git a/tests/misc/sort-merge-fdlimit b/tests/misc/sort-merge-fdlimit index 7ce109c..ee1575a 100755 --- a/tests/misc/sort-merge-fdlimit +++ b/tests/misc/sort-merge-fdlimit @@ -65,7 +65,7 @@ done (seq 6 && echo 6) >exp || fail=1 echo 6 >out || fail=1 (exec 3<&- 4<&- 5<&- 6