From 6148fb80709e83c48895a0c9dc9342b2c937dc42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Fri, 12 Feb 2016 20:15:51 -0800 Subject: [PATCH] split: adjust recent --number changes * src/split.c (lines_rr): Reinstate the conditional setting of the WROTE boolean, as otherwise split -n r/1 would spin forever writing to a closed --filter. There was a test in place to check for this, but it was incorrect as detailed below. (input_file_size): Immediately disallow --number with non seekable inputs, as such an invocation is not currently generally supported and will fail as the data overflows the internal buffer. * tests/split/l-chunk.sh: Adjust to again disallow -n /dev/zero. Also change all '&& fail=1' checks to use the 'returns_ 1' form. * tests/split/filter.sh: Change the no longer supported /dev/zero case to a regular $OFF_T_MAX file (supported on XFS for example). Also fix the timeout(1) commands so they're not subject to pipefail issues. --- src/split.c | 33 +++++++++++++++++++++------------ tests/split/filter.sh | 12 ++++++++---- tests/split/l-chunk.sh | 12 ++++++------ 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/split.c b/src/split.c index 0b10600..676aa2e 100644 --- a/src/split.c +++ b/src/split.c @@ -278,6 +278,14 @@ CHUNKS may be:\n\ static off_t input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize) { + off_t cur = lseek (fd, 0, SEEK_CUR); + if (cur < 0) + { + if (errno == ESPIPE) + errno = 0; /* Suppress confusing seek error. */ + return -1; + } + off_t size = 0; do { @@ -290,19 +298,20 @@ input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize) } while (size < bufsize); - /* The file contains at least BUFSIZE bytes. Infer its size via - st->st_size if this seems reliable, or via lseek if not. */ - off_t cur = lseek (fd, 0, SEEK_CUR); - off_t end; - if (cur < 0) - return -1; - if (cur < size) + /* Note we check st_size _after_ the read() above + because /proc files on GNU/Linux are seekable + but have st_size == 0. */ + if (st->st_size == 0) { - /* E.g., /dev/zero on GNU/Linux, where CUR is zero and SIZE == BUFSIZE. - Assume there is no limit to the file size. */ + /* We've filled the buffer, from a seekable file, + which has an st_size==0, E.g., /dev/zero on GNU/Linux. + Assume there is no limit to file size. */ errno = EOVERFLOW; return -1; } + + cur += size; + off_t end; if (usable_st_size (st) && cur <= st->st_size) end = st->st_size; else @@ -1197,7 +1206,8 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) quotef (files[i_file].of_name)); } - wrote = true; + if (! ignorable (errno)) + wrote = true; if (file_limit) { @@ -1558,8 +1568,7 @@ main (int argc, char **argv) char *buf = ptr_align (b, page_size); size_t initial_read = SIZE_MAX; - if ((split_type == type_chunk_bytes || split_type == type_chunk_lines) - && n_units != 1) + if (split_type == type_chunk_bytes || split_type == type_chunk_lines) { file_size = input_file_size (STDIN_FILENO, &in_stat_buf, buf, in_blk_size); diff --git a/tests/split/filter.sh b/tests/split/filter.sh index e4e4dca..a93008b 100755 --- a/tests/split/filter.sh +++ b/tests/split/filter.sh @@ -18,6 +18,8 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ split +require_sparse_support_ # for 'truncate --size=$OFF_T_MAX' +eval $(getlimits) # for OFF_T limits xz --version || skip_ "xz (better than gzip/bzip2) required" for total_n_lines in 5 3000 20000; do @@ -41,7 +43,7 @@ done # split does not run the command (and effectively elides the file) # only when the output to that command would have been empty. split -e -n 10 --filter='xz > $FILE.xz' /dev/null || fail=1 -stat x?? 2>/dev/null && fail=1 +returns_ 1 stat x?? 2>/dev/null || fail=1 # Ensure this invalid combination is flagged returns_ 1 split -n 1/2 --filter='true' /dev/null 2>&1 || fail=1 @@ -50,8 +52,10 @@ returns_ 1 split -n 1/2 --filter='true' /dev/null 2>&1 || fail=1 # where they would result in a non zero exit from split. yes | head -n200K | split -b1G --filter='head -c1 >/dev/null' || fail=1 -# Ensure that endless input is ignored when all filters finish -timeout 10 yes | split --filter="head -c1 >/dev/null" -n r/1 || fail=1 -timeout 10 split --filter="head -c1 >/dev/null" -n 1 /dev/zero || fail=1 +# Ensure that "endless" input is ignored when all filters finish +timeout 10 sh -c 'yes | split --filter="head -c1 >/dev/null" -n r/1' || fail=1 +if truncate -s$OFF_T_MAX zero.in; then + timeout 10 sh -c 'split --filter="head -c1 >/dev/null" -n 1 zero.in' || fail=1 +fi Exit $fail diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh index 38297de..a7a9395 100755 --- a/tests/split/l-chunk.sh +++ b/tests/split/l-chunk.sh @@ -21,12 +21,12 @@ print_ver_ split # invalid number of chunks echo "split: invalid number of chunks: '1o'" > exp -split -n l/1o 2>err && fail=1 +returns_ 1 split -n l/1o 2>err || fail=1 compare exp err || fail=1 -echo > exp -echo | split -n l/1 || fail=1 -compare exp xaa || fail=1 +echo "split: -: cannot determine file size" > exp +: | returns_ 1 split -n l/1 2>err || fail=1 +compare exp err || fail=1 # N can be greater than the file size # in which case no data is extracted, or empty files are written @@ -45,7 +45,7 @@ rm x?? # Ensure --elide-empty-files is honored split -e -n l/10 /dev/null || fail=1 -stat x?? 2>/dev/null && fail=1 +returns_ 1 stat x?? 2>/dev/null || fail=1 # 80 bytes. ~ transformed to \n below lines=\ @@ -79,7 +79,7 @@ for ELIDE_EMPTY in '' '-e'; do if test -z "$ELIDE_EMPTY"; then split ---io-blksize=$IO_BLKSIZE -n l/2/$N in > chunk.k - stat x* 2>/dev/null && fail=1 + returns_ 1 stat x* 2>/dev/null || fail=1 fi split ---io-blksize=$IO_BLKSIZE $ELIDE_EMPTY -n l/$N in -- 2.5.0