>From d63d7708ba16494a2968490a686916a93b4a805e Mon Sep 17 00:00:00 2001 From: "A. Gordon" Date: Wed, 7 Jan 2015 18:30:28 -0500 Subject: [PATCH 1/3] split: replace hard-coded '\n' with a variable --- src/split.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/split.c b/src/split.c index ef672f4..71fc9e2 100644 --- a/src/split.c +++ b/src/split.c @@ -108,6 +108,9 @@ static bool elide_empty_files; input to output, which is much slower, so disabled by default. */ static bool unbuffered; +/* The character marking end of line. Default to \n. */ +static char eolchar = '\n'; + /* The split mode to use. */ enum Split_type { @@ -630,10 +633,10 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) error (EXIT_FAILURE, errno, "%s", infile); bp = bp_out = buf; eob = bp + n_read; - *eob = '\n'; + *eob = eolchar; while (true) { - bp = memchr (bp, '\n', eob - bp + 1); + bp = memchr (bp, eolchar, eob - bp + 1); if (bp == eob) { if (eob != bp_out) /* do not write 0 bytes! */ @@ -692,10 +695,10 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) /* Have enough for split. */ split_rest = n_bytes - n_out - n_hold; eoc = sob + split_rest - 1; - eol = memrchr (sob, '\n', split_rest); + eol = memrchr (sob, eolchar, split_rest); } else - eol = memrchr (sob, '\n', n_left); + eol = memrchr (sob, eolchar, n_left); /* Output hold space if possible. */ if (n_hold && !(!eol && n_out)) @@ -833,7 +836,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, /* Begin looking for '\n' at last byte of chunk. */ off_t skip = MIN (n_read, MAX (0, chunk_end - n_written)); - char *bp_out = memchr (bp + skip, '\n', n_read - skip); + char *bp_out = memchr (bp + skip, eolchar, n_read - skip); if (bp_out++) next = true; else @@ -1080,7 +1083,7 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) bool next = false; /* Find end of line. 
*/ - char *bp_out = memchr (bp, '\n', eob - bp); + char *bp_out = memchr (bp, eolchar, eob - bp); if (bp_out) { bp_out++; -- 1.9.1 >From 5086fa3cec116086b8b6be895dc9a91d3e27dc59 Mon Sep 17 00:00:00 2001 From: "A. Gordon" Date: Wed, 7 Jan 2015 18:40:14 -0500 Subject: [PATCH 2/3] split: accept -t=SEP/-z options --- src/split.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/split.c b/src/split.c index 71fc9e2..cb63b03 100644 --- a/src/split.c +++ b/src/split.c @@ -109,7 +109,8 @@ static bool elide_empty_files; static bool unbuffered; /* The character marking end of line. Default to \n. */ -static char eolchar = '\n'; +enum { DEFAULT_EOL = '\n' }; +static char eolchar = DEFAULT_EOL; /* The split mode to use. */ enum Split_type @@ -142,6 +143,8 @@ static struct option const longopts[] = {"numeric-suffixes", optional_argument, NULL, 'd'}, {"filter", required_argument, NULL, FILTER_OPTION}, {"verbose", no_argument, NULL, VERBOSE_OPTION}, + {"line-separator", required_argument, NULL, 't'}, + {"zero-terminated", no_argument, NULL, 'z'}, {"-io-blksize", required_argument, NULL, IO_BLKSIZE_OPTION}, /* do not document */ {GETOPT_HELP_OPTION_DECL}, @@ -226,7 +229,9 @@ is -, read standard input.\n\ --filter=COMMAND write to shell COMMAND; file name is $FILE\n\ -l, --lines=NUMBER put NUMBER lines per output file\n\ -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\ + -t, --line-separator=SEP use SEP instead of new-line as line separator\n\ -u, --unbuffered immediately copy input to output with '-n r/...'\n\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ "), DEFAULT_SUFFIX_LENGTH); fputs (_("\ --verbose print a diagnostic just before each\n\ @@ -1227,7 +1232,7 @@ main (int argc, char **argv) int this_optind = optind ? 
optind : 1; char *slash; - c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u", + c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:uz", longopts, NULL); if (c == -1) break; @@ -1306,6 +1311,37 @@ main (int argc, char **argv) unbuffered = true; break; + case 't': + case 'z': + { + /* True once a separator option was seen (even newline). */ + static bool eol_seen; + char neweol = '\0'; + if (c == 't') + { + neweol = optarg[0]; + if (! neweol) + error (EXIT_FAILURE, 0, _("empty line-delimiter")); + if (optarg[1]) + { + if (STREQ (optarg, "\\0")) + neweol = '\0'; + else + { + /* Provoke with 'split -txx'. Say "multi-character", not + "multibyte", so the wording survives multibyte support. */ + error (EXIT_FAILURE, 0, _("multi-character delimiter %s"), + quote (optarg)); + } + } + } + if (eol_seen && neweol != eolchar) + error (EXIT_FAILURE, 0, _("incompatible line-delimiters")); + eolchar = neweol; + eol_seen = true; + } + break; + case '0': case '1': case '2': -- 1.9.1 >From 029fed81ed90bd0dcaf34295c4819792c01ab3c6 Mon Sep 17 00:00:00 2001 From: "A. 
Gordon" Date: Wed, 7 Jan 2015 19:21:15 -0500 Subject: [PATCH 3/3] tests: test split with custom line separators --- tests/local.mk | 1 + tests/split/lines-sep.sh | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 tests/split/lines-sep.sh diff --git a/tests/local.mk b/tests/local.mk index 6fc8599..14dfaf3 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -355,6 +355,7 @@ all_tests = \ tests/split/b-chunk.sh \ tests/split/fail.sh \ tests/split/lines.sh \ + tests/split/lines-sep.sh \ tests/split/line-bytes.sh \ tests/split/l-chunk.sh \ tests/split/r-chunk.sh \ diff --git a/tests/split/lines-sep.sh b/tests/split/lines-sep.sh new file mode 100644 index 0000000..eb98b1d --- /dev/null +++ b/tests/split/lines-sep.sh @@ -0,0 +1,74 @@ +#!/bin/sh +# test split with custom line separators + +# Copyright (C) 2002-2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ split + +# Prepare input/expected-output files, +# with newline, zero, colon line-separators. 
+printf '1\n2\n3\n4\n5\n' > in1-nl || framework_failure_
+printf '1\n2\n' > exp1-nl || framework_failure_
+printf '3\n4\n' > exp2-nl || framework_failure_
+printf '5\n' > exp3-nl || framework_failure_
+
+printf '1\0002\0003\0004\0005\000' > in1-z || framework_failure_
+printf '1\0002\000' > exp1-z || framework_failure_
+printf '3\0004\000' > exp2-z || framework_failure_
+printf '5\000' > exp3-z || framework_failure_
+
+printf '1:2:3:4:5:' > in1-cln || framework_failure_
+printf '1:2:' > exp1-cln || framework_failure_
+printf '3:4:' > exp2-cln || framework_failure_
+printf '5:' > exp3-cln || framework_failure_
+
+# run_split NUM SUFFIX [OPTION]...: split in1-SUFFIX, compare with exp?-SUFFIX
+run_split()
+{
+  # test number (should be unique, to avoid output file dups)
+  num=$1
+  # suffix of test files (nl/z/cln)
+  suf=$2
+  shift 2
+
+  split --lines=2 "$@" "in1-$suf" "x$num-" > "out-$suf" || return 1
+
+  compare "exp1-$suf" "x$num-aa" || return 1
+  compare "exp2-$suf" "x$num-ab" || return 1
+  compare "exp3-$suf" "x$num-ac" || return 1
+  test -f "x$num-ad" && return 1
+
+  return 0
+}
+
+# Test newline, without '-t' option (the default)
+run_split 1 nl || { warn_ "test 1 failed" ; fail=1 ; }
+
+# Test newline given explicitly via '-t' ('x' guards the trailing newline)
+nl=$(printf '\nx'); nl=${nl%x}
+run_split 2 nl "-t$nl" || { warn_ "test 2 failed" ; fail=1 ; }
+
+# Test null line-separator with '-z'
+run_split 3 z -z || { warn_ "test 3 failed" ; fail=1 ; }
+
+# Test null line-separator with '-t' (spelled as the two characters '\0')
+run_split 4 z -t '\0' || { warn_ "test 4 failed" ; fail=1 ; }
+
+# Test non-default line-separator with '-t'
+run_split 5 cln -t: || { warn_ "test 5 failed" ; fail=1 ; }
+
+Exit $fail
-- 
1.9.1