diff --git i/NEWS w/NEWS index b30e91d..6ff6e14 100644 --- i/NEWS +++ w/NEWS @@ -39,6 +39,7 @@ datamash(1), decorate(1): Fix some minor memory leaks. + Schedule -f/--full combined with non-linewise operations for deprecation. * Noteworthy changes in release 1.7 (2020-04-23) [testing] diff --git i/doc/datamash.texi w/doc/datamash.texi index ac02749..3fcbe79 100644 --- i/doc/datamash.texi +++ w/doc/datamash.texi @@ -212,6 +212,9 @@ Skip comment lines (starting with '#' or ';' and optional whitespace). @opindex -f Print entire input line before op results (default: print only the grouped keys). +Using this option with non-linewise operations has been permitted, but never +produced very sensible output. Such combinations have been deprecated, and in +future will result in an error. @item --group=@var{X[,Y,X]} @itemx -g @var{X[,Y,X]} diff --git i/src/datamash.c w/src/datamash.c index 25f0fa3..51db417 100644 --- i/src/datamash.c +++ w/src/datamash.c @@ -232,6 +232,9 @@ which require a pair of fields (e.g. 
'pcov 2:6').\n"), stdout); fputs (_("\ -f, --full print entire input line before op results\n\ (default: print only the grouped keys)\n\ + This option is only sensible for linewise operations.\n\ + Other uses are deprecated and will be removed in a\n\ + future version of GNU Datamash.\n\ "), stdout); fputs (_("\ -g, --group=X[,Y,Z] group via fields X,[Y,Z];\n\ @@ -640,6 +643,10 @@ process_file () if (input_header && line_number==0) process_input_header (input_stream); + if (print_full_line && !line_mode) + fprintf(stderr, "WARNING: Allowing -f/--full with non-linewise operations \ +is deprecated and will be removed in a future release.\n"); + /* If there is an input header line, and the user requested an output header line, and the input line was read successfully, print headers */ if (input_header && output_header && line_number==1) diff --git i/tests/datamash-sort-header.sh w/tests/datamash-sort-header.sh index d7bd521..0633184 100755 --- i/tests/datamash-sort-header.sh +++ w/tests/datamash-sort-header.sh @@ -57,8 +57,10 @@ B 4" > exp_no_sort_headers || framework_failure_ "failed to write exp_no_sort_headers file" -echo "A % 1 1,3 -B ( 2 2,4" > exp_sort_in_header_full || +echo "A % 1 1 +B ( 2 2 +A & 3 3 +B = 4 4" > exp_sort_in_header_full || framework_failure_ "failed to write exp_sort_in_header_full file" echo "GroupBy(x) unique(z) @@ -71,14 +73,18 @@ A 1,3 B 2,4" > exp_sort_out_header || framework_failure_ "failed to write exp_sort_out_header" -echo "field-1 field-2 field-3 unique(field-3) -A % 1 1,3 -B ( 2 2,4" > exp_sort_out_header_full || +echo "field-1 field-2 field-3 cut(field-3) +A % 1 1 +B ( 2 2 +A & 3 3 +B = 4 4" > exp_sort_out_header_full || framework_failure_ "failed to write exp_sort_out_header_full" -echo "x y z unique(z) -A % 1 1,3 -B ( 2 2,4" > exp_sort_headers_full || +echo "x y z cut(z) +A % 1 1 +B ( 2 2 +A & 3 3 +B = 4 4" > exp_sort_headers_full || framework_failure_ "failed to write exp_sort_headers_full" @@ -119,19 +125,19 @@ compare_ out7 
exp_sort_out_header || { warn_ "sort-header-out failed" ; fail=1 ; } echo "$INFILE" | sed 1d | - datamash -t ' ' --sort --full --header-out -g 1 unique 3 > out8 || + datamash -t ' ' --sort --full --header-out cut 3 > out8 || framework_failure_ "datamash failed" compare_ out8 exp_sort_out_header_full || { warn_ "sort-header-out-full failed" ; fail=1 ; } echo "$INFILE" | - datamash -t ' ' -g 1 --sort --full --header-in unique 3 > out9 || + datamash -t ' ' --sort --full --header-in cut 3 > out9 || framework_failure_ "datamash failed" compare_ out9 exp_sort_in_header_full || { warn_ "sort-in-header-full failed" ; fail=1 ; } echo "$INFILE" | - datamash -t ' ' -g 1 --sort --full --headers unique 3 > out10 || + datamash -t ' ' --sort --full --headers cut 3 > out10 || framework_failure_ "datamash failed" compare_ out10 exp_sort_headers_full || { warn_ "sort-headers-full failed" ; fail=1 ; } @@ -157,7 +163,7 @@ printf "" | datamash -t ' ' --sort --headers unique 3 > emp4 || compare_ /dev/null "emp4" || { warn_ "sort+headers on empty file failed" ; fail=1; } -printf "" | datamash -t ' ' --sort --full unique 3 > emp5 || +printf "" | datamash -t ' ' --sort --full cut 3 > emp5 || framework_failure_ "datamash failed" compare_ /dev/null "emp5" || { warn_ "sort+full on empty file failed" ; fail=1; } diff --git i/tests/datamash-tests-2.pl w/tests/datamash-tests-2.pl index d743244..a45ec33 100755 --- i/tests/datamash-tests-2.pl +++ w/tests/datamash-tests-2.pl @@ -375,44 +375,21 @@ EOF my @Tests = ( - # Test 'min' + --full - # first, verify test without "--full" + # Test 'min' ['slct1', '-t" " -g1 min 2', {IN_PIPE=>$in_full1}, {OUT=>"A 3\nB 0\n"}], - # Test with "--full", "i2" and "i6" should be displayed - ['slct2', '-t" " -f -g1 min 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 3 i2 3\nB 0 i6 0\n"}], - # --full with --sort => should not change results - ['slct3', '-s -t" " -f -g1 min 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 3 i2 3\nB 0 i6 0\n"}], - - # Test 'max' + --full - # first, verify test 
without "--full" - ['slct4', '-t" " -g1 max 2', {IN_PIPE=>$in_full1}, {OUT=>"A 5\nB 8\n"}], - # Test with "--full", "i3" and "i7" should be displayed - ['slct5', '-t" " -f -g1 max 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 5 i3 5\nB 8 i5 8\n"}], - # --full with --sort => should not change results - ['slct6', '-s -t" " -f -g1 max 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 5 i3 5\nB 8 i5 8\n"}], - - # Test 'first' + --full - # first, verify test without "--full" - ['slct7', '-t" " -g1 first 2', {IN_PIPE=>$in_full1}, {OUT=>"A 4\nB 1\n"}], - # Test with "--full", "i1" and "i4" should be displayed - ['slct8', '-t" " -f -g1 first 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 4 i1 4\nB 1 i4 1\n"}], - # more --full with --sort => see test 'sortslct1' below - - # Test 'last' + --full - # first, verify test without "--full" - ['slct9', '-t" " -g1 last 2', {IN_PIPE=>$in_full1}, {OUT=>"A 5\nB 3\n"}], - # Test with "--full", "i1" and "i4" should be displayed - ['slct10', '-t" " -f -g1 last 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 5 i3 5\nB 3 i7 3\n"}], - # more --full with --sort => see test 'sortslct2' below + # Test 'max' + ['slct2', '-t" " -g1 max 2', {IN_PIPE=>$in_full1}, {OUT=>"A 5\nB 8\n"}], + # Test 'first' + ['slct3', '-t" " -g1 first 2', {IN_PIPE=>$in_full1}, {OUT=>"A 4\nB 1\n"}], + + # Test 'last' + ['slct4', '-t" " -g1 last 2', {IN_PIPE=>$in_full1}, {OUT=>"A 5\nB 3\n"}], + + # # Test --narm - ignoring NaN/NA values + # ## Test with 'NA' ['narm1', '--narm sum 1', {IN_PIPE=>$na1}, {OUT=>"6\n"}], @@ -664,26 +641,6 @@ my @Tests = ); - -if ($have_stable_sort) { - push @Tests, ( - # Test 'first' + --full + --sort - # NOTE: This is subtle: - # Sorting should be stable: only ordering the column which is used - # for grouping (column 1 in this test). This means that the second - # column (containing numbers) should NOT affect sorting, and the order - # of the lines should not change. The results of this test - # should be the same as 'slct8'. 
If the system doesn't have stable - # 'sort', then the order will change. - ['sortslct1', '-s -t" " -f -g1 first 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 4 i1 4\nB 1 i4 1\n"}], - # Test 'last' + --full + --sort - # See note above regarding 'first' - applies to 'last' as well. - ['sortslct2', '-s -t" " -f -g1 last 2', {IN_PIPE=>$in_full1}, - {OUT=>"A 5 i3 5\nB 3 i7 3\n"}], - ) -} - my $save_temps = $ENV{SAVE_TEMPS}; my $verbose = $ENV{VERBOSE}; diff --git i/tests/datamash-tests.pl w/tests/datamash-tests.pl index a9bd53b..d613c93 100755 --- i/tests/datamash-tests.pl +++ w/tests/datamash-tests.pl @@ -451,16 +451,14 @@ my @Tests = # empty input = empty output, regardless of options [ 'emp1', 'count 1', {IN_PIPE=>""}, {OUT=>""}], - [ 'emp2', '--full count 2', {IN_PIPE=>""},{OUT=>""}], + [ 'emp2', '--full cut 2', {IN_PIPE=>""},{OUT=>""}], [ 'emp3', '--header-in count 2', {IN_PIPE=>""},{OUT=>""}], [ 'emp4', '--header-out count 2', {IN_PIPE=>""},{OUT=>""}], - [ 'emp5', '--full --header-in count 2', {IN_PIPE=>""},{OUT=>""}], - [ 'emp6', '--full --header-out count 2', {IN_PIPE=>""},{OUT=>""}], - [ 'emp7', '--full --header-in --header-out count 2', + [ 'emp5', '--full --header-in cut 2', {IN_PIPE=>""},{OUT=>""}], + [ 'emp6', '--full --header-out cut 2', {IN_PIPE=>""},{OUT=>""}], + [ 'emp7', '--full --header-in --header-out cut 2', {IN_PIPE=>""},{OUT=>""}], - [ 'emp8', '-g3,4 --full --header-in --header-out count 2', - {IN_PIPE=>""},{OUT=>""}], - [ 'emp9', '-g3 count 2', {IN_PIPE=>""},{OUT=>""}], + [ 'emp8', '-g3 count 2', {IN_PIPE=>""},{OUT=>""}], ## Field extraction ['f1', '-W sum 1', {IN_PIPE=>$in2}, {OUT=>"5\n"}], @@ -502,6 +500,22 @@ my @Tests = ['mm3', 'absmin 1', {IN_PIPE=>$in_minmax}, {OUT=>"0.0001\n"}], ['mm4', 'absmax 1', {IN_PIPE=>$in_minmax}, {OUT=>"-700\n"}], + + # + # -f/--full + # + + # Linewise operations (i.e. MODE_PER_LINE) + ['fl1', '-t" " --full cut 2', {IN_PIPE=>$in_g3}, + {OUT=>"A 3 W 3\nA 5 W 5\nA 7 W 7\nA 11 X 11\nA 13 X 13\n" . 
+ "B 17 Y 17\nB 19 Z 19\nC 23 Z 23\n"}], + # Non-linewise operations (e.g. MODE_GROUPBY) + ['fl2', '-t" " --full -g3 sum 2', {IN_PIPE=>$in_g3}, + {OUT=>"A 3 W 15\nA 11 X 24\nB 17 Y 17\nB 19 Z 42\n"}, + {ERR=>"WARNING: Allowing -f/--full with non-linewise operations " . + "is deprecated and will be removed in a future release.\n"}], + + # # Test Grouping # @@ -525,14 +539,6 @@ my @Tests = ['g8.1', '-t" " -g1,3 sum 2', {IN_PIPE=>$in_g3}, {OUT=>"A W 15\nA X 24\nB Y 17\nB Z 19\nC Z 23\n"}], - - # --full option - without grouping, returns the first line - ['fl1', '-t" " --full sum 2', {IN_PIPE=>$in_g3}, - {OUT=>"A 3 W 98\n"}], - # --full with grouping - print entire line of each group - ['fl2', '-t" " --full -g3 sum 2', {IN_PIPE=>$in_g3}, - {OUT=>"A 3 W 15\nA 11 X 24\nB 17 Y 17\nB 19 Z 42\n"}], - # count on non-numeric fields ['cnt1', '-t" " -g 1 count 1', {IN_PIPE=>$in_g2}, {OUT=>"A 4\nB 3\n"}], @@ -546,18 +552,21 @@ my @Tests = {OUT=>"GroupBy(x) count(y)\nA 5\nB 3\nC 4\n"}], # Input and output header, with full line - ['hdr3', '-t" " -g 1 --full --header-in --header-out count 2', + ['hdr3', '-t" " --full --header-in --header-out cut 2', {IN_PIPE=>$in_hdr1}, - {OUT=>"x y z count(y)\nA 1 10 5\nB 5 10 3\nC 8 11 4\n"}], + {OUT=>"x y z cut(y)\nA 1 10 1\nA 2 10 2\nA 3 10 3\nA 4 10 4\nA 4 10 4\n" . + "B 5 10 5\nB 6 20 6\nB 7 30 7\n" . + "C 8 11 8\nC 9 22 9\nC 1 33 1\nC 2 44 2\n"}], # Output Header ['hdr4', '-t" " -g 1 --header-out count 2', {IN_PIPE=>$in_g3}, {OUT=>"GroupBy(field-1) count(field-2)\nA 5\nB 2\nC 1\n"}], # Output Header with --full - ['hdr5', '-t" " -g 1 --full --header-out count 2', {IN_PIPE=>$in_g3}, - {OUT=>"field-1 field-2 field-3 count(field-2)\n" . - "A 3 W 5\nB 17 Y 2\nC 23 Z 1\n"}], + ['hdr5', '-t" " --full --header-out cut 2', {IN_PIPE=>$in_g3}, + {OUT=>"field-1 field-2 field-3 cut(field-2)\n" . + "A 3 W 3\nA 5 W 5\nA 7 W 7\nA 11 X 11\nA 13 X 13\n" . 
+ "B 17 Y 17\nB 19 Z 19\nC 23 Z 23\n"}], # Header without grouping ['hdr6', '-t" " --header-out count 2', {IN_PIPE=>$in_g3}, @@ -595,24 +604,20 @@ my @Tests = # header-in and header-out => header line should be printed ['hdr14', '-t: -H sum 1', {IN_PIPE=>$in_hdr_only}, {OUT=>"sum(X)\n"}], - ['hdr15', '-t: --full -H sum 1', {IN_PIPE=>$in_hdr_only}, - {OUT=>"X:Y:Z:sum(X)\n"}], - ['hdr16', '-t: -s -g1 -H sum 2', {IN_PIPE=>$in_hdr_only}, + ['hdr15', '-t: -s -g1 -H sum 2', {IN_PIPE=>$in_hdr_only}, {OUT=>"GroupBy(X):sum(Y)\n"}], - ['hdr17', '-t: --full -s -g1 -H sum 2', {IN_PIPE=>$in_hdr_only}, - {OUT=>"X:Y:Z:sum(Y)\n"}], - ['hdr18', '-t: --header-in sum 1', {IN_PIPE=>$in_hdr_only}, + ['hdr17', '-t: --header-in sum 1', {IN_PIPE=>$in_hdr_only}, {OUT=>""}], - ['hdr19', '-t: -H reverse', {IN_PIPE=>$in_hdr_only}, + ['hdr18', '-t: -H reverse', {IN_PIPE=>$in_hdr_only}, {OUT=>"Z:Y:X\n"}], - ['hdr20', '-t: --header-in reverse', {IN_PIPE=>$in_hdr_only}, + ['hdr19', '-t: --header-in reverse', {IN_PIPE=>$in_hdr_only}, {OUT=>""}], - ['hdr21', '-t: -H rmdup 1', {IN_PIPE=>$in_hdr_only}, + ['hdr20', '-t: -H rmdup 1', {IN_PIPE=>$in_hdr_only}, {OUT=>$in_hdr_only}], - ['hdr22', '-t: --header-in rmdup 1', {IN_PIPE=>$in_hdr_only}, + ['hdr21', '-t: --header-in rmdup 1', {IN_PIPE=>$in_hdr_only}, {OUT=>""}], - ['hdr23', '-t: --header-in rmdup 1', {IN_PIPE=>""}, {OUT=>""}], - ['hdr24', '-t: -H rmdup 1', {IN_PIPE=>""}, {OUT=>""}], + ['hdr22', '-t: --header-in rmdup 1', {IN_PIPE=>""}, {OUT=>""}], + ['hdr23', '-t: -H rmdup 1', {IN_PIPE=>""}, {OUT=>""}], # percentile operation has special header handling (which includes # the percentile value). @@ -624,8 +629,6 @@ my @Tests = # Test single line per group ['sl1', '-t" " -g 1 mean 2', {IN_PIPE=>$in_g4}, {OUT=>"A 5\nK 6\nP 2\n"}], - ['sl2', '-t" " --full -g 1 mean 2', {IN_PIPE=>$in_g4}, - {OUT=>"A 5 5\nK 6 6\nP 2 2\n"}], # Test countunique operation ['cuq1', '-t" " -g 1 countunique 3', {IN_PIPE=>$in_g3},