>From 710fb9423391def7cb95c9c6ae911c5958f492db Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 6 Oct 2010 15:55:18 -0400 Subject: [PATCH 1/2] Join: add '--auto-format' option. --- doc/coreutils.texi | 8 ++++++++ src/join.c | 37 ++++++++++++++++++++++++++++++++++++- tests/misc/join | 19 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletions(-) diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 4d17ed1..3b10608 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -5563,6 +5563,14 @@ specified format. The header lines will not be checked for ordering even if @option{--check-order} is specified. Also if the header lines from each file do not match, the heading fields from the first file will be used. +@item --auto-format +@opindex --auto-format +Automatically detects output format based on the number of fields in the +first line of each input file (as if the user explicitly specified @samp{-o}). +Allows using @samp{-e} without a-priori knowledge of the fields in the input files. +The join field is printed first, followed by the remaining fields from the first +file and the second file. 
+ @item -i @itemx --ignore-case @opindex -i diff --git a/src/join.c b/src/join.c index 6eaad65..17ad2f3 100644 --- a/src/join.c +++ b/src/join.c @@ -139,7 +139,8 @@ enum { CHECK_ORDER_OPTION = CHAR_MAX + 1, NOCHECK_ORDER_OPTION, - HEADER_LINE_OPTION + HEADER_LINE_OPTION, + AUTO_FORMAT_OPTION }; @@ -149,6 +150,7 @@ static struct option const longopts[] = {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, {"header", no_argument, NULL, HEADER_LINE_OPTION}, + {"auto-format", no_argument, NULL, AUTO_FORMAT_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -164,6 +166,12 @@ static bool ignore_case; join them without checking for ordering */ static bool join_header_lines; +/* if nonzero, automatically build a specific output field list, + based on the first line of each input file */ +static bool auto_output_format; + +static void build_output_format(struct line const *line1, struct line const* line2); + void usage (int status) { @@ -200,6 +208,8 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ --nocheck-order do not check that the input is correctly sorted\n\ --header treat the first line in each file as field headers,\n\ print them without trying to pair them\n\ + --auto-format Automatically build output format, based on the first\n\ + line of each input file. 
Allows '-e' without using '-o'\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -636,6 +646,9 @@ join (FILE *fp1, FILE *fp2) advance_seq (fp2, &seq2, true, 2); } + if (auto_output_format && seq1.count && seq2.count) + build_output_format(seq1.lines[0],seq2.lines[0]); + while (seq1.count && seq2.count) { size_t i; @@ -947,6 +960,24 @@ add_file_name (char *name, char *names[2], *optc_status = MIGHT_BE_O_ARG; } +static void +build_output_format(struct line const *line1, struct line const* line2) +{ + int i ; + if (outlist_head.next) + return; + + add_field(0,0); + for (i = 0; i < join_field_1 && i < line1->nfields; ++i) + add_field(1,i); + for (i = join_field_1 + 1; i < line1->nfields; ++i) + add_field(1,i); + for (i = 0; i < join_field_2 && i < line2->nfields; ++i) + add_field(2,i); + for (i = join_field_2 + 1; i < line2->nfields; ++i) + add_field(2,i); +} + int main (int argc, char **argv) { @@ -1077,6 +1108,10 @@ main (int argc, char **argv) join_header_lines = true; break; + case AUTO_FORMAT_OPTION: + auto_output_format = true; + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); diff --git a/tests/misc/join b/tests/misc/join index a319b94..b07a18b 100755 --- a/tests/misc/join +++ b/tests/misc/join @@ -218,6 +218,25 @@ my @tv = ( ['header-5', '--header', [ "ID1 Name\n1 A\n2 B\n", "ID2 Color\n1 red\n"], "ID1 Name Color\n1 A red\n", 0], +# Auto-format +['autoformat-1', '-j1 -a1 -a2 --auto-format -e FOO', + ["1 a\n2 b\n", "1 X\n3 Y\n"], "1 a X\n2 b FOO\n3 FOO Y\n", 0], + +# Auto-format, with empty filler (no '-e' specified)- +# should print the column delimiters (space characters), but no filler. +# This should be equivalent to specifying "-o 0,1.2,2.2" without "-e". +['autoformat-2', '-j1 -a1 -a2 --auto-format', + ["1 a\n2 b\n", "1 X\n3 Y\n"], "1 a X\n2 b \n3 Y\n", 0], + +# auto-format sanity check: specify explicit output format without -e, +# make sure it matches the above test. 
+['autoformat-3', '-j1 -a1 -a2 -o 0,1.2,2.2', + ["1 a\n2 b\n", "1 X\n3 Y\n"], "1 a X\n2 b \n3 Y\n", 0], + +# both auto-format and explicit output format (different format than 'auto'), +# auto-format should be silently ignored. +['autoformat-4', '-j1 -a1 -a2 -e FOO --auto-format -o 0,2.2,1.2', + ["1 a\n2 b\n", "1 X\n3 Y\n"], "1 X a\n2 FOO b\n3 Y FOO\n", 0], ); # Convert the above old-style test vectors to the newer -- 1.7.1 >From 7dd975752358da66e7ce91aa08d72e6be08cfc02 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 6 Oct 2010 16:04:07 -0400 Subject: [PATCH 2/2] join (with autoformat) - update NEWS --- NEWS | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/NEWS b/NEWS index 22f257b..fc023bb 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,9 @@ GNU coreutils NEWS -*- outline -*- for a file. It also accepts the %w and %W format directives for outputting the birth time of a file, if one is available. + join now accepts the option --auto-format, to automatically + detect the output format without requiring an explicit -o. + ** Changes in behavior df now consistently prints the device name for a bind mounted file, -- 1.7.1