From 74ed24edc2ffa224ee5175ea39fcc19e7fba705f Mon Sep 17 00:00:00 2001 From: Drew Frank Date: Thu, 1 Mar 2012 14:24:49 -0800 Subject: [PATCH] join: add numeric sort feature. * src/join.c: add new flags and implement numeric comparison feature. * tests/misc/join: add two tests for numerically sorted key fields. This patch is based on code written by Alex Shinn (gmail.com). --- src/join.c | 22 +++++++++++++++++++--- tests/misc/join | 6 ++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/join.c b/src/join.c index b92c1f8..c65f07e 100644 --- a/src/join.c +++ b/src/join.c @@ -159,6 +159,7 @@ enum static struct option const longopts[] = { {"ignore-case", no_argument, NULL, 'i'}, + {"numeric-sort", no_argument, NULL, 'n'}, {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, {"header", no_argument, NULL, HEADER_LINE_OPTION}, @@ -173,6 +174,9 @@ static struct line uni_blank; /* If nonzero, ignore case when comparing join fields. */ static bool ignore_case; +/* If nonzero, treat keys as numeric values. */ +static bool numeric_sort; + /* If nonzero, treat the first line of each file as column headers - join them without checking for ordering */ static bool join_header_lines; @@ -198,7 +202,8 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ -e EMPTY replace missing input fields with EMPTY\n\ "), stdout); fputs (_("\ - -i, --ignore-case ignore differences in case when comparing fields\n\ + -i, --ignore-case ignore differences in case when comparing fields\n\ + -n, --numeric-sort compare according to string numerical value\n\ -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\ -o FORMAT obey FORMAT while constructing output line\n\ -t CHAR use CHAR as input and output field separator\n\ @@ -318,6 +323,7 @@ keycmp (struct line const *line1, struct line const *line2, size_t len1; size_t len2; /* Length of fields to compare. 
*/ + long double x1, x2; int diff; if (jf_1 < line1->nfields) @@ -347,7 +353,13 @@ keycmp (struct line const *line1, struct line const *line2, if (len2 == 0) return 1; - if (ignore_case) + if (numeric_sort) + { + x1 = strtold (beg1, NULL); + x2 = strtold (beg2, NULL); + diff = x1 < x2 ? -1 : x1 != x2; + } + else if (ignore_case) { /* FIXME: ignore_case does not work with NLS (in particular, with multibyte chars). */ @@ -1017,7 +1029,7 @@ main (int argc, char **argv) issued_disorder_warning[0] = issued_disorder_warning[1] = false; check_input_order = CHECK_ORDER_DEFAULT; - while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:", + while ((optc = getopt_long (argc, argv, "-a:e:in1:2:j:o:t:v:", longopts, NULL)) != -1) { @@ -1054,6 +1066,10 @@ main (int argc, char **argv) ignore_case = true; break; + case 'n': + numeric_sort = true; + break; + case '1': set_join_field (&join_field_1, string_to_join_field (optarg)); break; diff --git a/tests/misc/join b/tests/misc/join index a3fd1a8..ae9ef10 100755 --- a/tests/misc/join +++ b/tests/misc/join @@ -147,6 +147,12 @@ my @tv = ( ["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"], "a,1,,2,3,4\nb,1,2,,3,4\n"], +# Join on numerically sorted field. +['numeric-1', '-n', ["7 s\n8 e\n10 t\n", "7 S\n9 N\n10 T\n"], + "7 s S\n10 t T\n", 0], +['numeric-2', '', ["7 s\n8 e\n10 t\n", "7 S\n9 N\n10 T\n"], + "7 s S\n", 1, "$prog: numeric-2.2:3: is not sorted: 10 T\n"], + # From Tim Smithers: fixed in 1.22l ['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0], -- 1.7.9.2