From 74ed24edc2ffa224ee5175ea39fcc19e7fba705f Mon Sep 17 00:00:00 2001
From: Drew Frank
Date: Thu, 1 Mar 2012 14:24:49 -0800
Subject: [PATCH] join: add numeric sort feature.
* src/join.c: add new flags and implement numeric comparison feature.
* tests/misc/join: add two tests for numerically sorted key fields.
This patch is based on code written by Alex Shinn.
---
src/join.c | 22 +++++++++++++++++++---
tests/misc/join | 6 ++++++
2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/src/join.c b/src/join.c
index b92c1f8..c65f07e 100644
--- a/src/join.c
+++ b/src/join.c
@@ -159,6 +159,7 @@ enum
static struct option const longopts[] =
{
{"ignore-case", no_argument, NULL, 'i'},
+ {"numeric-sort", no_argument, NULL, 'n'},
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
{"header", no_argument, NULL, HEADER_LINE_OPTION},
@@ -173,6 +174,9 @@ static struct line uni_blank;
/* If nonzero, ignore case when comparing join fields. */
static bool ignore_case;
+/* If nonzero, treat keys as numeric values. */
+static bool numeric_sort;
+
/* If nonzero, treat the first line of each file as column headers -
join them without checking for ordering */
static bool join_header_lines;
@@ -198,7 +202,8 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
-e EMPTY replace missing input fields with EMPTY\n\
"), stdout);
fputs (_("\
- -i, --ignore-case ignore differences in case when comparing fields\n\
+ -i, --ignore-case ignore differences in case when comparing fields\n\
+ -n, --numeric-sort compare according to string numerical value\n\
-j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
-o FORMAT obey FORMAT while constructing output line\n\
-t CHAR use CHAR as input and output field separator\n\
@@ -318,6 +323,7 @@ keycmp (struct line const *line1, struct line const *line2,
size_t len1;
size_t len2; /* Length of fields to compare. */
+ long double x1, x2;
int diff;
if (jf_1 < line1->nfields)
@@ -347,7 +353,13 @@ keycmp (struct line const *line1, struct line const *line2,
if (len2 == 0)
return 1;
- if (ignore_case)
+ if (numeric_sort)
+ {
+ x1 = strtold (beg1, NULL);
+ x2 = strtold (beg2, NULL);
+ diff = x1 < x2 ? -1 : x1 != x2;
+ }
+ else if (ignore_case)
{
/* FIXME: ignore_case does not work with NLS (in particular,
with multibyte chars). */
@@ -1017,7 +1029,7 @@ main (int argc, char **argv)
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
check_input_order = CHECK_ORDER_DEFAULT;
- while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
+ while ((optc = getopt_long (argc, argv, "-a:e:in1:2:j:o:t:v:",
longopts, NULL))
!= -1)
{
@@ -1054,6 +1066,10 @@ main (int argc, char **argv)
ignore_case = true;
break;
+ case 'n':
+ numeric_sort = true;
+ break;
+
case '1':
set_join_field (&join_field_1, string_to_join_field (optarg));
break;
diff --git a/tests/misc/join b/tests/misc/join
index a3fd1a8..ae9ef10 100755
--- a/tests/misc/join
+++ b/tests/misc/join
@@ -147,6 +147,12 @@ my @tv = (
["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"],
"a,1,,2,3,4\nb,1,2,,3,4\n"],
+# Join on numerically sorted field.
+['numeric-1', '-n', ["7 s\n8 e\n10 t\n", "7 S\n9 N\n10 T\n"],
+ "7 s S\n10 t T\n", 0],
+['numeric-2', '', ["7 s\n8 e\n10 t\n", "7 S\n9 N\n10 T\n"],
+ "7 s S\n", 1, "$prog: numeric-2.2:3: is not sorted: 10 T\n"],
+
# From Tim Smithers: fixed in 1.22l
['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0],
--
1.7.9.2