--- coreutils-6.10-sort.c 2009-06-04 17:27:35.000000000 +0200
+++ coreutils-6.10-sort-natural.c 2009-06-04 17:27:44.000000000 +0200
@@ -170,6 +170,7 @@
bool random; /* Sort by random hash of key. */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
+ bool natural; /* Flag for natural order comparison. */
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
struct keyfield *next; /* Next keyfield to try. */
@@ -327,6 +328,7 @@
-i, --ignore-nonprinting consider only printable characters\n\
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
+ -N, --natural-order-sort sort strings containing numbers in natural order\n\
-R, --random-sort sort by random hash of keys\n\
--random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
-r, --reverse reverse the result of comparisons\n\
@@ -394,7 +396,7 @@
RANDOM_SOURCE_OPTION
};
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
+static char const short_options[] = "-bcCdfgik:mMnNo:rRsS:t:T:uy:z";
static struct option const long_options[] =
{
@@ -409,6 +411,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"natural-order-sort", no_argument, NULL, 'N'},
{"random-sort", no_argument, NULL, 'R'},
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
{"output", required_argument, NULL, 'o'},
@@ -1514,6 +1517,139 @@
return strnumcmp (a, b, decimal_point, thousands_sep);
}
+/* Perform 'natural order' comparisons of strings ("A1" < "A2" < "A10")
+ Adoption for "sort" from strnatcmp.c by Martin Sarfy
+ Original author (C) 2000, 2004 by Martin Pool */
+
+static inline int
+nat_isdigit(char a)
+{
+ return isdigit((unsigned char) a);
+}
+
+
+static inline int
+nat_isspace(char a)
+{
+ return isspace((unsigned char) a);
+}
+
+
+static inline char
+nat_toupper(char a)
+{
+ return toupper((unsigned char) a);
+}
+
+static int
+nat_compare_right(char const *a, char const *b)
+{
+ int bias = 0;
+
+ /* The longest run of digits wins. That aside, the greatest
+ value wins, but we can't know that it will until we've scanned
+ both numbers to know that they have the same magnitude, so we
+ remember it in BIAS. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return bias;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b) {
+ if (!bias)
+ bias = -1;
+ } else if (*a > *b) {
+ if (!bias)
+ bias = +1;
+ } else if (!*a && !*b)
+ return bias;
+ }
+
+ return 0;
+}
+
+
+static int
+nat_compare_left(char const *a, char const *b)
+{
+ /* Compare two left-aligned numbers: the first to have a
+ different value wins. */
+ for (;; a++, b++) {
+ if (!nat_isdigit(*a) && !nat_isdigit(*b))
+ return 0;
+ else if (!nat_isdigit(*a))
+ return -1;
+ else if (!nat_isdigit(*b))
+ return +1;
+ else if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return +1;
+ }
+
+ return 0;
+}
+
+
+static int strnatcmp0(char const *a, char const *b, int fold_case)
+{
+ int ai, bi;
+ char ca, cb;
+ int fractional, result;
+
+ /* assert(a && b); */
+ ai = bi = 0;
+ while (1) {
+ ca = a[ai]; cb = b[bi];
+
+ /* skip over leading spaces or zeros */
+ while (nat_isspace(ca))
+ ca = a[++ai];
+
+ while (nat_isspace(cb))
+ cb = b[++bi];
+
+ /* process run of digits */
+ if (nat_isdigit(ca) && nat_isdigit(cb)) {
+ fractional = (ca == '0' || cb == '0');
+
+ if (fractional) {
+ if ((result = nat_compare_left(a+ai, b+bi)) != 0)
+ return result;
+ } else {
+ if ((result = nat_compare_right(a+ai, b+bi)) != 0)
+ return result;
+ }
+ }
+
+ if (!ca && !cb) {
+ /* The strings compare the same. Perhaps the caller
+ will want to call strcmp to break the tie. */
+ return 0;
+ }
+
+ if (fold_case) {
+ ca = nat_toupper(ca);
+ cb = nat_toupper(cb);
+ }
+
+ if (ca < cb)
+ return -1;
+ else if (ca > cb)
+ return +1;
+
+ ++ai; ++bi;
+ }
+}
+
+static int
+natural_compare (const char *sa, const char *sb)
+{
+ return strnatcmp0(sa,sb,0);
+}
+
static int
general_numcompare (const char *sa, const char *sb)
{
@@ -1702,6 +1838,34 @@
return diff;
}
+static void
+translate_chars(const char* texta, size_t lena, const char* textb, size_t lenb,
+ char *copy_a, size_t *p_new_len_a, char* copy_b, size_t *p_new_len_b,
+ bool const *ignore, char const *translate)
+{
+ int i;
+ /* Ignore and/or translate chars before comparing. */
+ for (*p_new_len_a = *p_new_len_b = i = 0; i < MAX (lena, lenb); i++)
+ {
+ if (i < lena)
+ {
+ copy_a[*p_new_len_a] = (translate
+ ? translate[to_uchar (texta[i])]
+ : texta[i]);
+ if (!ignore || !ignore[to_uchar (texta[i])])
+ ++(*p_new_len_a);
+ }
+ if (i < lenb)
+ {
+ copy_b[*p_new_len_b] = (translate
+ ? translate[to_uchar (textb[i])]
+ : textb [i]);
+ if (!ignore || !ignore[to_uchar (textb[i])])
+ ++(*p_new_len_b);
+ }
+ }
+}
+
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1741,6 +1905,22 @@
(texta, textb));
*lima = savea, *limb = saveb;
}
+ else if (key->natural)
+ {
+ char buf[4000];
+ size_t size = lena + 1 + lenb + 1;
+ char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
+ char *copy_b = copy_a + lena + 1;
+ size_t new_len_a, new_len_b;
+
+ translate_chars(texta, lena, textb, lenb, copy_a, &new_len_a,
+ copy_b, &new_len_b, ignore, translate);
+
+ diff = natural_compare (copy_a, copy_b );
+
+ if (sizeof buf < size)
+ free (copy_a);
+ }
else if (key->month)
diff = getmonth (texta, lena) - getmonth (textb, lenb);
/* Sorting like this may become slow, so in a simple locale the user
@@ -1753,28 +1933,10 @@
size_t size = lena + 1 + lenb + 1;
char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
char *copy_b = copy_a + lena + 1;
- size_t new_len_a, new_len_b, i;
+ size_t new_len_a, new_len_b;
- /* Ignore and/or translate chars before comparing. */
- for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
- {
- if (i < lena)
- {
- copy_a[new_len_a] = (translate
- ? translate[to_uchar (texta[i])]
- : texta[i]);
- if (!ignore || !ignore[to_uchar (texta[i])])
- ++new_len_a;
- }
- if (i < lenb)
- {
- copy_b[new_len_b] = (translate
- ? translate[to_uchar (textb[i])]
- : textb [i]);
- if (!ignore || !ignore[to_uchar (textb[i])])
- ++new_len_b;
- }
- }
+ translate_chars(texta, lena, textb, lenb, copy_a, &new_len_a,
+ copy_b, &new_len_b, ignore, translate);
diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
@@ -2587,7 +2749,7 @@
for (key = keylist; key; key = key->next)
if ((1 < (key->random + key->numeric + key->general_numeric + key->month
- + !!key->ignore))
+ + key->natural + !!key->ignore))
|| (key->random && key->translate))
{
char opts[7];
@@ -2604,6 +2766,8 @@
*p++ = 'M';
if (key->numeric)
*p++ = 'n';
+ if (key->natural)
+ *p++ = 'N';
if (key->random)
*p++ = 'R';
*p = '\0';
@@ -2696,6 +2860,9 @@
case 'M':
key->month = true;
break;
+ case 'N':
+ key->natural = true;
+ break;
case 'n':
key->numeric = true;
break;
@@ -2830,7 +2997,7 @@
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.random = false;
+ gkey.numeric = gkey.general_numeric = gkey.natural = gkey.random = false;
gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
@@ -2909,6 +3076,7 @@
case 'i':
case 'M':
case 'n':
+ case 'N':
case 'r':
case 'R':
{
@@ -3087,7 +3255,7 @@
if (! (key->ignore || key->translate
|| (key->skipsblanks | key->reverse
| key->skipeblanks | key->month | key->numeric
- | key->general_numeric
+ | key->general_numeric | key->natural
| key->random)))
{
key->ignore = gkey.ignore;
@@ -3097,6 +3265,7 @@
key->month = gkey.month;
key->numeric = gkey.numeric;
key->general_numeric = gkey.general_numeric;
+ key->natural = gkey.natural;
key->random = gkey.random;
key->reverse = gkey.reverse;
}
@@ -3106,7 +3275,7 @@
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric
+ | gkey.numeric | gkey.general_numeric | gkey.natural
| gkey.random)))
{
insertkey (&gkey);