sort: add -h/--human-numeric-sort

Adds a human_numeric key flag, the -h short option, the
--human-numeric-sort long option and the "human-numeric" value for
--sort=WORD.  Keys are compared first by the order of magnitude of
their unit suffix (none < K < M < ... < Y, negated for negative
numbers) and then, for equal units, with strnumcmp.  Mixing SI ("K")
and IEC ("Ki") suffixes in one run is a fatal error.

NOTE(review): the whitespace of this patch (line breaks, indentation,
blank context lines, column alignment inside the usage strings) was
reconstructed from a copy in which it had been collapsed.  Verify it
against the pristine sort.c, and apply with "patch -l" if a hunk is
rejected because of whitespace.

--- orig/coreutils-7.2/src/sort.c	2009-03-29 13:44:10.000000000 -0400
+++ coreutils-7.2/src/sort.c	2009-04-26 00:46:42.000000000 -0400
@@ -176,6 +176,8 @@
   bool random;                 /* Sort by random hash of key. */
   bool general_numeric;        /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation. */
+  bool human_numeric;          /* Flag for sorting by human readable
+                                  units with either SI xor IEC prefixes. */
   bool month;                  /* Flag for comparison by month name. */
   bool reverse;                /* Reverse the sense of comparison. */
   bool version;                /* sort by version number */
@@ -336,6 +338,9 @@
   -i, --ignore-nonprinting    consider only printable characters\n\
   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 "), stdout);
+  fputs (_("\
+  -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
+"), stdout);
   fputs (_("\
   -n, --numeric-sort          compare according to string numerical value\n\
   -R, --random-sort           sort by random hash of keys\n\
@@ -344,8 +349,8 @@
 "), stdout);
   fputs (_("\
       --sort=WORD             sort according to WORD:\n\
-                                general-numeric -g, month -M, numeric -n,\n\
-                                random -R, version -V\n\
+                                general-numeric -g, human-numeric -h, month -M,\n\
+                                numeric -n, random -R, version -V\n\
   -V, --version-sort          sort by numeric version\n\
 \n\
 "), stdout);
@@ -426,7 +431,7 @@
   SORT_OPTION
 };
 
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
+static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
 
 static struct option const long_options[] =
 {
@@ -442,6 +447,7 @@
   {"merge", no_argument, NULL, 'm'},
   {"month-sort", no_argument, NULL, 'M'},
   {"numeric-sort", no_argument, NULL, 'n'},
+  {"human-numeric-sort", no_argument, NULL, 'h'},
   {"version-sort", no_argument, NULL, 'V'},
   {"random-sort", no_argument, NULL, 'R'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
@@ -480,6 +486,7 @@
 
 #define SORT_TABLE \
   _st_("general-numeric", 'g') \
+  _st_("human-numeric",   'h') \
   _st_("month",           'M') \
   _st_("numeric",         'n') \
   _st_("random",          'R') \
@@ -1673,6 +1680,92 @@
   return strnumcmp (a, b, decimal_point, thousands_sep);
 }
 
+/* Exit with an error if a mixture of SI and IEC units is detected.
+   PREFIX is the character following the unit letter: 'i' marks an IEC
+   unit (e.g. "Ki"); any other character is treated as an SI unit.  */
+
+static void
+check_mixed_SI_IEC (char prefix)
+{
+  /* -1 until the first unit is seen; afterwards, whether it was IEC.  */
+  static int seen_iec = -1;
+  bool iec_present = prefix == 'i';
+  if (seen_iec != -1 && seen_iec != iec_present)
+    error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
+  seen_iec = iec_present;
+}
+
+/* Return an integer which represents the order of magnitude of the
+   unit following NUMBER: 0 when there is no recognized unit, 1 for
+   K or k, 2 for M, and so on up to 8 for Y.  The result is negated
+   when NUMBER starts with '-', so that a negative value with a larger
+   unit orders before a negative value with a smaller one.  */
+
+static int
+find_unit_order (const char *number)
+{
+  /* FIXME: if sort is fixed for multibyte separators,
+     this will need to be fixed too.  */
+
+  static const char weights[UCHAR_LIM] = {
+    ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+    ['k']=1,
+  };
+
+  const char *p = number;
+  int sign = 1;
+
+  if (*p == '-')
+    {
+      sign = -1;
+      p++;
+    }
+
+  /* Scan to the end of the number.  A decimal point or thousands
+     separator not followed by a digit stops the scan, so numbers
+     ending in one are considered to be lacking in units.  */
+  while (ISDIGIT (*p))
+    {
+      p++;
+
+      if (*p == decimal_point && ISDIGIT (*(p + 1)))
+        p++;
+      else if (thousands_sep != -1
+               && *p == thousands_sep && ISDIGIT (*(p + 1)))
+        p++;
+    }
+
+  int weight = weights[to_uchar (*p)];
+
+  /* Only check for *ibi vs *ilo when actually on a
+     prefix that could be one of them.  */
+  if (weight)
+    check_mixed_SI_IEC (*(p + 1));
+
+  return sign * weight;
+}
+
+/* Compare numbers ending in units with SI xor IEC prefixes
+       <none/unknown> < K < M < G < T < P < E < Z < Y
+   Assume that numbers are properly abbreviated.
+   i.e. input will never have 5000K instead of 5M.  */
+
+static int
+human_numcompare (const char *a, const char *b)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  int aw = find_unit_order (a);
+  int bw = find_unit_order (b);
+
+  return (aw > bw ? 1
+          : aw < bw ? -1
+          : strnumcmp (a, b, decimal_point, thousands_sep));
+}
+
 static int
 general_numcompare (const char *sa, const char *sb)
 {
@@ -1917,13 +2010,14 @@
 
       if (key->random)
         diff = compare_random (texta, lena, textb, lenb);
-      else if (key->numeric | key->general_numeric)
+      else if (key->numeric | key->general_numeric | key->human_numeric)
         {
           char savea = *lima, saveb = *limb;
 
           *lima = *limb = '\0';
-          diff = ((key->numeric ? numcompare : general_numcompare)
-                  (texta, textb));
+          diff = ((key->numeric ? numcompare
+                   : key->general_numeric ? general_numcompare
+                   : human_numcompare) (texta, textb));
           *lima = savea, *limb = saveb;
         }
       else if (key->version)
@@ -2887,7 +2981,7 @@
 
   for (key = keylist; key; key = key->next)
     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
-              + key->version + !!key->ignore))
+              + key->version + (!!key->ignore) + key->human_numeric))
         || (key->random && key->translate))
       {
         /* The following is too big, but guaranteed to be "big enough". */
@@ -2899,6 +2993,8 @@
           *p++ = 'f';
         if (key->general_numeric)
           *p++ = 'g';
+        if (key->human_numeric)
+          *p++ = 'h';
         if (key->ignore == nonprinting)
           *p++ = 'i';
         if (key->month)
@@ -2990,6 +3086,9 @@
         case 'g':
           key->general_numeric = true;
           break;
+        case 'h':
+          key->human_numeric = true;
+          break;
         case 'i':
           /* Option order should not matter, so don't let -i override
              -d. -d implies -i, but -i does not imply -d. */
@@ -3138,7 +3237,8 @@
   gkey.sword = gkey.eword = SIZE_MAX;
   gkey.ignore = NULL;
   gkey.translate = NULL;
-  gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
+  gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
+  gkey.random = gkey.version = false;
   gkey.month = gkey.reverse = false;
   gkey.skipsblanks = gkey.skipeblanks = false;
 
@@ -3217,6 +3317,7 @@
         case 'd':
         case 'f':
         case 'g':
+        case 'h':
         case 'i':
         case 'M':
         case 'n':
@@ -3469,6 +3570,7 @@
                  | key->numeric
                  | key->version
                  | key->general_numeric
+                 | key->human_numeric
                  | key->random)))
         {
           key->ignore = gkey.ignore;
@@ -3478,6 +3580,7 @@
           key->month = gkey.month;
           key->numeric = gkey.numeric;
           key->general_numeric = gkey.general_numeric;
+          key->human_numeric = gkey.human_numeric;
           key->random = gkey.random;
           key->reverse = gkey.reverse;
           key->version = gkey.version;
@@ -3493,6 +3596,7 @@
                       | gkey.month
                       | gkey.numeric
                       | gkey.general_numeric
+                      | gkey.human_numeric
                       | gkey.random
                       | gkey.version)))
         {