[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Support bytesize comparison in sort
From: Mart Somermaa
Subject: Support bytesize comparison in sort
Date: Thu, 06 Apr 2006 12:08:19 +0300
User-agent: Mail/News 1.5 (X11/20060309)
I've attached a patch that adds support for human-readable bytesize
comparison to 'sort': numbers suffixed with K (kilobytes), M (megabytes)
or G (gigabytes) are sorted correctly when the '-B' option is given on
the command line:
---
$ sort --help | grep bytesize
-B, --size-in-bytes compare bytesizes (numbers suffixed with
K, M, G)
---
Example usage:
---
$ echo -e "3G\n2M\n1K" | sort -g
1K
2M
3G
$ echo -e "3K\n2M\n1G" | sort -B
3K
2M
1G
---
The feature is useful e.g. in combination with du or df (think of
$ du -hs * | sort -B).
Regards,
Mart Sõmermaa
--- sort.c.orig 2005-10-07 21:48:28.000000000 +0300
+++ sort.c 2006-04-06 12:05:31.000000000 +0300
@@ -149,6 +149,9 @@ struct keyfield
point, but no exponential notation. */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
+ bool size_in_bytes; /* Flag for human-readable bytesize comparison.
+ Handle numbers suffixed with K for kilo-,
+ M for mega- and G for gigabytes. */
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
struct keyfield *next; /* Next keyfield to try. */
@@ -295,6 +298,7 @@ Ordering options:\n\
"), stdout);
fputs (_("\
-b, --ignore-leading-blanks ignore leading blanks\n\
+ -B, --size-in-bytes compare bytesizes (numbers suffixed with K, M,
G)\n\
-d, --dictionary-order consider only blanks and alphanumeric
characters\n\
-f, --ignore-case fold lower case to upper case characters\n\
"), stdout);
@@ -353,11 +357,12 @@ native byte values.\n\
exit (status);
}
-static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
+static char const short_options[] = "-bBcdfgik:mMno:rsS:t:T:uy:z";
static struct option const long_options[] =
{
{"ignore-leading-blanks", no_argument, NULL, 'b'},
+ {"size-in-bytes", no_argument, NULL, 'B'},
{"check", no_argument, NULL, 'c'},
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
@@ -1077,8 +1082,13 @@ numcompare (const char *a, const char *b
return strnumcmp (a, b, decimal_point, thousands_sep);
}
-static int
-general_numcompare (const char *sa, const char *sb)
+/* If size_in_bytes is true, compare strings A and B as human-readable
+ * positive byte counts (as returned e.g. by df -h) suffixed with either 'K',
+ * 'M', 'G' for kilobytes, megabytes and gigabytes.
+ * FIXME: Possible robustness fix -- skip whitespace before letter?
+ */
+static int general_numcompare (const char *sa, const char *sb,
+ bool size_in_bytes)
{
/* FIXME: add option to warn about failed conversions. */
/* FIXME: maybe add option to try expensive FP conversion
@@ -1095,6 +1105,24 @@ general_numcompare (const char *sa, cons
if (sb == eb)
return 1;
+ if (size_in_bytes && ea && eb)
+ {
+ char ca = (char) tolower(*ea);
+ char cb = (char) tolower(*eb);
+
+ if (! ( (ca == 'g' || ca == 'm' || ca == 'k')
+ && (cb == 'g' || cb == 'm' || cb == 'k') )
+ || ca == cb) /* unknown or equal suffix -> compare as usual */
+ goto compare_as_usual;
+
+ /* as ca != cb, if ca in GB => ca bigger, cb in GB => cb bigger,
+ * if neither of these, if ca in MB => ca bigger, cb bigger otherwise */
+ return (ca == 'g' ? 1
+ : cb == 'g' ? -1
+ : ca == 'm' ? 1 : -1);
+ }
+
+compare_as_usual:
/* Sort numbers in the usual way, where -0 == +0. Put NaNs after
conversion errors but before numbers; sort them by internal
bit-pattern, for lack of a more portable alternative. */
@@ -1179,13 +1207,14 @@ keycompare (const struct line *a, const
size_t lenb = limb <= textb ? 0 : limb - textb;
/* Actually compare the fields. */
- if (key->numeric | key->general_numeric)
+ if (key->numeric | key->general_numeric | key->size_in_bytes)
{
char savea = *lima, saveb = *limb;
*lima = *limb = '\0';
- diff = ((key->numeric ? numcompare : general_numcompare)
- (texta, textb));
+ diff = (key->numeric ?
+ numcompare(texta, textb) :
+ general_numcompare(texta, textb, key->size_in_bytes));
*lima = savea, *limb = saveb;
}
else if (key->month)
@@ -2060,6 +2089,9 @@ set_ordering (const char *s, struct keyf
if (blanktype == bl_end || blanktype == bl_both)
key->skipeblanks = true;
break;
+ case 'B':
+ key->size_in_bytes = true;
+ break;
case 'd':
key->ignore = nondictionary;
break;
@@ -2187,7 +2219,8 @@ main (int argc, char **argv)
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+ gkey.numeric = gkey.general_numeric = gkey.size_in_bytes = false;
+ gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
files = xnmalloc (argc, sizeof *files);
@@ -2256,6 +2289,7 @@ main (int argc, char **argv)
break;
case 'b':
+ case 'B':
case 'd':
case 'f':
case 'g':
@@ -2418,7 +2452,7 @@ main (int argc, char **argv)
if (! (key->ignore || key->translate
|| (key->skipsblanks | key->reverse
| key->skipeblanks | key->month | key->numeric
- | key->general_numeric)))
+ | key->general_numeric | key->size_in_bytes)))
{
key->ignore = gkey.ignore;
key->translate = gkey.translate;
@@ -2427,12 +2461,14 @@ main (int argc, char **argv)
key->month = gkey.month;
key->numeric = gkey.numeric;
key->general_numeric = gkey.general_numeric;
+ key->size_in_bytes = gkey.size_in_bytes;
key->reverse = gkey.reverse;
}
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric)))
+ | gkey.numeric | gkey.general_numeric
+ | gkey.size_in_bytes )))
insertkey (&gkey);
reverse = gkey.reverse;
- Support bytesize comparison in sort,
Mart Somermaa <=