bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Support bytesize comparison in sort


From: Mart Somermaa
Subject: Support bytesize comparison in sort
Date: Thu, 06 Apr 2006 12:08:19 +0300
User-agent: Mail/News 1.5 (X11/20060309)

I've attached a patch that adds support for human-readable bytesize
comparison to 'sort': when the '-B' option is given on the command
line, numbers suffixed with K (kilobytes), M (megabytes) or G
(gigabytes) are ordered by their suffix as well as their value:
---
$ sort --help | grep bytesize
  -B, --size-in-bytes         compare bytesizes (numbers suffixed with K, M, G)
---

Example usage:
---
$ echo -e "3G\n2M\n1K" | sort -g
1K
2M
3G
$ echo -e "3K\n2M\n1G" | sort -B
3K
2M
1G
---

The feature is useful e.g. in combination with du (think of
$ du -hs * | sort -B).

Regards,
Mart Sõmermaa
--- sort.c.orig 2005-10-07 21:48:28.000000000 +0300
+++ sort.c      2006-04-06 12:05:31.000000000 +0300
@@ -149,6 +149,9 @@ struct keyfield
                                   point, but no exponential notation. */
   bool general_numeric;                /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation. */
+  bool size_in_bytes;          /* Flag for human-readable bytesize comparison.
+                                  Handle numbers suffixed with K for kilo-,
+                                  M for mega- and G for gigabytes. */
   bool month;                  /* Flag for comparison by month name. */
   bool reverse;                        /* Reverse the sense of comparison. */
   struct keyfield *next;       /* Next keyfield to try. */
@@ -295,6 +298,7 @@ Ordering options:\n\
 "), stdout);
       fputs (_("\
   -b, --ignore-leading-blanks  ignore leading blanks\n\
+  -B, --size-in-bytes         compare bytesizes (numbers suffixed with K, M, G)\n\
   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
   -f, --ignore-case           fold lower case to upper case characters\n\
 "), stdout);
@@ -353,11 +357,12 @@ native byte values.\n\
   exit (status);
 }
 
-static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
+static char const short_options[] = "-bBcdfgik:mMno:rsS:t:T:uy:z";
 
 static struct option const long_options[] =
 {
   {"ignore-leading-blanks", no_argument, NULL, 'b'},
+  {"size-in-bytes", no_argument, NULL, 'B'},
   {"check", no_argument, NULL, 'c'},
   {"dictionary-order", no_argument, NULL, 'd'},
   {"ignore-case", no_argument, NULL, 'f'},
@@ -1077,8 +1082,13 @@ numcompare (const char *a, const char *b
   return strnumcmp (a, b, decimal_point, thousands_sep);
 }
 
-static int
-general_numcompare (const char *sa, const char *sb)
+/* If size_in_bytes is true, compare strings A and B as human-readable
+ * positive byte counts (as returned e.g. by df -h) suffixed with either 'K',
+ * 'M', 'G' for kilobytes, megabytes and gigabytes.
+ * FIXME: Possible robustness fix -- skip whitespace before letter?
+ */
+static int general_numcompare (const char *sa, const char *sb,
+               bool size_in_bytes)
 {
   /* FIXME: add option to warn about failed conversions.  */
   /* FIXME: maybe add option to try expensive FP conversion
@@ -1095,6 +1105,24 @@ general_numcompare (const char *sa, cons
   if (sb == eb)
     return 1;
 
+  if (size_in_bytes && ea && eb)
+    {
+      char ca = (char) tolower(*ea);
+      char cb = (char) tolower(*eb);
+
+      if (! ( (ca == 'g' || ca == 'm' || ca == 'k')
+             && (cb == 'g' || cb == 'm' || cb == 'k') )
+         || ca == cb) /* unknown or equal suffix -> compare as usual */
+       goto compare_as_usual;
+
+      /* as ca != cb, if ca in GB => ca bigger, cb in GB => cb bigger,
+       * if neither of these, if ca in MB =>  ca bigger, cb bigger otherwise */
+      return (ca == 'g' ? 1
+             : cb == 'g' ? -1
+             : ca == 'm' ? 1 : -1);
+    }
+
+compare_as_usual:
   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
      conversion errors but before numbers; sort them by internal
      bit-pattern, for lack of a more portable alternative.  */
@@ -1179,13 +1207,14 @@ keycompare (const struct line *a, const 
       size_t lenb = limb <= textb ? 0 : limb - textb;
 
       /* Actually compare the fields. */
-      if (key->numeric | key->general_numeric)
+      if (key->numeric | key->general_numeric | key->size_in_bytes)
        {
          char savea = *lima, saveb = *limb;
 
          *lima = *limb = '\0';
-         diff = ((key->numeric ? numcompare : general_numcompare)
-                 (texta, textb));
+         diff = (key->numeric ?
+                   numcompare(texta, textb) :
+                   general_numcompare(texta, textb, key->size_in_bytes));
          *lima = savea, *limb = saveb;
        }
       else if (key->month)
@@ -2060,6 +2089,9 @@ set_ordering (const char *s, struct keyf
          if (blanktype == bl_end || blanktype == bl_both)
            key->skipeblanks = true;
          break;
+       case 'B':
+         key->size_in_bytes = true;
+         break;
        case 'd':
          key->ignore = nondictionary;
          break;
@@ -2187,7 +2219,8 @@ main (int argc, char **argv)
   gkey.sword = gkey.eword = SIZE_MAX;
   gkey.ignore = NULL;
   gkey.translate = NULL;
-  gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+  gkey.numeric = gkey.general_numeric = gkey.size_in_bytes = false;
+  gkey.month = gkey.reverse = false;
   gkey.skipsblanks = gkey.skipeblanks = false;
 
   files = xnmalloc (argc, sizeof *files);
@@ -2256,6 +2289,7 @@ main (int argc, char **argv)
          break;
 
        case 'b':
+       case 'B':
        case 'd':
        case 'f':
        case 'g':
@@ -2418,7 +2452,7 @@ main (int argc, char **argv)
     if (! (key->ignore || key->translate
           || (key->skipsblanks | key->reverse
               | key->skipeblanks | key->month | key->numeric
-              | key->general_numeric)))
+              | key->general_numeric | key->size_in_bytes)))
       {
        key->ignore = gkey.ignore;
        key->translate = gkey.translate;
@@ -2427,12 +2461,14 @@ main (int argc, char **argv)
        key->month = gkey.month;
        key->numeric = gkey.numeric;
        key->general_numeric = gkey.general_numeric;
+       key->size_in_bytes = gkey.size_in_bytes;
        key->reverse = gkey.reverse;
       }
 
   if (!keylist && (gkey.ignore || gkey.translate
                   || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
-                      | gkey.numeric | gkey.general_numeric)))
+                      | gkey.numeric | gkey.general_numeric
+                      | gkey.size_in_bytes )))
     insertkey (&gkey);
   reverse = gkey.reverse;
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]