bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [patch #3596] Sort directories before files in "ls"


From: Francesco Montorsi
Subject: Re: [patch #3596] Sort directories before files in "ls"
Date: Thu, 29 Dec 2005 18:07:57 +0100
User-agent: Mozilla Thunderbird 1.0.6 (X11/20050715)

Hi,


>I hope you don't view these comments as criticism, so much as ideas to
>make your patch more likely to be incorporated once your copyright status
>is cleared.  Remember, ls already has so many options that the bar for
>adding a new option is very high - you have to give good reasons why the
>patch is a useful addition, and minimize other's efforts to incorporate
>your patch.  Your efforts to provide a good patch for a feature that you
>desire are commendable; too many people complain about a missing feature,
>then do nothing about it.
thanks, I'll try to make the patch as perfect as possible...
I hope I have fixed all previous points.
I used many of your comments in the patch comments and I have
rearranged the sort function array to be multi-dimensional.
I merged all patched files in a single patch (attached); just one issue: I don't know how it's best to use ARGMATCH_VERIFY macro. I have defined num_sorts but then left out the
ARGMATCH_VERIFY check as you can see below...

 Can you take a look at this ?


Thanks,
Francesco



ChangeLog changes:


2005-12-27  Francesco Montorsi  <address@hidden>

        Add a --group-directories-first option to ls to group directories
        before files.
        * NEWS: Document new option.
        * src/ls.c (sort_type): Rearrange to use as an array index when
        choosing sort function.
        (directories_first): New variable.
        (GROUP_DIRECTORIES_FIRST_OPTION): New enum.
        (long_options): Add --directories-first.
        (main): Support new option.
        (is_directory): New function.
        (extract_dirs_from_files): Use it.
        (DIRFIRST_CHECK, DEFINE_SORT_FUNCTIONS)
        (LIST_SORTFUNCTION_VARIANTS): New macros.
        (sort_functions): New variable.
        (sort_files): Use it.
        (usage): Document new option.
        * doc/coreutils.texi: documented the new option
Index: NEWS
===================================================================
RCS file: /sources/coreutils/coreutils/NEWS,v
retrieving revision 1.354
diff -b -u -2 -r1.354 NEWS
--- NEWS        15 Dec 2005 12:24:54 -0000      1.354
+++ NEWS        29 Dec 2005 16:50:17 -0000
@@ -24,4 +24,7 @@
   attempts to have the default be the best of both worlds.
 
+  ls now supports the '--group-directories-first' option to group directories
+  before files.
+
   sort now reports incompatible options (e.g., -i and -n) rather than
   silently ignoring one of them.
Index: THANKS
===================================================================
RCS file: /sources/coreutils/coreutils/THANKS,v
retrieving revision 1.413
diff -b -u -2 -r1.413 THANKS
--- THANKS      9 Dec 2005 16:27:26 -0000       1.413
+++ THANKS      29 Dec 2005 16:50:18 -0000
@@ -152,4 +152,5 @@
 Fletcher Mattox                     address@hidden
 Florin Iucha                        address@hidden
+Francesco Montorsi                  address@hidden
 François Pinard                     address@hidden
 Frank Adler                         address@hidden
Index: doc/coreutils.texi
===================================================================
RCS file: /sources/coreutils/coreutils/doc/coreutils.texi,v
retrieving revision 1.302
diff -b -u -2 -r1.302 coreutils.texi
--- doc/coreutils.texi  17 Dec 2005 10:50:00 -0000      1.302
+++ doc/coreutils.texi  29 Dec 2005 16:50:33 -0000
@@ -5470,4 +5470,12 @@
 @option{--dereference-command-line} (@option{-H})).
 
address@hidden --group-directories-first
address@hidden --group-directories-first
+Groups all the directories before the files and then sorts the
+directories and the files separately using the selected sort key
+(see --sort option).
+That is, this option acts as primary sort key using the value of
+the --sort option as secondary key.
+
 @item --hide=PATTERN
 @opindex address@hidden
Index: src/ls.c
===================================================================
RCS file: /sources/coreutils/coreutils/src/ls.c,v
retrieving revision 1.404
diff -b -u -2 -r1.404 ls.c
--- src/ls.c    17 Dec 2005 10:33:08 -0000      1.404
+++ src/ls.c    29 Dec 2005 16:50:38 -0000
@@ -399,5 +399,7 @@
 ARGMATCH_VERIFY (time_style_args, time_style_types);
 
-/* Type of time to print or sort by.  Controlled by -c and -u.  */
+/* Type of time to print or sort by.  Controlled by -c and -u.
+   The values of each item of this enum are important since they are
+   used as indices in the sort functions array (see sort_files()).  */
 
 enum time_type
@@ -405,19 +407,23 @@
     time_mtime,                        /* default */
     time_ctime,                        /* -c */
-    time_atime                 /* -u */
+    time_atime,                /* -u */
+    time_numtypes
   };
 
 static enum time_type time_type;
 
-/* The file characteristic to sort by.  Controlled by -t, -S, -U, -X, -v.  */
+/* The file characteristic to sort by.  Controlled by -t, -S, -U, -X, -v.
+   The values of each item of this enum are important since they are
+   used as indices in the sort functions array (see sort_files()).  */
 
 enum sort_type
   {
-    sort_none,                 /* -U */
+    sort_none = -1,            /* -U */
     sort_name,                 /* default */
     sort_extension,            /* -X */
-    sort_time,                 /* -t */
     sort_size,                 /* -S */
-    sort_version               /* -v */
+    sort_version,              /* -v */
+    sort_time,                 /* -t */
+    sort_numtypes
   };
 
@@ -592,4 +598,8 @@
 static bool immediate_dirs;
 
+/* True means that directories are grouped before files. */
+
+static bool directories_first;
+
 /* Which files to ignore.  */
 
@@ -731,4 +741,5 @@
   FORMAT_OPTION,
   FULL_TIME_OPTION,
+  GROUP_DIRECTORIES_FIRST_OPTION,
   HIDE_OPTION,
   INDICATOR_STYLE_OPTION,
@@ -752,4 +763,5 @@
   {"dired", no_argument, NULL, 'D'},
   {"full-time", no_argument, NULL, FULL_TIME_OPTION},
+  {"group-directories-first", no_argument, NULL, 
GROUP_DIRECTORIES_FIRST_OPTION},
   {"human-readable", no_argument, NULL, 'h'},
   {"inode", no_argument, NULL, 'i'},
@@ -1224,5 +1236,6 @@
     || format == long_format
     || dereference == DEREF_ALWAYS
-    || print_block_size || print_inode;
+    || print_block_size || print_inode
+    || directories_first;
   format_needs_type = (!format_needs_stat
                       && (recursive || print_with_color
@@ -1713,4 +1726,8 @@
          break;
 
+    case GROUP_DIRECTORIES_FIRST_OPTION:
+      directories_first = true;
+      break;
+
        case TIME_OPTION:
          time_type = XARGMATCH ("--time", optarg, time_args, time_types);
@@ -2728,4 +2745,13 @@
 }
 
+
+/* Returns true if the given fileinfo refers to a directory */
+static bool 
+is_directory (const struct fileinfo *f)
+{
+  return f->filetype == directory || f->filetype == arg_directory;
+}
+
+
 #ifdef S_ISLNK
 
@@ -2810,7 +2836,7 @@
      order.  */
   for (i = files_index; i-- != 0; )
-    if ((files[i].filetype == directory || files[i].filetype == arg_directory)
-       && (!ignore_dot_and_dot_dot
-           || !basename_is_dot_or_dotdot (files[i].name)))
+    if (is_directory(&files[i])
+        && (! ignore_dot_and_dot_dot
+            || ! basename_is_dot_or_dotdot (files[i].name)))
       {
        if (!dirname || files[i].name[0] == '/')
@@ -2868,4 +2894,49 @@
 
 typedef void const *V;
+typedef int (*qsortFunc)(V a, V b);
+
+/* Used below in DEFINE_SORT_FUNCTIONS for _df_ sort function variants.
+   The do { ... } while(0) makes it possible to use the macro more like 
+   a statement, without violating C89 rules: */
+#define DIRFIRST_CHECK(a, b)                                        \
+  do {                                                              \
+    bool a_is_dir = is_directory((struct fileinfo const *)a);       \
+    bool b_is_dir = is_directory((struct fileinfo const *)b);       \
+    if (a_is_dir && !b_is_dir)                                      \
+        return -1;         /* a goes before b */                    \
+    if (!a_is_dir && b_is_dir)                                      \
+        return 1;          /* b goes before a */                    \
+  } while (0)
+
+/*
+   Defines the 8 different sort function variants required for each sortkey.
+   The 'basefunc' should be an inline function so that compiler will be able
+   to optimize all these functions. The 'basename' argument will be prefixed
+   with the '[rev_][x]str{cmp|coll}[_df]_' string to create the function name.
+*/
+#define DEFINE_SORT_FUNCTIONS(basename, basefunc)                 \
+  /* direct, non-dirfirst versions */                             \
+  static int xstrcoll_##basename (V a, V b)                       \
+  { return basefunc (a, b, xstrcoll); }                           \
+  static int strcmp_##basename (V a, V b)                         \
+  { return basefunc (a, b, strcmp); }                             \
+                                                                  \
+  /* reverse, non-dirfirst versions */                            \
+  static int rev_xstrcoll_##basename (V a, V b)                   \
+  { return basefunc (b, a, xstrcoll); }                           \
+  static int rev_strcmp_##basename (V a, V b)                     \
+  { return basefunc (b, a, strcmp); }                             \
+                                                                  \
+  /* direct, dirfirst versions */                                 \
+  static int xstrcoll_df_##basename (V a, V b)                    \
+  { DIRFIRST_CHECK (a, b); return basefunc (a, b, xstrcoll); }    \
+  static int strcmp_df_##basename (V a, V b)                      \
+  { DIRFIRST_CHECK (a, b); return basefunc (a, b, strcmp); }      \
+                                                                  \
+  /* reverse, dirfirst versions */                                \
+  static int rev_xstrcoll_df_##basename (V a, V b)                \
+  { DIRFIRST_CHECK (a, b); return basefunc (b, a, xstrcoll); }    \
+  static int rev_strcmp_df_##basename (V a, V b)                  \
+  { DIRFIRST_CHECK (a, b); return basefunc (b, a, strcmp); }
 
 static inline int
@@ -2877,8 +2948,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_ctime (V a, V b) { return cmp_ctime (a, b, xstrcoll); }
-static int compstr_ctime (V a, V b) { return cmp_ctime (a, b, strcmp); }
-static int rev_cmp_ctime (V a, V b) { return compare_ctime (b, a); }
-static int rev_str_ctime (V a, V b) { return compstr_ctime (b, a); }
 
 static inline int
@@ -2890,8 +2957,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_mtime (V a, V b) { return cmp_mtime (a, b, xstrcoll); }
-static int compstr_mtime (V a, V b) { return cmp_mtime (a, b, strcmp); }
-static int rev_cmp_mtime (V a, V b) { return compare_mtime (b, a); }
-static int rev_str_mtime (V a, V b) { return compstr_mtime (b, a); }
 
 static inline int
@@ -2903,8 +2966,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_atime (V a, V b) { return cmp_atime (a, b, xstrcoll); }
-static int compstr_atime (V a, V b) { return cmp_atime (a, b, strcmp); }
-static int rev_cmp_atime (V a, V b) { return compare_atime (b, a); }
-static int rev_str_atime (V a, V b) { return compstr_atime (b, a); }
 
 static inline int
@@ -2915,16 +2974,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_size (V a, V b) { return cmp_size (a, b, xstrcoll); }
-static int compstr_size (V a, V b) { return cmp_size (a, b, strcmp); }
-static int rev_cmp_size (V a, V b) { return compare_size (b, a); }
-static int rev_str_size (V a, V b) { return compstr_size (b, a); }
-
-static inline int
-cmp_version (struct fileinfo const *a, struct fileinfo const *b)
-{
-  return strverscmp (a->name, b->name);
-}
-static int compare_version (V a, V b) { return cmp_version (a, b); }
-static int rev_cmp_version (V a, V b) { return compare_version (b, a); }
 
 static inline int
@@ -2934,8 +2981,4 @@
   return cmp (a->name, b->name);
 }
-static int compare_name (V a, V b) { return cmp_name (a, b, xstrcoll); }
-static int compstr_name (V a, V b) { return cmp_name (a, b, strcmp); }
-static int rev_cmp_name (V a, V b) { return compare_name (b, a); }
-static int rev_str_name (V a, V b) { return compstr_name (b, a); }
 
 /* Compare file extensions.  Files with no extension are `smallest'.
@@ -2951,8 +2994,96 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_extension (V a, V b) { return cmp_extension (a, b, 
xstrcoll); }
-static int compstr_extension (V a, V b) { return cmp_extension (a, b, strcmp); 
}
-static int rev_cmp_extension (V a, V b) { return compare_extension (b, a); }
-static int rev_str_extension (V a, V b) { return compstr_extension (b, a); }
+
+
+DEFINE_SORT_FUNCTIONS(ctime, cmp_ctime)
+DEFINE_SORT_FUNCTIONS(mtime, cmp_mtime)
+DEFINE_SORT_FUNCTIONS(atime, cmp_atime)
+DEFINE_SORT_FUNCTIONS(size, cmp_size)
+DEFINE_SORT_FUNCTIONS(name, cmp_name)
+DEFINE_SORT_FUNCTIONS(extension, cmp_extension)
+
+
+
+/* Compares file versions. 
+   Unlike all other compare functions above, cmp_version only depends
+   on strverscmp, which does not fail (even for locale reasons), and does not
+   need a secondary sort key.
+   All the other sort options, in fact, need an xstrcoll and strcmp variants, 
+   because they all use a string comparison (either as the primary or secondary
+   sort key), and xstrcoll has the ability to do a longjmp if strcoll fails for
+   locale reasons. Last, strverscmp is ALWAYS available in coreutils,
+   thanks to the gnulib library. */
+static inline int
+cmp_version (struct fileinfo const *a, struct fileinfo const *b)
+{
+  return strverscmp (a->name, b->name);
+}
+
+static int xstrcoll_version (V a, V b)
+{ return cmp_version (a, b); }
+static int rev_xstrcoll_version (V a, V b)
+{ return cmp_version (b, a); }
+static int xstrcoll_df_version (V a, V b)
+{ DIRFIRST_CHECK(a,b); return cmp_version (a, b); }
+static int rev_xstrcoll_df_version (V a, V b)
+{ DIRFIRST_CHECK(a,b); return cmp_version (b, a); }
+
+
+/* The sortkeys are not just numtypes because when sort_time is selected
+   then we have time_numtypes possible sortkeys. */
+#define num_sorts      (sort_numtypes + time_numtypes - 1)
+
+/*
+   We have 2^3 different variants for each sortkey function
+   (for 3 indipendent sort modes), and we have num_sorts different sortkeys.
+   The function pointers stored in this array must be dereferenced as:
+
+       sort_variants[sort_key][use_strcmp][reverse][dirs_first]
+
+   Note that the order in which sortkeys are listed in the function pointer
+   array is defined by the time_type and sort_type enums !
+*/
+
+#define LIST_SORTFUNCTION_VARIANTS(basename)                        \
+  {                                                                 \
+    {                                                               \
+      { xstrcoll_##basename, xstrcoll_df_##basename },              \
+      { rev_xstrcoll_##basename, rev_xstrcoll_df_##basename },      \
+    },                                                              \
+    {                                                               \
+      { strcmp_##basename, strcmp_df_##basename },                  \
+      { rev_strcmp_##basename, rev_strcmp_df_##basename },          \
+    }                                                               \
+  }
+
+qsortFunc sort_functions[][2][2][2] = {
+    LIST_SORTFUNCTION_VARIANTS(name),
+    LIST_SORTFUNCTION_VARIANTS(extension),
+    LIST_SORTFUNCTION_VARIANTS(size),
+
+    {
+      {
+        { xstrcoll_version, xstrcoll_df_version },
+        { rev_xstrcoll_version, rev_xstrcoll_df_version },
+      },
+
+      /* we use NULL for the strcmp variants of version comparison
+         since as explained in cmp_version definition, version comparison 
+         does not rely on xstrcoll, so it will never longjmp, and never 
+         need to try the strcmp fallback */
+      {
+        { NULL, NULL },
+        { NULL, NULL },
+      }
+    },
+
+    /* last are time sort functions */
+    LIST_SORTFUNCTION_VARIANTS(mtime),
+    LIST_SORTFUNCTION_VARIANTS(ctime),
+    LIST_SORTFUNCTION_VARIANTS(atime)
+  };
+
+//ARGMATCH_VERIFY(sort_functions, num_sorts)
+
 
 /* Sort the files now in the table.  */
@@ -2961,5 +3092,8 @@
 sort_files (void)
 {
-  int (*func) (V, V);
+  bool use_strcmp;
+
+  if (sort_type == sort_none)
+      return;
 
   /* Try strcoll.  If it fails, fall back on strcmp.  We can't safely
@@ -2969,76 +3103,20 @@
 
   if (! setjmp (failed_strcoll))
-    {
-      switch (sort_type)
-       {
-       case sort_none:
-         return;
-       case sort_time:
-         switch (time_type)
-           {
-           case time_ctime:
-             func = sort_reverse ? rev_cmp_ctime : compare_ctime;
-             break;
-           case time_mtime:
-             func = sort_reverse ? rev_cmp_mtime : compare_mtime;
-             break;
-           case time_atime:
-             func = sort_reverse ? rev_cmp_atime : compare_atime;
-             break;
-           default:
-             abort ();
-           }
-         break;
-       case sort_name:
-         func = sort_reverse ? rev_cmp_name : compare_name;
-         break;
-       case sort_extension:
-         func = sort_reverse ? rev_cmp_extension : compare_extension;
-         break;
-       case sort_size:
-         func = sort_reverse ? rev_cmp_size : compare_size;
-         break;
-       case sort_version:
-         func = sort_reverse ? rev_cmp_version : compare_version;
-         break;
-       default:
-         abort ();
-       }
-    }
+        use_strcmp = false;      /* strcoll() did not failed ! */
   else
     {
-      switch (sort_type)
-       {
-       case sort_time:
-         switch (time_type)
-           {
-           case time_ctime:
-             func = sort_reverse ? rev_str_ctime : compstr_ctime;
-             break;
-           case time_mtime:
-             func = sort_reverse ? rev_str_mtime : compstr_mtime;
-             break;
-           case time_atime:
-             func = sort_reverse ? rev_str_atime : compstr_atime;
-             break;
-           default:
-             abort ();
-           }
-         break;
-       case sort_name:
-         func = sort_reverse ? rev_str_name : compstr_name;
-         break;
-       case sort_extension:
-         func = sort_reverse ? rev_str_extension : compstr_extension;
-         break;
-       case sort_size:
-         func = sort_reverse ? rev_str_size : compstr_size;
-         break;
-       default:
-         abort ();
-       }
+      use_strcmp = true;
+      if (sort_type == sort_version)
+       abort();
     }
 
-  qsort (files, files_index, sizeof *files, func);
+  /* when sort_type == sort_time we need to use time_type as subindex. */
+  int timeoffset = 0;
+  if (sort_type == sort_time)
+     timeoffset = time_type;
+
+  qsort (files, files_index, sizeof *files, 
+         sort_functions[sort_type + timeoffset][use_strcmp][sort_reverse]
+                       [directories_first]);
 }
 
@@ -4147,4 +4225,10 @@
       fputs (_("\
   -g                         like -l, but do not list owner\n\
+"), stdout);
+      fputs(_("\
+      --group-directories-first\n\
+                             group directories before files\n\
+"), stdout);
+      fputs(_("\
   -G, --no-group             like -l, but do not list group\n\
   -h, --human-readable       with -l, print sizes in human readable format\n\

reply via email to

[Prev in Thread] Current Thread [Next in Thread]