bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Use a heap for the internal merge.


From: Pádraig Brady
Subject: Re: [PATCH] Use a heap for the internal merge.
Date: Mon, 16 Mar 2009 11:27:28 +0000
User-agent: Thunderbird 2.0.0.6 (X11/20071008)

This required a non trivial merge (pardon the pun)
with the fd exhaustion patch. Attached is the incremental
commit for applying after the fd exhaustion one.

cheers,
Pádraig.
>From 3f81910eb57e2db853f208bcbb17630fce7346db Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Fri, 13 Mar 2009 12:24:22 -0700
Subject: [PATCH] sort: Use a heap to speed up the internal merge

* src/sort.c (mergefps): When merging a line with a K-way merge, update
a heap rather than doing a linear insertion.  This improves the overall
performance of 'sort' from O(N*K) to O(N log K), which in testing with
random data and 64-way merge sped up 'sort' by 5%.
(less_than, swap): New functions.
(sortlines_temp): Rename local variable so that it doesn't shadow the
new 'swap' function.
* THANKS: Add Alexander Nguyen who coauthored this commit.
---
 THANKS     |    1 +
 src/sort.c |  143 ++++++++++++++++++++++++++++--------------------------------
 2 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/THANKS b/THANKS
index 47263ac..1080f39 100644
--- a/THANKS
+++ b/THANKS
@@ -23,6 +23,7 @@ Alberto Accomazzi                   address@hidden
 aldomel                             address@hidden
 Alen Muzinic                        address@hidden
 Alexander V. Lukyanov               address@hidden
+Alexander Nguyen                    address@hidden
 Allen Hewes                         address@hidden
 Axel Dörfler                        address@hidden
 Alexandre Duret-Lutz                address@hidden
diff --git a/src/sort.c b/src/sort.c
index ced0f2d..5915fef 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2101,6 +2101,25 @@ compare (const struct line *a, const struct line *b)
   return reverse ? -diff : diff;
 }
 
+/* If CUR is a vector of lines, and ORD a vector of indexes into CUR,
+   then return true if the line ORD[A] is less than the line ORD[B]
+   in CUR, using a stable comparison.  */
+static bool
+less_than (struct line const *const *cur, size_t const *ord,
+          size_t a, size_t b)
+{
+  return compare (cur[ord[a]], cur[ord[b]]) < (ord[a] < ord[b]);
+}
+
+/* Swap *A and *B.  */
+static inline void
+swap (size_t *a, size_t *b)
+{
+  size_t t = *a;
+  *a = *b;
+  *b = t;
+}
+
 /* Check that the lines read from FILE_NAME come in order.  Return
    true if they are in order.  If CHECKONLY == 'c', also print a
    diagnostic (FILE_NAME, line number, contents of line) to stderr if
@@ -2236,57 +2255,50 @@ mergefps (struct sortfile *files, size_t ntemps, size_t 
nfiles,
   struct line const **base = xnmalloc (nfiles, sizeof *base);
                                /* Base of each line table.  */
   size_t *ord = xnmalloc (nfiles, sizeof *ord);
-                               /* Table representing a permutation of fps,
-                                  such that cur[ord[0]] is the smallest line
-                                  and will be next output. */
-  size_t i;
-  size_t j;
-  size_t t;
+                               /* Zero-based binary heap of run numbers for
+                                  selecting the minimum line to output next,
+                                  referenced by cur[ord[0]].  */
+  size_t nopen = 0;
   struct keyfield const *key = keylist;
   saved.text = NULL;
 
   /* Read initial lines from each input file. */
-  for (i = 0; i < nfiles; )
+  for (size_t i = 0; i < nfiles; i++)
     {
       initbuf (&buffer[i], sizeof (struct line),
-              MAX (merge_buffer_size, sort_size / nfiles));
+              MAX (merge_buffer_size, sort_size / (nfiles - (i - nopen))));
       if (fillbuf (&buffer[i], fps[i], files[i].name))
        {
          struct line const *linelim = buffer_linelim (&buffer[i]);
          cur[i] = linelim - 1;
          base[i] = linelim - buffer[i].nlines;
-         i++;
+         ord[nopen++] = i;
        }
       else
        {
          /* fps[i] is empty; eliminate it from future consideration.  */
          xfclose (fps[i], files[i].name);
          if (i < ntemps)
-           {
-             ntemps--;
-             zaptemp (files[i].name);
-           }
+           zaptemp (files[i].name);
          free (buffer[i].buf);
-         --nfiles;
-         for (j = i; j < nfiles; ++j)
-           {
-             files[j] = files[j + 1];
-             fps[j] = fps[j + 1];
-           }
        }
     }
 
-  /* Set up the ord table according to comparisons among input lines.
-     Since this only reorders two items if one is strictly greater than
-     the other, it is stable. */
-  for (i = 0; i < nfiles; ++i)
-    ord[i] = i;
-  for (i = 1; i < nfiles; ++i)
-    if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
-      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
+  /* Set up the heap.  Since this never reorders two lines when the
+     parent equals the current line, the resulting root must be the
+     smallest line, even if stability is taken into account.  */
+  for (size_t j = 1; j < nopen; j++)
+    for (size_t current = j; 0 < current; )
+      {
+       size_t parent = (current - 1) / 2;
+       if (compare (cur[ord[parent]], cur[ord[current]]) <= 0)
+         break;
+       swap (&ord[current], &ord[parent]);
+       current = parent;
+      }
 
   /* Repeatedly output the smallest line until no input remains. */
-  while (nfiles)
+  while (nopen != 0)
     {
       struct line const *smallest = cur[ord[0]];
 
@@ -2342,57 +2354,36 @@ mergefps (struct sortfile *files, size_t ntemps, size_t 
nfiles,
          else
            {
              /* We reached EOF on fps[ord[0]].  */
-             for (i = 1; i < nfiles; ++i)
-               if (ord[i] > ord[0])
-                 --ord[i];
-             --nfiles;
              xfclose (fps[ord[0]], files[ord[0]].name);
              if (ord[0] < ntemps)
-               {
-                 ntemps--;
-                 zaptemp (files[ord[0]].name);
-               }
+               zaptemp (files[ord[0]].name);
              free (buffer[ord[0]].buf);
-             for (i = ord[0]; i < nfiles; ++i)
-               {
-                 fps[i] = fps[i + 1];
-                 files[i] = files[i + 1];
-                 buffer[i] = buffer[i + 1];
-                 cur[i] = cur[i + 1];
-                 base[i] = base[i + 1];
-               }
-             for (i = 0; i < nfiles; ++i)
-               ord[i] = ord[i + 1];
-             continue;
+
+             /* Shrink the ord array by replacing the root with the
+                last line on the last level.  This line will be
+                sifted down in the loop below.  */
+             ord[0] = ord[--nopen];
            }
        }
 
       /* The new line just read in may be larger than other lines
-        already in main memory; push it back in the queue until we
-        encounter a line larger than it.  Optimize for the common
-        case where the new line is smallest.  */
-      {
-       size_t lo = 1;
-       size_t hi = nfiles;
-       size_t probe = lo;
-       size_t ord0 = ord[0];
-       size_t count_of_smaller_lines;
-
-       while (lo < hi)
-         {
-           int cmp = compare (cur[ord0], cur[ord[probe]]);
-           if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
-             hi = probe;
-           else
-             lo = probe + 1;
-           probe = (lo + hi) / 2;
-         }
-
-       count_of_smaller_lines = lo - 1;
-       for (j = 0; j < count_of_smaller_lines; j++)
-         ord[j] = ord[j + 1];
-       ord[count_of_smaller_lines] = ord0;
-      }
+        already in main memory.  Sift down the heap.  */
+      for (size_t current = 0; ; )
+       {
+         size_t left = 2 * current + 1;
+         if (nopen <= left)
+           break;
+         size_t right = left + 1;
+         size_t winner = current;
+         if (less_than (cur, ord, left, winner))
+           winner = left;
+         if (right < nopen && less_than (cur, ord, right, winner))
+           winner = right;
+         if (winner == current)
+           break;
+         swap (&ord[current], &ord[winner]);
+         current = winner;
+       }
 
       /* Free up some resources every once in a while.  */
       if (MAX_PROCS_BEFORE_REAP < nprocs)
@@ -2521,12 +2512,12 @@ sortlines_temp (struct line *lines, size_t nlines, 
struct line *temp)
 {
   if (nlines == 2)
     {
-      /* Declare `swap' as int, not bool, to work around a bug
+      /* Declare `swap_offset' as int, not bool, to work around a bug
         <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
         in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
-      int swap = (0 < compare (&lines[-1], &lines[-2]));
-      temp[-1] = lines[-1 - swap];
-      temp[-2] = lines[-2 + swap];
+      int swap_offset = (0 < compare (&lines[-1], &lines[-2]));
+      temp[-1] = lines[-1 - swap_offset];
+      temp[-2] = lines[-2 + swap_offset];
     }
   else
     {
-- 
1.5.3.6


reply via email to

[Prev in Thread] Current Thread [Next in Thread]