bug-patch
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-patch] [PATCH] use di-set module rather than hash: easier-to-use, m


From: Jim Meyering
Subject: [bug-patch] [PATCH] use di-set module rather than hash: easier-to-use, more efficient
Date: Tue, 08 Feb 2011 11:20:22 +0100

Hi,

While fixing down the used-uninitialized bug, I noticed
how patch was using gnulib's hash module to maintain a set
of dev/inode pairs to keep track of which files have been processed.
Until not long ago, coreutils did something very similar in a few
programs, including du.  By using the di-set module that I've just
moved from coreutils/gl to gnulib, patch can remove a good chunk of
code and make the remaining bits easier to read.

To test this locally, I first updated patch to use the latest from
gnulib via this:

  git syncsub
  git ci -m 'build: update gnulib submodule to latest' gnulib

(that depends on my having the following in ~/.gitconfig)

[alias]
      ci = commit
      syncsub = submodule foreach git pull --ff-only origin master

Then reran ./bootstrap && ./configured && make as usual.


>From 1480ad29ef9923ad423c0ff4a30b4d9b4711cf2b Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Mon, 7 Feb 2011 17:01:00 +0100
Subject: [PATCH] use di-set module rather than hash: easier-to-use, more 
efficient

In coreutils, we found that we could save a lot of space with large
dev/inode sets by using the di-set module that is now in gnulib.
This change makes patch use that, too, though not for its run-time
efficiency, but rather for its higher-level, easier-to-use interface.
* bootstrap.conf (gnulib_modules): Add di-set.
* src/util.c (file_id_hasher, file_id_comparator): Remove functions.
(init_backup_hash_table, insert_file): Rewrite to use di_set_*
functions rather than hash table functions.
(insert_file, file_already_seen): Likewise.
---
 bootstrap.conf |    1 +
 src/util.c     |   60 +++++++++++--------------------------------------------
 3 files changed, 26 insertions(+), 48 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 688fde0..cb41fee 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -21,6 +21,7 @@ gnulib_modules="
 argmatch
 backupfile
 clock-time
+di-set
 diffseq
 dirname
 dup2
diff --git a/src/util.c b/src/util.c
index f1187ff..7d35a46 100644
--- a/src/util.c
+++ b/src/util.c
@@ -21,7 +21,7 @@
 #define XTERN extern
 #include <common.h>
 #include <dirname.h>
-#include <hash.h>
+#include <di-set.h>
 #include <quotearg.h>
 #undef XTERN
 #define XTERN
@@ -51,42 +51,15 @@

 static void makedirs (char const *);

-typedef struct
-{
-  dev_t dev;
-  ino_t ino;
-} file_id;
-
-/* Return an index for ENTRY into a hash table of size TABLE_SIZE.  */
-
-static size_t
-file_id_hasher (void const *entry, size_t table_size)
-{
-  file_id const *e = entry;
-  size_t i = e->ino + e->dev;
-  return i % table_size;
-}
-
-/* Do ENTRY1 and ENTRY2 refer to the same files?  */
+static struct di_set *file_set;

-static bool
-file_id_comparator (void const *entry1, void const *entry2)
-{
-  file_id const *e1 = entry1;
-  file_id const *e2 = entry2;
-  return (e1->ino == e2->ino && e1->dev == e2->dev);
-}
-
-static Hash_table *file_id_table;
-
-/* Initialize the hash table.  */
+/* Initialize the set.  */

 void
 init_backup_hash_table (void)
 {
-  file_id_table = hash_initialize (0, NULL, file_id_hasher,
-                                  file_id_comparator, free);
-  if (!file_id_table)
+  file_set = di_set_alloc ();
+  if (!file_set)
     xalloc_die ();
 }

@@ -95,18 +68,9 @@ init_backup_hash_table (void)
 static void
 insert_file (struct stat const *st)
 {
-   file_id *p;
-   static file_id *next_slot;
-
-   if (!next_slot)
-     next_slot = xmalloc (sizeof *next_slot);
-   next_slot->dev = st->st_dev;
-   next_slot->ino = st->st_ino;
-   p = hash_insert (file_id_table, next_slot);
-   if (!p)
-     xalloc_die ();
-   if (p == next_slot)
-     next_slot = NULL;
+  int inserted = di_set_insert (file_set, st->st_dev, st->st_ino);
+  if (inserted < 0)
+    xalloc_die ();
 }

 /* Has the file identified by ST already been inserted into the hash
@@ -115,10 +79,10 @@ insert_file (struct stat const *st)
 bool
 file_already_seen (struct stat const *st)
 {
-  file_id f;
-  f.dev = st->st_dev;
-  f.ino = st->st_ino;
-  return hash_lookup (file_id_table, &f) != 0;
+  int found = di_set_lookup (file_set, st->st_dev, st->st_ino);
+  if (found < 0)
+    xalloc_die ();
+  return found;
 }

 static bool
--
1.7.4.2.g597a6



reply via email to

[Prev in Thread] Current Thread [Next in Thread]