[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-patch] [PATCH] use di-set module rather than hash: easier-to-use, m
From: |
Jim Meyering |
Subject: |
[bug-patch] [PATCH] use di-set module rather than hash: easier-to-use, more efficient |
Date: |
Tue, 08 Feb 2011 11:20:22 +0100 |
Hi,
While fixing down the used-uninitialized bug, I noticed
how patch was using gnulib's hash module to maintain a set
of dev/inode pairs to keep track of which files have been processed.
Until not long ago, coreutils did something very similar in a few
programs, including du. By using the di-set module that I've just
moved from coreutils/gl to gnulib, patch can remove a good chunk of
code and make the remaining bits easier to read.
To test this locally, I first updated patch to use the latest from
gnulib via this:
git syncsub
git ci -m 'build: update gnulib submodule to latest' gnulib
(that depends on my having the following in ~/.gitconfig)
[alias]
ci = commit
syncsub = submodule foreach git pull --ff-only origin master
Then reran ./bootstrap && ./configured && make as usual.
>From 1480ad29ef9923ad423c0ff4a30b4d9b4711cf2b Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Mon, 7 Feb 2011 17:01:00 +0100
Subject: [PATCH] use di-set module rather than hash: easier-to-use, more
efficient
In coreutils, we found that we could save a lot of space with large
dev/inode sets by using the di-set module that is now in gnulib.
This change makes patch use that, too, though not for its run-time
efficiency, but rather for its higher-level, easier-to-use interface.
* bootstrap.conf (gnulib_modules): Add di-set.
* src/util.c (file_id_hasher, file_id_comparator): Remove functions.
(init_backup_hash_table, insert_file): Rewrite to use di_set_*
functions rather than hash table functions.
(insert_file, file_already_seen): Likewise.
---
bootstrap.conf | 1 +
src/util.c | 60 +++++++++++--------------------------------------------
3 files changed, 26 insertions(+), 48 deletions(-)
diff --git a/bootstrap.conf b/bootstrap.conf
index 688fde0..cb41fee 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -21,6 +21,7 @@ gnulib_modules="
argmatch
backupfile
clock-time
+di-set
diffseq
dirname
dup2
diff --git a/src/util.c b/src/util.c
index f1187ff..7d35a46 100644
--- a/src/util.c
+++ b/src/util.c
@@ -21,7 +21,7 @@
#define XTERN extern
#include <common.h>
#include <dirname.h>
-#include <hash.h>
+#include <di-set.h>
#include <quotearg.h>
#undef XTERN
#define XTERN
@@ -51,42 +51,15 @@
static void makedirs (char const *);
-typedef struct
-{
- dev_t dev;
- ino_t ino;
-} file_id;
-
-/* Return an index for ENTRY into a hash table of size TABLE_SIZE. */
-
-static size_t
-file_id_hasher (void const *entry, size_t table_size)
-{
- file_id const *e = entry;
- size_t i = e->ino + e->dev;
- return i % table_size;
-}
-
-/* Do ENTRY1 and ENTRY2 refer to the same files? */
+static struct di_set *file_set;
-static bool
-file_id_comparator (void const *entry1, void const *entry2)
-{
- file_id const *e1 = entry1;
- file_id const *e2 = entry2;
- return (e1->ino == e2->ino && e1->dev == e2->dev);
-}
-
-static Hash_table *file_id_table;
-
-/* Initialize the hash table. */
+/* Initialize the set. */
void
init_backup_hash_table (void)
{
- file_id_table = hash_initialize (0, NULL, file_id_hasher,
- file_id_comparator, free);
- if (!file_id_table)
+ file_set = di_set_alloc ();
+ if (!file_set)
xalloc_die ();
}
@@ -95,18 +68,9 @@ init_backup_hash_table (void)
static void
insert_file (struct stat const *st)
{
- file_id *p;
- static file_id *next_slot;
-
- if (!next_slot)
- next_slot = xmalloc (sizeof *next_slot);
- next_slot->dev = st->st_dev;
- next_slot->ino = st->st_ino;
- p = hash_insert (file_id_table, next_slot);
- if (!p)
- xalloc_die ();
- if (p == next_slot)
- next_slot = NULL;
+ int inserted = di_set_insert (file_set, st->st_dev, st->st_ino);
+ if (inserted < 0)
+ xalloc_die ();
}
/* Has the file identified by ST already been inserted into the hash
@@ -115,10 +79,10 @@ insert_file (struct stat const *st)
bool
file_already_seen (struct stat const *st)
{
- file_id f;
- f.dev = st->st_dev;
- f.ino = st->st_ino;
- return hash_lookup (file_id_table, &f) != 0;
+ int found = di_set_lookup (file_set, st->st_dev, st->st_ino);
+ if (found < 0)
+ xalloc_die ();
+ return found;
}
static bool
--
1.7.4.2.g597a6
- [bug-patch] [PATCH] use di-set module rather than hash: easier-to-use, more efficient,
Jim Meyering <=