[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 2/4] Reduce memory consumption of oldfind on large directories.
From: |
James Youngman |
Subject: |
[PATCH 2/4] Reduce memory consumption of oldfind on large directories. |
Date: |
Sun, 21 Aug 2011 00:10:18 +0100 |
* find/find.c (process_dir): Reduce memory consumption for large
directories. Don't save the whole directory content with
xsavedir, instead just loop over the results of readdir. This
means that oldfind will consume one file descriptor per directory
level.
* find/testsuite/sv-34079.sh: verify that the memory consumption
of oldfind is reasonable on large directories.
---
ChangeLog | 9 ++++
find/find.c | 100 ++++++++++++++++++++++++++++++++++++-------
find/testsuite/sv-34079.sh | 15 ++++---
3 files changed, 100 insertions(+), 24 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index da5ce0b..bf79f52 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2011-08-20 James Youngman <address@hidden>
+ Reduce memory consumption of oldfind on large directories.
+ * find/find.c (process_dir): Reduce memory consumption for large
+ directories. Don't save the whole directory content with
+ xsavedir, instead just loop over the results of readdir. This
+ means that oldfind will consume one file descriptor per directory
+ level.
+ * find/testsuite/sv-34079.sh: verify that the memory consumption
+ of oldfind is reasonable on large directories.
+
Reduce memory consumption of fts.
* gnulib: update to latest, mainly to include a bugfix (gnulib
commit 47cb657eca1abf2c26c32c8ce03def994a3ee37c) which limits the
diff --git a/find/find.c b/find/find.c
index c34bd0e..93fbbcd 100644
--- a/find/find.c
+++ b/find/find.c
@@ -37,6 +37,7 @@
/* gnulib headers. */
#include "canonicalize.h"
#include "closein.h"
+#include "dirent-safer.h"
#include "dirname.h"
#include "error.h"
#include "fcntl--.h"
@@ -52,13 +53,15 @@
#include "buildcmd.h"
#include "defs.h"
#include "fdleak.h"
-#include "savedirinfo.h"
#undef STAT_MOUNTPOINTS
-
-
-
+#ifdef CLOSEDIR_VOID
+/* Fake a return value. */
+# define CLOSEDIR(d) (closedir (d), 0)
+#else
+# define CLOSEDIR(d) closedir (d)
+#endif
#if ENABLE_NLS
# include <libintl.h>
@@ -114,6 +117,43 @@ enum WdSanityCheckFatality
NON_FATAL_IF_SANITY_CHECK_FAILS
};
+#if defined HAVE_STRUCT_DIRENT_D_TYPE
+/* Convert the value of struct dirent.d_type into a value for
+ * struct stat.st_mode (at least the file type bits), or zero
+ * if the type is DT_UNKNOWN or is a value we don't know about.
+ */
+static mode_t
+type_to_mode (unsigned type)
+{
+ switch (type)
+ {
+#ifdef DT_FIFO
+ case DT_FIFO: return S_IFIFO;
+#endif
+#ifdef DT_CHR
+ case DT_CHR: return S_IFCHR;
+#endif
+#ifdef DT_DIR
+ case DT_DIR: return S_IFDIR;
+#endif
+#ifdef DT_BLK
+ case DT_BLK: return S_IFBLK;
+#endif
+#ifdef DT_REG
+ case DT_REG: return S_IFREG;
+#endif
+#ifdef DT_LNK
+ case DT_LNK: return S_IFLNK;
+#endif
+#ifdef DT_SOCK
+ case DT_SOCK: return S_IFSOCK;
+#endif
+ default:
+ return 0; /* Unknown. */
+ }
+}
+#endif
+
int
get_current_dirfd (void)
@@ -1245,7 +1285,7 @@ process_dir (char *pathname, char *name, int pathlen,
const struct stat *statp,
unsigned int idx; /* Which entry are we on? */
struct stat stat_buf;
size_t dircount = 0u;
- struct savedir_dirinfo *dirinfo;
+ DIR *dirp;
if (statp->st_nlink < 2)
{
@@ -1259,10 +1299,9 @@ process_dir (char *pathname, char *name, int pathlen,
const struct stat *statp,
}
errno = 0;
- dirinfo = xsavedir (name, 0);
+ dirp = opendir_safer (name);
-
- if (dirinfo == NULL)
+ if (dirp == NULL)
{
assert (errno != 0);
error (0, errno, "%s", safely_quote_err_filename (0, pathname));
@@ -1270,7 +1309,6 @@ process_dir (char *pathname, char *name, int pathlen,
const struct stat *statp,
}
else
{
- register char *namep; /* Current point in `name_space'. */
char *cur_path; /* Full path of each file to process. */
char *cur_name; /* Base name of each file to process. */
unsigned cur_path_size; /* Bytes allocated for `cur_path'. */
@@ -1347,14 +1385,42 @@ process_dir (char *pathname, char *name, int pathlen,
const struct stat *statp,
}
}
- for (idx=0; idx < dirinfo->size; ++idx)
+ while (1)
{
- /* savedirinfo() may return dirinfo=NULL if extended information
- * is not available.
- */
- mode_t mode = (dirinfo->entries[idx].flags & SavedirHaveFileType) ?
- dirinfo->entries[idx].type_info : 0;
- namep = dirinfo->entries[idx].name;
+ const char *namep;
+ mode_t mode = 0;
+ const struct dirent *dp;
+
+ /* We reset errno here to distinguish between end-of-directory and an error */
+ errno = 0;
+ dp = readdir (dirp);
+ if (NULL == dp)
+ {
+ if (errno)
+ {
+ /* an error occurred, but we are not yet at the end
+ of the directory stream. */
+ error (0, errno, "%s", safely_quote_err_filename (0, pathname));
+ continue;
+ }
+ else
+ {
+ break; /* End of the directory stream. */
+ }
+ }
+ else
+ {
+ namep = dp->d_name;
+ /* Skip "", ".", and "..". "" is returned by at least one buggy
+ implementation: Solaris 2.4 readdir on NFS file systems. */
+ if (!namep[0] || (namep[0] == '.' && (namep[1] == '.' || namep[1] == 0)))
+ continue;
+ }
+
+#if defined HAVE_STRUCT_DIRENT_D_TYPE
+ if (dp->d_type != DT_UNKNOWN)
+ mode = type_to_mode (dp->d_type);
+#endif
/* Append this directory entry's name to the path being searched. */
file_len = pathname_len + strlen (namep);
@@ -1482,7 +1548,7 @@ process_dir (char *pathname, char *name, int pathlen,
const struct stat *statp,
}
free (cur_path);
- free_dirinfo (dirinfo);
+ CLOSEDIR (dirp);
}
if (subdirs_unreliable)
diff --git a/find/testsuite/sv-34079.sh b/find/testsuite/sv-34079.sh
index ea6aea6..5773a3f 100755
--- a/find/testsuite/sv-34079.sh
+++ b/find/testsuite/sv-34079.sh
@@ -52,13 +52,14 @@ if [[ -n "${RUN_VERY_EXPENSIVE_TESTS}" ]]; then
# it stores all the directory entries. Hence the excessive
# memory consumption bug applies to oldfind even though it is
# not using fts.
- exe="${ftsfind}"
- echo "Checking memory consumption of ${exe}..." >&2
- if ( ulimit -v 50000 && ${exe} "${outdir}" >/dev/null; ); then
- echo "Memory consumption of ${exe} is reasonable" >&2
- else
- bad="${bad}${bad:+\n}Memory consumption of ${exe} is too high"
- fi
+ for exe in "${ftsfind}" "${oldfind}"; do
+ echo "Checking memory consumption of ${exe}..." >&2
+ if ( ulimit -v 50000 && ${exe} "${outdir}" >/dev/null; ); then
+ echo "Memory consumption of ${exe} is reasonable" >&2
+ else
+ bad="${bad}${bad:+\n}Memory consumption of ${exe} is too high"
+ fi
+ done
else
bad="failed to set up the test in ${outdir}"
fi
--
1.7.2.5
- [PATCH] rm, du, chmod, chown, chgrp: use much less memory for large directories, Jim Meyering, 2011/08/19
- Re: [PATCH] rm, du, chmod, chown, chgrp: use much less memory for large directories, Erik Auerswald, 2011/08/19
- [PATCH 2/4] Reduce memory consumption of oldfind on large directories.,
James Youngman <=
- [PATCH 3/4] Remove no-longer-used files savedir.[ch]., James Youngman, 2011/08/20
- [PATCH 1/4] Reduce memory consumption of fts., James Youngman, 2011/08/20
- [PATCH 4/4] Describe recent memory savings on very large directories., James Youngman, 2011/08/20
- RE: [PATCH] rm, du, chmod, chown, chgrp: use much less memory for large directories, Voelker, Bernhard, 2011/08/23