bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: bug #17457: "grep -r foo . > somefile" goes into an infinite loop


From: Jim Meyering
Subject: Re: bug #17457: "grep -r foo . > somefile" goes into an infinite loop
Date: Sat, 23 Jul 2011 23:12:24 +0200

Hi Paolo,

In my experience, dev/ino is sufficient, as long as you're not using one of
a few POSIX-violating fringe file systems (clearcase's MVS comes to mind).
I remember problems years ago that were fixed when Paul added ctime,
mtime, etc. checks to diffutils.

So for a case like this, I think we should use something
like this (see the long comment just preceding it in
http://git.savannah.gnu.org/cgit/diffutils.git/tree/src/system.h#n171):

#ifndef same_file_attributes
# define same_file_attributes(s, t) \
   ((s)->st_mode == (t)->st_mode \
    && (s)->st_nlink == (t)->st_nlink \
    && (s)->st_uid == (t)->st_uid \
    && (s)->st_gid == (t)->st_gid \
    && (s)->st_size == (t)->st_size \
    && (s)->st_mtime == (t)->st_mtime \
    && (s)->st_ctime == (t)->st_ctime)
#endif

I'd be surprised if there's any system on which adding an st_birthtime
comparison would change the outcome.

Here's a preliminary patch.
I will add a test case and update NEWS.  In the mean time, if anyone knows
who submitted the proposed patch, I'll mention that it inspired mine.

With that change, now I see this behavior:

    $ mkdir d && touch d/k && ./grep -r pat d > d/k
    ./grep: input file `d/k' is also the output
    [Exit 2]

Before, depending on pattern/contents, it would
either exit 0, 1 or fill your partition.


>From ed7715eaf0deea8d5b4c925a7358d05ab1688d34 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Sat, 23 Jul 2011 22:52:06 +0200
Subject: [PATCH] fail with exit status 2 when an input file is the same as
 the output

This avoids a potential "infinite" disk-filling loop.
Reported in http://savannah.gnu.org/patch/?5316
and http://savannah.gnu.org/bugs/?17457.
* src/main.c: Include "quote.h".
(out_stat): New global.
(grepfile): Compare each regular file's dev/ino/etc.
with those from the file on stdout (if it too is regular).
(main): Set out_stat, if stdout is a regular file.
* src/system.h: Include "same-inode.h".
(same_file_attributes): Define.  From diffutils.
(SAME_REGULAR_FILE): Define.
* bootstrap.conf (gnulib_modules): Use quote, not quotearg.
Use same-inode.
---
 bootstrap.conf |    3 ++-
 src/main.c     |   28 ++++++++++++++++++++++++++++
 src/system.h   |   42 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 143d7b5..66c239e 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -57,10 +57,11 @@ obstack
 open
 progname
 propername
-quotearg
+quote
 readme-release
 realloc-gnu
 regex
+same-inode
 ssize_t
 stddef
 stdlib
diff --git a/src/main.c b/src/main.c
index 4e01a17..da74916 100644
--- a/src/main.c
+++ b/src/main.c
@@ -44,6 +44,7 @@
 #include "isdir.h"
 #include "progname.h"
 #include "propername.h"
+#include "quote.h"
 #include "savedir.h"
 #include "version-etc.h"
 #include "xalloc.h"
@@ -59,6 +60,11 @@
   proper_name ("Mike Haertel"), \
   _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")

+/* When stdout is connected to a regular file, save its stat
+   information here, so that we can automatically skip it, thus
+   avoiding a potential (racy) infinite loop.  */
+static struct stat out_stat;
+
 struct stats
 {
   struct stats const *parent;
@@ -1212,6 +1218,24 @@ grepfile (char const *file, struct stats *stats)
                                       || S_ISSOCK (stats->stat.st_mode)
                                       || S_ISFIFO (stats->stat.st_mode)))
         return 1;
+
+      /* If there's a regular file on stdout and the current file refers
+         to the same i-node, we have to report the problem and skip it.
+         Otherwise when matching lines from some other input reach the
+         disk before we open this file, we can end up reading and matching
+         those lines and appending them to the file from which we're reading.
+         Then we'd have what appears to be an infinite loop that'd terminate
+         only upon filling the output file system or reaching a quota.  */
+      if (S_ISREG (stats->stat.st_mode) && out_stat.st_ino
+          && SAME_REGULAR_FILE (stats->stat, out_stat))
+        {
+          if (! suppress_errors)
+            error (0, 0, _("input file %s is also the output"),
+                   quote (file));
+          errseen = 1;
+          return 1;
+        }
+
       while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
         continue;

@@ -2121,6 +2145,10 @@ main (int argc, char **argv)
   if (show_help)
     usage (EXIT_SUCCESS);

+  struct stat tmp_stat;
+  if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
+    out_stat = tmp_stat;
+
   if (keys)
     {
       if (keycc == 0)
diff --git a/src/system.h b/src/system.h
index 199b485..e7afc46 100644
--- a/src/system.h
+++ b/src/system.h
@@ -27,6 +27,7 @@
 #include "configmake.h"
 #include "dirname.h"
 #include "minmax.h"
+#include "same-inode.h"

 #if O_BINARY
 # define HAVE_DOS_FILE_CONTENTS 1
@@ -53,8 +54,47 @@ enum { EXIT_TROUBLE = 2 };
 #include <locale.h>

 #ifndef initialize_main
-#define initialize_main(argcp, argvp)
+# define initialize_main(argcp, argvp)
 #endif

+/* Do struct stat *S, *T have the same file attributes?
+
+   POSIX says that two files are identical if st_ino and st_dev are
+   the same, but many file systems incorrectly assign the same (device,
+   inode) pair to two distinct files, including:
+
+   - GNU/Linux NFS servers that export all local file systems as a
+     single NFS file system, if a local device number (st_dev) exceeds
+     255, or if a local inode number (st_ino) exceeds 16777215.
+
+   - Network Appliance NFS servers in snapshot directories; see
+     Network Appliance bug #195.
+
+   - ClearCase MVFS; see bug id ATRia04618.
+
+   Check whether two files that purport to be the same have the same
+   attributes, to work around instances of this common bug.  Do not
+   inspect all attributes, only attributes useful in checking for this
+   bug.
+
+   It's possible for two distinct files on a buggy file system to have
+   the same attributes, but it's not worth slowing down all
+   implementations (or complicating the configuration) to cater to
+   these rare cases in buggy implementations.  */
+
+#ifndef same_file_attributes
+# define same_file_attributes(s, t) \
+   ((s)->st_mode == (t)->st_mode \
+    && (s)->st_nlink == (t)->st_nlink \
+    && (s)->st_uid == (t)->st_uid \
+    && (s)->st_gid == (t)->st_gid \
+    && (s)->st_size == (t)->st_size \
+    && (s)->st_mtime == (t)->st_mtime \
+    && (s)->st_ctime == (t)->st_ctime)
+#endif
+
+#define SAME_REGULAR_FILE(s, t) \
+  (SAME_INODE (s, t) && same_file_attributes (&s, &t))
+
 #include "unlocked-io.h"
 #endif
--
1.7.6.609.gbf6a9



reply via email to

[Prev in Thread] Current Thread [Next in Thread]