bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: SEEK_HOLE defined but useless on linux-3.4+/ext4 [Re: small ascii files can be sparse


From: Paul Eggert
Subject: Re: SEEK_HOLE defined but useless on linux-3.4+/ext4 [Re: small ascii files can be sparse
Date: Tue, 31 Jul 2012 15:44:22 -0700
User-agent: Mozilla/5.0 (X11; Linux i686; rv:14.0) Gecko/20120714 Thunderbird/14.0

On 07/31/2012 11:43 AM, Jim Meyering wrote:
> It seems a little too far-fetched
> even for me.

Hah!  I never thought I'd get you to say *that*!

I suppose you're right.  As far as POSIX-conformance goes,
apps are not required to work "correctly" in the presence
of mount+unmount syscalls by other processes, and as
a practical matter it is a rare case where it'd matter.
Still, it'd be nice to avoid the extra complexity, if
we could.

It's too bad that there's no reliable way to tell whether
SEEK_HOLE really works without writing to the file system
in question (which'd be even worse).

> I do not like it when grep -r mysteriously hangs and makes
> my system run out of memory.  This really does affect me a couple
> times per month, at least.

This can happen when files look like text for the first 32 KiB,
but have big holes later.  If that's the problem, how
about the following patch?

From ca6e5ec860cf5a2af060e2a5c8c53d9004ddc131 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Tue, 31 Jul 2012 15:37:40 -0700
Subject: [PATCH] grep: switch to binary mode when encountering binary data
 anywhere

* src/main.c (buffer_is_binary): New function.
(file_is_binary): Use it.  Test for binary data anywhere in the
file, not just at the start; and when it's encountered, switch
to binary mode.
---
 src/main.c |   45 +++++++++++++++++++++++++++++++--------------
 1 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/main.c b/src/main.c
index 84066d8..c2958a7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -436,6 +436,12 @@ clean_up_stdout (void)
     close_stdout ();
 }
 
+static int
+buffer_is_binary (char const *buf, size_t bufsize)
+{
+  return memchr (buf, eolbyte ? 0 : '\200', bufsize) != 0;
+}
+
 /* Return 1 if a file is known to be binary for the purpose of 'grep'.
    BUF, of size BUFSIZE, is the initial buffer read from the file with
    descriptor FD and status ST.  */
@@ -446,18 +452,13 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
   enum { SEEK_HOLE = SEEK_END };
   #endif
 
-  /* If -z, test only whether the initial buffer contains '\200';
-     knowing about holes won't help.  */
-  if (! eolbyte)
-    return memchr (buf, '\200', bufsize) != 0;
-
-  /* If the initial buffer contains a null byte, guess that the file
+  /* If the initial buffer contains a binary byte, guess that the file
      is binary.  */
-  if (memchr (buf, '\0', bufsize))
+  if (buffer_is_binary (buf, bufsize))
     return 1;
 
   /* If the file has holes, it must contain a null byte somewhere.  */
-  if (SEEK_HOLE != SEEK_END && usable_st_size (st))
+  if (SEEK_HOLE != SEEK_END && eolbyte && usable_st_size (st))
     {
       off_t cur = bufsize;
       if (O_BINARY || fd == STDIN_FILENO)
@@ -1155,6 +1156,8 @@ grep (int fd, struct stat const *st)
   char *beg;
   char *lim;
   char eol = eolbyte;
+  int test_for_binary = ((binary_files == BINARY_BINARY_FILES && !out_quiet)
+                         || binary_files == WITHOUT_MATCH_BINARY_FILES);
 
   if (! reset (fd, st))
     return 0;
@@ -1176,13 +1179,16 @@ grep (int fd, struct stat const *st)
       return 0;
     }
 
-  not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
-               || binary_files == WITHOUT_MATCH_BINARY_FILES)
+  not_text = (test_for_binary
               && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
-  if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
-    return 0;
-  done_on_match += not_text;
-  out_quiet += not_text;
+  if (not_text)
+    {
+      if (binary_files == WITHOUT_MATCH_BINARY_FILES)
+        return 0;
+      test_for_binary = 0;
+      done_on_match++;
+      out_quiet++;
+    }
 
   for (;;)
     {
@@ -1246,6 +1252,17 @@ grep (int fd, struct stat const *st)
           suppressible_error (filename, errno);
           goto finish_grep;
         }
+      if (test_for_binary
+          && buffer_is_binary (bufbeg + save, buflim - bufbeg - save))
+        {
+          if ((nlines && !out_invert)
+              || binary_files == WITHOUT_MATCH_BINARY_FILES)
+            goto finish_grep;
+          not_text = 1;
+          test_for_binary = 0;
+          done_on_match++;
+          out_quiet++;
+        }
     }
   if (residue)
     {
-- 
1.7.6.5





reply via email to

[Prev in Thread] Current Thread [Next in Thread]