bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep: very large file with no newline causes trouble


From: Jim Meyering
Subject: grep: very large file with no newline causes trouble
Date: Mon, 17 Mar 2003 22:46:49 +0100

Package: grep
Version: 2.5.1-4
Severity: normal
Tags: upstream patch

grep doesn't deal well with very large files containing no line terminator.
I ran grep in a directory where I thought it'd find matches and complete
in a fraction of a second.  I was surprised to see it appear to hang and
finally exit with only this error: `grep: memory exhausted'.  The failure
was due to the presence of a file I'd created like this:

  dd bs=1 seek=1T of=big < /dev/null

So, even though that file occupies no disk blocks, grep thinks
it has to search a terabyte of data, and blithely tries to read
the first line (the entire file) into memory.

Of course this problem isn't unique to grep.
Any program that reads its input into memory one line at a time
is likely to meet the same fate.  But grep is a little different
from other tools in that one may well use it to search an entire
hierarchy for a file containing a particular string.  The mere
presence of a very large file should not cause such a search to fail.

Below, I've included a patch that makes grep report the error,
free the allocated memory, and continue with any remaining files.

I tested it like this:

  $ echo a | ./grep a big -
  grep: big: No space left on device
  (standard input):a
  [Exit 2]

2003-03-16  Jim Meyering  <address@hidden>

        When grep runs out of memory, don't abort the entire command,
        but rather just the affected command line argument(s).
        * src/grep.c: Include <setjmp.h>.
        (jumpbuf, stdin_argv): New globals.
        (next_arg): New function.
        (main): Rearrange main loop so that there is only one grepfile call.
        Arrange for a failed xrealloc to result in a longjmp back into main
        so grep continues with any remaining arguments.
        Declare affected locals to be static.
        * lib/xmalloc.c (xrealloc): Upon failure, free P.

diff -F '^[_a-zA-Z$]' -ru grep-2.5.1/lib/xmalloc.c grep-2.5.1-new/lib/xmalloc.c
--- grep-2.5.1/lib/xmalloc.c    2001-03-04 06:33:12.000000000 +0100
+++ grep-2.5.1-new/lib/xmalloc.c        2003-03-16 10:02:56.000000000 +0100
@@ -96,10 +96,14 @@ xmalloc (size_t n)
 void *
 xrealloc (void *p, size_t n)
 {
-  p = realloc (p, n);
-  if (p == 0)
-    xalloc_die ();
-  return p;
+  char *new_p = realloc (p, n);
+  if (new_p == 0)
+    {
+      if (p)
+       free (p);
+      xalloc_die ();
+    }
+  return new_p;
 }
 
 /* Allocate memory for N elements of S bytes, with error checking.  */
diff -F '^[_a-zA-Z$]' -ru grep-2.5.1/src/grep.c grep-2.5.1-new/src/grep.c
--- grep-2.5.1/src/grep.c       2002-03-26 16:54:12.000000000 +0100
+++ grep-2.5.1-new/src/grep.c   2003-03-16 11:20:45.000000000 +0100
@@ -1,5 +1,5 @@
 /* grep.c - main driver file for grep.
-   Copyright 1992, 1997-1999, 2000 Free Software Foundation, Inc.
+   Copyright 1992, 1997-1999, 2000, 2003 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@
 # include <config.h>
 #endif
 #include <sys/types.h>
+#include <setjmp.h>
 #include <sys/stat.h>
 #if defined(HAVE_MMAP)
 # include <sys/mman.h>
@@ -82,6 +83,15 @@ static struct exclude *included_patterns
 static char const short_options[] =
 "0123456789A:B:C:D:EFGHIPUVX:abcd:e:f:hiKLlm:noqRrsuvwxyZz";
 
+/* Record target for longjmp upon failed xmalloc.  */
+static jmp_buf jumpbuf;
+
+/* Default for `file_list' if no files are given on the command line. */
+static char *stdin_argv[] =
+{
+  "-", NULL
+};
+
 /* Non-boolean long options that have no corresponding short equivalents.  */
 enum
 {
@@ -1277,17 +1287,29 @@ get_nondigit_option (int argc, char *con
   return opt;
 }
 
+/* This function is called via xmalloc.c's xalloc_fail_func
+   variable upon memory allocation failure.  */
+static void
+next_arg (void)
+{
+  longjmp (jumpbuf, 1);
+}
+
 int
 main (int argc, char **argv)
 {
   char *keys;
   size_t keycc, oldcc, keyalloc;
   int with_filenames;
-  int opt, cc, status;
+  int opt, cc;
   int default_context;
   FILE *fp;
   extern char *optarg;
   extern int optind;
+  /* Put the following two variables in static storage, so they can't
+     be clobbered by the potential longjmp into this function.  */
+  static char **file_list;
+  static int status;
 
   initialize_main (&argc, &argv);
   program_name = argv[0];
@@ -1712,29 +1734,42 @@ warranty; not even for MERCHANTABILITY o
   if (max_count == 0)
     exit (1);
 
-  if (optind < argc)
+  file_list = (optind == argc ? stdin_argv : &argv[optind]);
+
+  /* In case xrealloc fails, arrange to give a diagnostic for the
+     current file and to continue with any subsequent arguments.  */
+  xalloc_fail_func = next_arg;
+
+  status = 1;
+  while (1)
     {
-       status = 1;
-       do
+      char *file = *file_list++;
+
+      if (file == NULL)
+       break;
+
+      if ((included_patterns || excluded_patterns)
+         && !isdir (file))
        {
-         char *file = argv[optind];
-         if ((included_patterns || excluded_patterns)
-             && !isdir (file))
-           {
-             if (included_patterns &&
-                 ! excluded_filename (included_patterns, file, 0))
-               continue;
-             if (excluded_patterns &&
-                 excluded_filename (excluded_patterns, file, 0))
-               continue;
-           }
-         status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
-                             &stats_base);
+         if (included_patterns &&
+             ! excluded_filename (included_patterns, file, 0))
+           continue;
+         if (excluded_patterns &&
+             excluded_filename (excluded_patterns, file, 0))
+           continue;
+       }
+      if (setjmp (jumpbuf) == 0)
+       {
+         status &= grepfile (strcmp (file, "-") == 0
+                             ? (char *) NULL : file, &stats_base);
+       }
+      else
+       {
+         /* This happens when xmalloc fails.  */
+         error (0, errno, "%s", file);
+         status = 1;
        }
-       while ( ++optind < argc);
     }
-  else
-    status = grepfile ((char *) NULL, &stats_base);
 
   /* We register via atexit() to test stdout.  */
   exit (errseen ? 2 : status);


-- System Information:
Debian Release: testing/unstable
Architecture: i386
Kernel: Linux xxy 2.4.19-xfs
Locale: LANG=C, LC_CTYPE=C

Versions of packages grep depends on:
ii  libc6                         2.3.1-14   GNU C Library: Shared libraries an

-- no debconf information




reply via email to

[Prev in Thread] Current Thread [Next in Thread]