Add -p COMMAND / --preprocess=COMMAND to GNU grep 3.1.

What the patch does
  * After an input file has been opened (just before the existing call to
    grepdesc), a pipe is created and a child process is forked.  The child
    makes the opened file its standard input and the write end of the pipe
    its standard output, sets GREP_INPUT to the file name, and executes
    ${GREP_SHELL:-/bin/sh} -c COMMAND.  grep then searches the read end of
    the pipe in place of the file.
  * At the "closeout:" label the child is sent SIGKILL and reaped; grep
    exits with status 2 if the child had exited with a non-zero status.
  * The option is added to the short and long option tables, to the usage
    text, and to the manual page.

Review changes relative to the submitted patch
  * fork () replaces vfork ().  The child calls setenv, strtok and fprintf
    before exec, which a vfork child is not allowed to do; where vfork
    shares the parent's memory, strtok would also cut the parent's copy of
    $GREP_SHELL at its first blank for every later file.
  * The child leaves through _exit (3) after a failed exec instead of
    exit (3), so it does not run exit-time processing inherited from grep.
  * An unset, empty or all-blank GREP_SHELL now falls back to /bin/sh;
    a blank value used to hand a NULL program name to execvp.
  * The default shell name is a writable array because strtok takes a
    modifiable string.
  * The exec-failure format string had been split across two lines (an
    unterminated string literal); it is one line again, and the message
    names the program that was actually passed to execvp.
  * grep.in.1: "GREP_INPUT set to" -> "GREP_INPUT is set to".
  The number of added lines in every hunk is unchanged, so the hunk headers
  below are the submitted ones (except the first, see next section).

Reconstruction notes
  * Line breaks and indentation were reconstructed from the hunk line
    counts; whitespace in context lines may not match the pristine files
    byte for byte.  NOTE(review): if it does not, GNU patch's
    -l / --ignore-whitespace option is intended for this situation.
  * The header names in the first grep.c hunk were missing from the text
    under review.  It is rewritten as a zero-context hunk at the same
    position that adds the two headers the new code needs: <signal.h> for
    kill/SIGKILL and <sys/wait.h> for waitpid/WIFEXITED/WEXITSTATUS.
    NOTE(review): confirm against the original submission.

--- grep-3.1-orig/grep-3.1/src/grep.c 2017-07-02 13:41:41.000000000 -0400
+++ grep-3.1/src/grep.c 2018-03-29 18:01:35.556895798 -0400
@@ -23,0 +24,2 @@
+#include <signal.h>
+#include <sys/wait.h>
@@ -414,7 +416,7 @@
 static struct exclude *excluded_directory_patterns[2];
 /* Short options. */
 static char const short_options[] =
-"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
+"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZzp:";
 
 /* Non-boolean long options that have no corresponding short equivalents. */
 enum
@@ -483,6 +485,7 @@
   {"version", no_argument, NULL, 'V'},
   {"with-filename", no_argument, NULL, 'H'},
   {"word-regexp", no_argument, NULL, 'w'},
+  {"preprocess", required_argument, NULL, 'p'},
   {0, 0, 0, 0}
 };
 
@@ -534,6 +537,9 @@
   SKIP_DEVICES
 } devices = READ_COMMAND_LINE_DEVICES;
 
+static char *preproc; /* if non-NULL pipe each file input to this command */
+static pid_t preprocPID; /* current preproc decoder child PID to wait for. */
+
 static bool grepfile (int, char const *, bool, bool);
 static bool grepdesc (int, bool);
 
@@ -1702,6 +1708,40 @@
         suppressible_error (errno);
       return true;
     }
+  if (preproc) { /* desc <- preprocessing decoder output */
+    int fds[2], ac = 0;
+    char *sh, *av[5 + 3], defsh[] = "/bin/sh"; /* av: <=5 words, -c, cmd, NULL */
+    if (pipe (fds) == -1)
+      exit (2);
+    switch (preprocPID = fork ()) { /* not vfork: child runs setenv/strtok */
+    case 0: /* child */
+      dup2 (desc, 0); /* stdin = desc on file itself */
+      dup2 (fds[1], 1); /* stdout = write end of pipe [1] */
+      close (fds[1]); /* do not need extra handle on pipe */
+      close (fds[0]); /* close read end of pipe [0] */
+      setenv ("GREP_INPUT", filename, 1);
+      if (!(sh = getenv ("GREP_SHELL")) || !sh[strspn (sh, " \t")])
+        sh = defsh; /* unset or blank: default shell, writable for strtok */
+      ac = 0; /* word-split $GREP_SHELL a few times */
+      av[ac] = strtok (sh, " \t");
+      for (ac++; ac < 5; ac++)
+        if (!(av[ac] = strtok (NULL, " \t")))
+          break;
+      av[ac++] = "-c"; /* append -c preproc */
+      av[ac++] = preproc;
+      av[ac] = NULL;
+      execvp (av[0], av); /* become a preprocessing decoder */
+      fprintf (stderr, "%s -c \"%s\": %s\n", av[0], preproc, strerror (errno));
+      _exit (3); /* exec failed: leave without grep's exit-time processing */
+    default: /* parent */
+      close (fds[1]); /* close write end of pipe[1] */
+      close (desc); /* close desc on file itself */
+      desc = fds[0]; /* replace desc value with read end of pipe [0] */
+      break;
+    case -1: /* fork failed => resource exhaustion => die */
+      exit (2);
+    }
+  }
   return grepdesc (desc, command_line);
 }
 
@@ -1876,6 +1916,19 @@
     }
 
  closeout:
+  if (preprocPID) {
+    int wstatus = 0; /* stays 0, i.e. "exited successfully", if waitpid fails */
+    /* Do not decode whole file if it won't be read anyway, e.g. -l mode. */
+    kill (preprocPID, SIGKILL); /* waitpid NOHANG before killing? */
+    while (waitpid (preprocPID, &wstatus, 0) == -1)
+      if (errno != EINTR)
+        break;
+    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS)
+      exit (2); /* a killed child is not WIFEXITED; only get here on cmd fail */
+    preprocPID = 0;
+    /*XXX verify that signal was SIGKILL and perhaps die otherwise? */
+  }
+
   if (desc != STDIN_FILENO && close (desc) != 0)
     suppressible_error (errno);
   return status;
@@ -1957,6 +2010,7 @@
 "));
       printf (_("\
   -I                        equivalent to --binary-files=without-match\n\
+  -p, --preprocess=COMMAND  pipe file inputs to stdin of COMMAND pre-search\n\
   -d, --directories=ACTION  how to handle directories;\n\
                             ACTION is 'read', 'recurse', or 'skip'\n\
   -D, --devices=ACTION      how to handle devices, FIFOs and sockets;\n\
@@ -2676,6 +2730,10 @@
         eolbyte = '\0';
         break;
 
+      case 'p':
+        preproc = optarg;
+        break;
+
       case BINARY_FILES_OPTION:
         if (STREQ (optarg, "binary"))
           binary_files = BINARY_BINARY_FILES;
--- grep-3.1-orig/grep-3.1/doc/grep.in.1 2017-06-24 23:05:03.000000000 -0400
+++ grep-3.1/doc/grep.in.1 2018-03-29 17:55:56.579906789 -0400
@@ -462,6 +462,30 @@
 in the environment, in order to find more matches even if the matches are
 unsafe for direct display.
 .TP
+.BI \-p " COMMAND" "\fR,\fP \-\^\-preprocess=" COMMAND
+If this option is given, for each input file
+.I COMMAND
+is invoked with that input file as standard input and with grep reading
+and searching the standard output of
+.I COMMAND
+instead of direct file contents. E.g., if
+.I COMMAND
+is
+.RB "\*(lq" "gzip -dc" "\*(rq"
+grep will decompress each file before searching it, assuming all
+inputs are gzipped.
+.I COMMAND
+is run via \fB${GREP_SHELL:-/bin/sh} -c COMMAND\fR.
+.IP
+The environment variable GREP_INPUT is set to each input filename.
+This may be useful to dispatch to appropriate decoders.
+E.g., with a standard shell, (note single/double quoting),
+.nf
+ GPP='case "$GREP_INPUT" in *.gz) gzip -dc;; *) cat;; esac'
+.fi
+will allow \fB grep -p"$GPP"\fR to search through a mixture of
+gzipped and ordinary files.
+.TP
 .BI \-D " ACTION" "\fR,\fP \-\^\-devices=" ACTION
 If an input file is a device, FIFO or socket, use
 .I ACTION
@@ -1187,7 +1211,7 @@
 .SH "SEE ALSO"
 .SS "Regular Manual Pages"
 awk(1), cmp(1), diff(1), find(1), gzip(1),
-perl(1), sed(1), sort(1), xargs(1), zgrep(1),
+perl(1), sed(1), sort(1), xargs(1), zgrep(1), gunzip(1),
 read(2),
 pcre(3), pcresyntax(3), pcrepattern(3),
 terminfo(5),