[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#17156: [PATCH 2/5] Revert conversion to shell scripts
From: |
Paolo Bonzini |
Subject: |
bug#17156: [PATCH 2/5] Revert conversion to shell scripts |
Date: |
Tue, 1 Apr 2014 11:18:43 +0200 |
This reverts the following commits:
- e4994366166059c7320fbb97a930100b1cc79e61.
- ddea6c8ea3529f3364257e0e239775c63bf1b95d.
- b639643840ef506594b6c46e5b24d9980a33e78e.
The portability requirements of grep mandate higher standards than
this. Even though egrep and fgrep have been deprecated, most users
will be using them instead of grep -E and -F, on MS Windows systems
too where the native command shell is used.
There was a typo in the matcher-name comparisons; "-P" is "perl",
not "pcre". Fix it.
---
gnulib | 2 +-
po/POTFILES.in | 3 +
src/Makefile.am | 28 +-
src/dfasearch.c | 12 +
src/egrep.c | 12 +
src/egrep.sh | 10 -
src/fgrep.c | 12 +
src/grep.c | 2496 +------------------------------------------------------
src/grep.h | 17 +
src/main.c | 2478 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/search.h | 2 +
11 files changed, 2560 insertions(+), 2512 deletions(-)
create mode 100644 src/egrep.c
delete mode 100644 src/egrep.sh
create mode 100644 src/fgrep.c
create mode 100644 src/main.c
diff --git a/gnulib b/gnulib
index 24379a9..497f4cd 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 24379a9217fa4bd62685795aaaa010fd90ced9e3
+Subproject commit 497f4cdf8d4ef8c73059e68030c824949ae32649
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 399bd2f..c6385d2 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -27,6 +27,9 @@ lib/version-etc.c
lib/xalloc-die.c
lib/xstrtol-error.c
src/dfa.c
+src/egrep.c
+src/fgrep.c
src/grep.c
src/kwset.c
+src/main.c
src/pcresearch.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 3487848..9283d1a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,20 +21,23 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
# Tell the linker to omit references to unused shared libraries.
AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
-bin_PROGRAMS = grep
-bin_SCRIPTS = egrep fgrep
-grep_SOURCES = grep.c searchutils.c \
- dfa.c dfasearch.c \
- kwset.c kwsearch.c \
- pcresearch.c
+bin_PROGRAMS = grep egrep fgrep
+grep_SOURCES = grep.c
+egrep_SOURCES = egrep.c
+fgrep_SOURCES = fgrep.c
noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h mbsupport.h
+noinst_LIBRARIES = libgrep.a
+libgrep_a_SOURCES = kwset.c dfa.c searchutils.c dfasearch.c kwsearch.c \
+ pcresearch.c main.c
+
# Sometimes, the expansion of $(LIBINTL) includes -lc which may
# include modules defining variables like 'optind', so libgreputils.a
# must precede $(LIBINTL) in order to ensure we use GNU getopt.
# But libgreputils.a must also follow $(LIBINTL), since libintl uses
# replacement functions defined in libgreputils.a.
LDADD = \
+ libgrep.a \
../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \
$(LIBTHREAD)
@@ -42,15 +45,4 @@ grep_LDADD = $(LDADD) $(LIB_PCRE)
localedir = $(datadir)/locale
AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
-EXTRA_DIST = dosbuf.c egrep.sh
-
-egrep fgrep: egrep.sh Makefile
- $(AM_V_GEN)grep=`echo grep | sed -e '$(transform)'` && \
- case $@ in egrep) option=-E;; fgrep) option=-F;; esac && \
- sed -e 's|[@]SHELL@|$(SHELL)|g' \
- -e "s|[@]grep@|$$grep|g" \
- -e "s|[@]option@|$$option|g" <$(srcdir)/egrep.sh >$@-t
- $(AM_V_at)chmod +x $@-t
- $(AM_V_at)mv $@-t $@
-
-CLEANFILES = egrep fgrep *-t
+EXTRA_DIST = dosbuf.c
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d098a9b..d3a8af3 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -406,3 +406,15 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
*match_size = len;
return off;
}
+
+void
+Gcompile (char const *pattern, size_t size)
+{
+ GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES);
+}
+
+void
+Ecompile (char const *pattern, size_t size)
+{
+ GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
+}
diff --git a/src/egrep.c b/src/egrep.c
new file mode 100644
index 0000000..4c34694
--- /dev/null
+++ b/src/egrep.c
@@ -0,0 +1,12 @@
+#include <config.h>
+#include "search.h"
+
+struct matcher const matchers[] = {
+ { "egrep", Ecompile, EGexecute },
+ { NULL, NULL, NULL },
+};
+
+const char before_options[] =
+N_("PATTERN is an extended regular expression (ERE).\n");
+const char after_options[] =
+N_("Invocation as 'egrep' is deprecated; use 'grep -E' instead.\n");
diff --git a/src/egrep.sh b/src/egrep.sh
deleted file mode 100644
index f1b4146..0000000
--- a/src/egrep.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!@SHELL@
-grep=grep
-case $0 in
- */*)
- if test -x "${0%/*}/@grep@"; then
- PATH=${0%/*}:$PATH
- grep=@grep@
- fi;;
-esac
-exec $grep @option@ "$@"
diff --git a/src/fgrep.c b/src/fgrep.c
new file mode 100644
index 0000000..a0940cc
--- /dev/null
+++ b/src/fgrep.c
@@ -0,0 +1,12 @@
+#include <config.h>
+#include "search.h"
+
+struct matcher const matchers[] = {
+ { "fgrep", Fcompile, Fexecute },
+ { NULL, NULL, NULL },
+};
+
+const char before_options[] =
+N_("PATTERN is a set of newline-separated fixed strings.\n");
+const char after_options[] =
+N_("Invocation as 'fgrep' is deprecated; use 'grep -F' instead.\n");
diff --git a/src/grep.c b/src/grep.c
index a1bccdb..3572017 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1,1665 +1,5 @@
-/* grep.c - main driver file for grep.
- Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-/* Written July 1992 by Mike Haertel. */
-
#include <config.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "mbsupport.h"
-#include <wchar.h>
-#include <wctype.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include "system.h"
-
-#include "argmatch.h"
-#include "c-ctype.h"
-#include "closeout.h"
-#include "colorize.h"
-#include "dfa.h"
-#include "error.h"
-#include "exclude.h"
-#include "exitfail.h"
-#include "fcntl-safer.h"
-#include "fts_.h"
-#include "getopt.h"
-#include "grep.h"
-#include "intprops.h"
-#include "progname.h"
-#include "propername.h"
-#include "quote.h"
-#include "safe-read.h"
#include "search.h"
-#include "version-etc.h"
-#include "xalloc.h"
-#include "xstrtol.h"
-
-#define SEP_CHAR_SELECTED ':'
-#define SEP_CHAR_REJECTED '-'
-#define SEP_STR_GROUP "--"
-
-#define AUTHORS \
- proper_name ("Mike Haertel"), \
- _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")
-
-/* When stdout is connected to a regular file, save its stat
- information here, so that we can automatically skip it, thus
- avoiding a potential (racy) infinite loop. */
-static struct stat out_stat;
-
-/* if non-zero, display usage information and exit */
-static int show_help;
-
-/* If non-zero, print the version on standard output and exit. */
-static int show_version;
-
-/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */
-static int suppress_errors;
-
-/* If nonzero, use color markers. */
-static int color_option;
-
-/* If nonzero, show only the part of a line matching the expression. */
-static int only_matching;
-
-/* If nonzero, make sure first content char in a line is on a tab stop. */
-static int align_tabs;
-
-/* The group separator used when context is requested. */
-static const char *group_separator = SEP_STR_GROUP;
-
-/* The context and logic for choosing default --color screen attributes
- (foreground and background colors, etc.) are the following.
- -- There are eight basic colors available, each with its own
- nominal luminosity to the human eye and foreground/background
- codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
- magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
- yellow [89 %, 33/43], and white [100 %, 37/47]).
- -- Sometimes, white as a background is actually implemented using
- a shade of light gray, so that a foreground white can be visible
- on top of it (but most often not).
- -- Sometimes, black as a foreground is actually implemented using
- a shade of dark gray, so that it can be visible on top of a
- background black (but most often not).
- -- Sometimes, more colors are available, as extensions.
- -- Other attributes can be selected/deselected (bold [1/22],
- underline [4/24], standout/inverse [7/27], blink [5/25], and
- invisible/hidden [8/28]). They are sometimes implemented by
- using colors instead of what their names imply; e.g., bold is
- often achieved by using brighter colors. In practice, only bold
- is really available to us, underline sometimes being mapped by
- the terminal to some strange color choice, and standout best
- being left for use by downstream programs such as less(1).
- -- We cannot assume that any of the extensions or special features
- are available for the purpose of choosing defaults for everyone.
- -- The most prevalent default terminal backgrounds are pure black
- and pure white, and are not necessarily the same shades of
- those as if they were selected explicitly with SGR sequences.
- Some terminals use dark or light pictures as default background,
- but those are covered over by an explicit selection of background
- color with an SGR sequence; their users will appreciate their
- background pictures not be covered like this, if possible.
- -- Some uses of colors attributes is to make some output items
- more understated (e.g., context lines); this cannot be achieved
- by changing the background color.
- -- For these reasons, the grep color defaults should strive not
- to change the background color from its default, unless it's
- for a short item that should be highlighted, not understated.
- -- The grep foreground color defaults (without an explicitly set
- background) should provide enough contrast to be readable on any
- terminal with either a black (dark) or white (light) background.
- This only leaves red, magenta, green, and cyan (and their bold
- counterparts) and possibly bold blue. */
-/* The color strings used for matched text.
- The user can overwrite them using the deprecated
- environment variable GREP_COLOR or the new GREP_COLORS. */
-static const char *selected_match_color = "01;31"; /* bold red */
-static const char *context_match_color = "01;31"; /* bold red */
-
-/* Other colors. Defaults look damn good. */
-static const char *filename_color = "35"; /* magenta */
-static const char *line_num_color = "32"; /* green */
-static const char *byte_num_color = "32"; /* green */
-static const char *sep_color = "36"; /* cyan */
-static const char *selected_line_color = ""; /* default color pair */
-static const char *context_line_color = ""; /* default color pair */
-
-/* Select Graphic Rendition (SGR, "\33[...m") strings. */
-/* Also Erase in Line (EL) to Right ("\33[K") by default. */
-/* Why have EL to Right after SGR?
- -- The behavior of line-wrapping when at the bottom of the
- terminal screen and at the end of the current line is often
- such that a new line is introduced, entirely cleared with
- the current background color which may be different from the
- default one (see the boolean back_color_erase terminfo(5)
- capability), thus scrolling the display by one line.
- The end of this new line will stay in this background color
- even after reverting to the default background color with
- "\33[m', unless it is explicitly cleared again with "\33[K"
- (which is the behavior the user would instinctively expect
- from the whole thing). There may be some unavoidable
- background-color flicker at the end of this new line because
- of this (when timing with the monitor's redraw is just right).
- -- The behavior of HT (tab, "\t") is usually the same as that of
- Cursor Forward Tabulation (CHT) with a default parameter
- of 1 ("\33[I"), i.e., it performs pure movement to the next
- tab stop, without any clearing of either content or screen
- attributes (including background color); try
- printf 'asdfqwerzxcv\rASDF\tZXCV\n'
- in a bash(1) shell to demonstrate this. This is not what the
- user would instinctively expect of HT (but is ok for CHT).
- The instinctive behavior would include clearing the terminal
- cells that are skipped over by HT with blank cells in the
- current screen attributes, including background color;
- the boolean dest_tabs_magic_smso terminfo(5) capability
- indicates this saner behavior for HT, but only some rare
- terminals have it (although it also indicates a special
- glitch with standout mode in the Teleray terminal for which
- it was initially introduced). The remedy is to add "\33K"
- after each SGR sequence, be it START (to fix the behavior
- of any HT after that before another SGR) or END (to fix the
- behavior of an HT in default background color that would
- follow a line-wrapping at the bottom of the screen in another
- background color, and to complement doing it after START).
- Piping grep's output through a pager such as less(1) avoids
- any HT problems since the pager performs tab expansion.
-
- Generic disadvantages of this remedy are:
- -- Some very rare terminals might support SGR but not EL (nobody
- will use "grep --color" on a terminal that does not support
- SGR in the first place).
- -- Having these extra control sequences might somewhat complicate
- the task of any program trying to parse "grep --color"
- output in order to extract structuring information from it.
- A specific disadvantage to doing it after SGR START is:
- -- Even more possible background color flicker (when timing
- with the monitor's redraw is just right), even when not at the
- bottom of the screen.
- There are no additional disadvantages specific to doing it after
- SGR END.
-
- It would be impractical for GNU grep to become a full-fledged
- terminal program linked against ncurses or the like, so it will
- not detect terminfo(5) capabilities. */
-static const char *sgr_start = "\33[%sm\33[K";
-static const char *sgr_end = "\33[m\33[K";
-
-/* SGR utility functions. */
-static void
-pr_sgr_start (char const *s)
-{
- if (*s)
- print_start_colorize (sgr_start, s);
-}
-static void
-pr_sgr_end (char const *s)
-{
- if (*s)
- print_end_colorize (sgr_end);
-}
-static void
-pr_sgr_start_if (char const *s)
-{
- if (color_option)
- pr_sgr_start (s);
-}
-static void
-pr_sgr_end_if (char const *s)
-{
- if (color_option)
- pr_sgr_end (s);
-}
-
-struct color_cap
- {
- const char *name;
- const char **var;
- void (*fct) (void);
- };
-
-static void
-color_cap_mt_fct (void)
-{
- /* Our caller just set selected_match_color. */
- context_match_color = selected_match_color;
-}
-
-static void
-color_cap_rv_fct (void)
-{
- /* By this point, it was 1 (or already -1). */
- color_option = -1; /* That's still != 0. */
-}
-
-static void
-color_cap_ne_fct (void)
-{
- sgr_start = "\33[%sm";
- sgr_end = "\33[m";
-}
-
-/* For GREP_COLORS. */
-static const struct color_cap color_dict[] =
- {
- { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
- { "ms", &selected_match_color, NULL }, /* selected matched text */
- { "mc", &context_match_color, NULL }, /* context matched text */
- { "fn", &filename_color, NULL }, /* filename */
- { "ln", &line_num_color, NULL }, /* line number */
- { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
- { "se", &sep_color, NULL }, /* separator */
- { "sl", &selected_line_color, NULL }, /* selected lines */
- { "cx", &context_line_color, NULL }, /* context lines */
- { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
- { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
- { NULL, NULL, NULL }
- };
-
-static struct exclude *excluded_patterns;
-static struct exclude *excluded_directory_patterns;
-/* Short options. */
-static char const short_options[] =
-"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
-
-/* Non-boolean long options that have no corresponding short equivalents. */
-enum
-{
- BINARY_FILES_OPTION = CHAR_MAX + 1,
- COLOR_OPTION,
- INCLUDE_OPTION,
- EXCLUDE_OPTION,
- EXCLUDE_FROM_OPTION,
- LINE_BUFFERED_OPTION,
- LABEL_OPTION,
- EXCLUDE_DIRECTORY_OPTION,
- GROUP_SEPARATOR_OPTION
-};
-
-/* Long options equivalences. */
-static struct option const long_options[] =
-{
- {"basic-regexp", no_argument, NULL, 'G'},
- {"extended-regexp", no_argument, NULL, 'E'},
- {"fixed-regexp", no_argument, NULL, 'F'},
- {"fixed-strings", no_argument, NULL, 'F'},
- {"perl-regexp", no_argument, NULL, 'P'},
- {"after-context", required_argument, NULL, 'A'},
- {"before-context", required_argument, NULL, 'B'},
- {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
- {"byte-offset", no_argument, NULL, 'b'},
- {"context", required_argument, NULL, 'C'},
- {"color", optional_argument, NULL, COLOR_OPTION},
- {"colour", optional_argument, NULL, COLOR_OPTION},
- {"count", no_argument, NULL, 'c'},
- {"devices", required_argument, NULL, 'D'},
- {"directories", required_argument, NULL, 'd'},
- {"exclude", required_argument, NULL, EXCLUDE_OPTION},
- {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
- {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
- {"file", required_argument, NULL, 'f'},
- {"files-with-matches", no_argument, NULL, 'l'},
- {"files-without-match", no_argument, NULL, 'L'},
- {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
- {"help", no_argument, &show_help, 1},
- {"include", required_argument, NULL, INCLUDE_OPTION},
- {"ignore-case", no_argument, NULL, 'i'},
- {"initial-tab", no_argument, NULL, 'T'},
- {"label", required_argument, NULL, LABEL_OPTION},
- {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
- {"line-number", no_argument, NULL, 'n'},
- {"line-regexp", no_argument, NULL, 'x'},
- {"max-count", required_argument, NULL, 'm'},
-
- {"no-filename", no_argument, NULL, 'h'},
- {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
- {"no-messages", no_argument, NULL, 's'},
- {"null", no_argument, NULL, 'Z'},
- {"null-data", no_argument, NULL, 'z'},
- {"only-matching", no_argument, NULL, 'o'},
- {"quiet", no_argument, NULL, 'q'},
- {"recursive", no_argument, NULL, 'r'},
- {"dereference-recursive", no_argument, NULL, 'R'},
- {"regexp", required_argument, NULL, 'e'},
- {"invert-match", no_argument, NULL, 'v'},
- {"silent", no_argument, NULL, 'q'},
- {"text", no_argument, NULL, 'a'},
- {"binary", no_argument, NULL, 'U'},
- {"unix-byte-offsets", no_argument, NULL, 'u'},
- {"version", no_argument, NULL, 'V'},
- {"with-filename", no_argument, NULL, 'H'},
- {"word-regexp", no_argument, NULL, 'w'},
- {0, 0, 0, 0}
-};
-
-/* Define flags declared in grep.h. */
-int match_icase;
-int match_words;
-int match_lines;
-unsigned char eolbyte;
-
-static char const *matcher;
-
-/* For error messages. */
-/* The input file name, or (if standard input) "-" or a --label argument. */
-static char const *filename;
-static size_t filename_prefix_len;
-static int errseen;
-static int write_error_seen;
-
-enum directories_type
- {
- READ_DIRECTORIES = 2,
- RECURSE_DIRECTORIES,
- SKIP_DIRECTORIES
- };
-
-/* How to handle directories. */
-static char const *const directories_args[] =
-{
- "read", "recurse", "skip", NULL
-};
-static enum directories_type const directories_types[] =
-{
- READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
-};
-ARGMATCH_VERIFY (directories_args, directories_types);
-
-static enum directories_type directories = READ_DIRECTORIES;
-
-enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
-static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
-
-/* How to handle devices. */
-static enum
- {
- READ_COMMAND_LINE_DEVICES,
- READ_DEVICES,
- SKIP_DEVICES
- } devices = READ_COMMAND_LINE_DEVICES;
-
-static int grepfile (int, char const *, int, int);
-static int grepdesc (int, int);
-#if defined HAVE_DOS_FILE_CONTENTS
-static int undossify_input (char *, size_t);
-#endif
-
-static int
-is_device_mode (mode_t m)
-{
- return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m);
-}
-
-/* Return nonzero if ST->st_size is defined. Assume the file is not a
- symbolic link. */
-static int
-usable_st_size (struct stat const *st)
-{
- return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st);
-}
-
-/* Functions we'll use to search. */
-typedef void (*compile_fp_t) (char const *, size_t);
-typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *);
-static compile_fp_t compile;
-static execute_fp_t execute;
-
-/* Like error, but suppress the diagnostic if requested. */
-static void
-suppressible_error (char const *mesg, int errnum)
-{
- if (! suppress_errors)
- error (0, errnum, "%s", mesg);
- errseen = 1;
-}
-
-/* If there has already been a write error, don't bother closing
- standard output, as that might elicit a duplicate diagnostic. */
-static void
-clean_up_stdout (void)
-{
- if (! write_error_seen)
- close_stdout ();
-}
-
-/* Return 1 if a file is known to be binary for the purpose of 'grep'.
- BUF, of size BUFSIZE, is the initial buffer read from the file with
- descriptor FD and status ST. */
-static int
-file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
-{
- #ifndef SEEK_HOLE
- enum { SEEK_HOLE = SEEK_END };
- #endif
-
- /* If -z, test only whether the initial buffer contains '\200';
- knowing about holes won't help. */
- if (! eolbyte)
- return memchr (buf, '\200', bufsize) != 0;
-
- /* If the initial buffer contains a null byte, guess that the file
- is binary. */
- if (memchr (buf, '\0', bufsize))
- return 1;
-
- /* If the file has holes, it must contain a null byte somewhere. */
- if (SEEK_HOLE != SEEK_END && usable_st_size (st))
- {
- off_t cur = bufsize;
- if (O_BINARY || fd == STDIN_FILENO)
- {
- cur = lseek (fd, 0, SEEK_CUR);
- if (cur < 0)
- return 0;
- }
-
- /* Look for a hole after the current location. */
- off_t hole_start = lseek (fd, cur, SEEK_HOLE);
- if (0 <= hole_start)
- {
- if (lseek (fd, cur, SEEK_SET) < 0)
- suppressible_error (filename, errno);
- if (hole_start < st->st_size)
- return 1;
- }
- }
-
- /* Guess that the file does not contain binary data. */
- return 0;
-}
-
-/* Convert STR to a nonnegative integer, storing the result in *OUT.
- STR must be a valid context length argument; report an error if it
- isn't. Silently ceiling *OUT at the maximum value, as that is
- practically equivalent to infinity for grep's purposes. */
-static void
-context_length_arg (char const *str, intmax_t *out)
-{
- switch (xstrtoimax (str, 0, 10, out, ""))
- {
- case LONGINT_OK:
- case LONGINT_OVERFLOW:
- if (0 <= *out)
- break;
- /* Fall through. */
- default:
- error (EXIT_TROUBLE, 0, "%s: %s", str,
- _("invalid context length argument"));
- }
-}
-
-/* Return nonzero if the file with NAME should be skipped.
- If COMMAND_LINE is nonzero, it is a command-line argument.
- If IS_DIR is nonzero, it is a directory. */
-static int
-skipped_file (char const *name, int command_line, int is_dir)
-{
- return (is_dir
- ? (directories == SKIP_DIRECTORIES
- || (! (command_line && filename_prefix_len != 0)
- && excluded_directory_patterns
- && excluded_file_name (excluded_directory_patterns, name)))
- : (excluded_patterns
- && excluded_file_name (excluded_patterns, name)));
-}
-
-/* Hairy buffering mechanism for grep. The intent is to keep
- all reads aligned on a page boundary and multiples of the
- page size, unless a read yields a partial page. */
-
-static char *buffer; /* Base of buffer. */
-static size_t bufalloc; /* Allocated buffer size, counting slop. */
-#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
-static int bufdesc; /* File descriptor. */
-static char *bufbeg; /* Beginning of user-visible stuff. */
-static char *buflim; /* Limit of user-visible stuff. */
-static size_t pagesize; /* alignment of memory pages */
-static off_t bufoffset; /* Read offset; defined on regular files. */
-static off_t after_last_match; /* Pointer after last matching line that
- would have been output if we were
- outputting characters. */
-
-/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
- an integer or a pointer. Both args must be free of side effects. */
-#define ALIGN_TO(val, alignment) \
- ((size_t) (val) % (alignment) == 0 \
- ? (val) \
- : (val) + ((alignment) - (size_t) (val) % (alignment)))
-
-/* Reset the buffer for a new file, returning zero if we should skip it.
- Initialize on the first time through. */
-static int
-reset (int fd, struct stat const *st)
-{
- if (! pagesize)
- {
- pagesize = getpagesize ();
- if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
- abort ();
- bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1;
- buffer = xmalloc (bufalloc);
- }
-
- bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
- bufbeg[-1] = eolbyte;
- bufdesc = fd;
-
- if (S_ISREG (st->st_mode))
- {
- if (fd != STDIN_FILENO)
- bufoffset = 0;
- else
- {
- bufoffset = lseek (fd, 0, SEEK_CUR);
- if (bufoffset < 0)
- {
- suppressible_error (_("lseek failed"), errno);
- return 0;
- }
- }
- }
- return 1;
-}
-
-/* Read new stuff into the buffer, saving the specified
- amount of old stuff. When we're done, 'bufbeg' points
- to the beginning of the buffer contents, and 'buflim'
- points just after the end. Return zero if there's an error. */
-static int
-fillbuf (size_t save, struct stat const *st)
-{
- ssize_t fillsize;
- int cc = 1;
- char *readbuf;
- size_t readsize;
-
- /* Offset from start of buffer to start of old stuff
- that we want to save. */
- size_t saved_offset = buflim - save - buffer;
-
- if (pagesize <= buffer + bufalloc - buflim)
- {
- readbuf = buflim;
- bufbeg = buflim - save;
- }
- else
- {
- size_t minsize = save + pagesize;
- size_t newsize;
- size_t newalloc;
- char *newbuf;
-
- /* Grow newsize until it is at least as great as minsize. */
- for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2)
- if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2)
- xalloc_die ();
-
- /* Try not to allocate more memory than the file size indicates,
- as that might cause unnecessary memory exhaustion if the file
- is large. However, do not use the original file size as a
- heuristic if we've already read past the file end, as most
- likely the file is growing. */
- if (usable_st_size (st))
- {
- off_t to_be_read = st->st_size - bufoffset;
- off_t maxsize_off = save + to_be_read;
- if (0 <= to_be_read && to_be_read <= maxsize_off
- && maxsize_off == (size_t) maxsize_off
- && minsize <= (size_t) maxsize_off
- && (size_t) maxsize_off < newsize)
- newsize = maxsize_off;
- }
-
- /* Add enough room so that the buffer is aligned and has room
- for byte sentinels fore and aft. */
- newalloc = newsize + pagesize + 1;
-
- newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
- readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
- bufbeg = readbuf - save;
- memmove (bufbeg, buffer + saved_offset, save);
- bufbeg[-1] = eolbyte;
- if (newbuf != buffer)
- {
- free (buffer);
- buffer = newbuf;
- }
- }
-
- readsize = buffer + bufalloc - readbuf;
- readsize -= readsize % pagesize;
-
- fillsize = safe_read (bufdesc, readbuf, readsize);
- if (fillsize < 0)
- fillsize = cc = 0;
- bufoffset += fillsize;
-#if defined HAVE_DOS_FILE_CONTENTS
- if (fillsize)
- fillsize = undossify_input (readbuf, fillsize);
-#endif
- buflim = readbuf + fillsize;
- return cc;
-}
-
-/* Flags controlling the style of output. */
-static enum
-{
- BINARY_BINARY_FILES,
- TEXT_BINARY_FILES,
- WITHOUT_MATCH_BINARY_FILES
-} binary_files; /* How to handle binary files. */
-
-static int filename_mask; /* If zero, output nulls after filenames. */
-static int out_quiet; /* Suppress all normal output. */
-static int out_invert; /* Print nonmatching stuff. */
-static int out_file; /* Print filenames. */
-static int out_line; /* Print line numbers. */
-static int out_byte; /* Print byte offsets. */
-static intmax_t out_before; /* Lines of leading context. */
-static intmax_t out_after; /* Lines of trailing context. */
-static int count_matches; /* Count matching lines. */
-static int list_files; /* List matching files. */
-static int no_filenames; /* Suppress file names. */
-static intmax_t max_count; /* Stop after outputting this many
- lines from an input file. */
-static int line_buffered; /* If nonzero, use line buffering, i.e.
- fflush everyline out. */
-static char *label = NULL; /* Fake filename for stdin */
-
-
-/* Internal variables to keep track of byte count, context, etc. */
-static uintmax_t totalcc; /* Total character count before bufbeg. */
-static char const *lastnl; /* Pointer after last newline counted. */
-static char const *lastout; /* Pointer after last character output;
- NULL if no character has been output
- or if it's conceptually before bufbeg. */
-static uintmax_t totalnl; /* Total newline count before lastnl. */
-static intmax_t outleft; /* Maximum number of lines to be output. */
-static intmax_t pending; /* Pending lines of output.
- Always kept 0 if out_quiet is true. */
-static int done_on_match; /* Stop scanning file on first match. */
-static int exit_on_match; /* Exit on first match. */
-
-#if defined HAVE_DOS_FILE_CONTENTS
-# include "dosbuf.c"
-#endif
-
-/* Add two numbers that count input bytes or lines, and report an
- error if the addition overflows. */
-static uintmax_t
-add_count (uintmax_t a, uintmax_t b)
-{
- uintmax_t sum = a + b;
- if (sum < a)
- error (EXIT_TROUBLE, 0, _("input is too large to count"));
- return sum;
-}
-
-static void
-nlscan (char const *lim)
-{
- size_t newlines = 0;
- char const *beg;
- for (beg = lastnl; beg < lim; beg++)
- {
- beg = memchr (beg, eolbyte, lim - beg);
- if (!beg)
- break;
- newlines++;
- }
- totalnl = add_count (totalnl, newlines);
- lastnl = lim;
-}
-
-/* Print the current filename. */
-static void
-print_filename (void)
-{
- pr_sgr_start_if (filename_color);
- fputs (filename, stdout);
- pr_sgr_end_if (filename_color);
-}
-
-/* Print a character separator. */
-static void
-print_sep (char sep)
-{
- pr_sgr_start_if (sep_color);
- fputc (sep, stdout);
- pr_sgr_end_if (sep_color);
-}
-
-/* Print a line number or a byte offset. */
-static void
-print_offset (uintmax_t pos, int min_width, const char *color)
-{
- /* Do not rely on printf to print pos, since uintmax_t may be longer
- than long, and long long is not portable. */
-
- char buf[sizeof pos * CHAR_BIT];
- char *p = buf + sizeof buf;
-
- do
- {
- *--p = '0' + pos % 10;
- --min_width;
- }
- while ((pos /= 10) != 0);
-
- /* Do this to maximize the probability of alignment across lines. */
- if (align_tabs)
- while (--min_width >= 0)
- *--p = ' ';
-
- pr_sgr_start_if (color);
- fwrite (p, 1, buf + sizeof buf - p, stdout);
- pr_sgr_end_if (color);
-}
-
-/* Print a whole line head (filename, line, byte). */
-static void
-print_line_head (char const *beg, char const *lim, int sep)
-{
- int pending_sep = 0;
-
- if (out_file)
- {
- print_filename ();
- if (filename_mask)
- pending_sep = 1;
- else
- fputc (0, stdout);
- }
-
- if (out_line)
- {
- if (lastnl < lim)
- {
- nlscan (beg);
- totalnl = add_count (totalnl, 1);
- lastnl = lim;
- }
- if (pending_sep)
- print_sep (sep);
- print_offset (totalnl, 4, line_num_color);
- pending_sep = 1;
- }
-
- if (out_byte)
- {
- uintmax_t pos = add_count (totalcc, beg - bufbeg);
-#if defined HAVE_DOS_FILE_CONTENTS
- pos = dossified_pos (pos);
-#endif
- if (pending_sep)
- print_sep (sep);
- print_offset (pos, 6, byte_num_color);
- pending_sep = 1;
- }
-
- if (pending_sep)
- {
- /* This assumes sep is one column wide.
- Try doing this any other way with Unicode
- (and its combining and wide characters)
- filenames and you're wasting your efforts. */
- if (align_tabs)
- fputs ("\t\b", stdout);
-
- print_sep (sep);
- }
-}
-
-static const char *
-print_line_middle (const char *beg, const char *lim,
- const char *line_color, const char *match_color)
-{
- size_t match_size;
- size_t match_offset;
- const char *cur = beg;
- const char *mid = NULL;
-
- while (cur < lim
- && ((match_offset = execute (beg, lim - beg, &match_size,
- beg + (cur - beg))) != (size_t) -1))
- {
- char const *b = beg + match_offset;
-
- /* Avoid matching the empty line at the end of the buffer. */
- if (b == lim)
- break;
-
- /* Avoid hanging on grep --color "" foo */
- if (match_size == 0)
- {
- /* Make minimal progress; there may be further non-empty matches. */
- /* XXX - Could really advance by one whole multi-octet character. */
- match_size = 1;
- if (!mid)
- mid = cur;
- }
- else
- {
- /* This function is called on a matching line only,
- but is it selected or rejected/context? */
- if (only_matching)
- print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
- : SEP_CHAR_SELECTED));
- else
- {
- pr_sgr_start (line_color);
- if (mid)
- {
- cur = mid;
- mid = NULL;
- }
- fwrite (cur, sizeof (char), b - cur, stdout);
- }
-
- pr_sgr_start_if (match_color);
- fwrite (b, sizeof (char), match_size, stdout);
- pr_sgr_end_if (match_color);
- if (only_matching)
- fputs ("\n", stdout);
- }
- cur = b + match_size;
- }
-
- if (only_matching)
- cur = lim;
- else if (mid)
- cur = mid;
-
- return cur;
-}
-
-static const char *
-print_line_tail (const char *beg, const char *lim, const char *line_color)
-{
- size_t eol_size;
- size_t tail_size;
-
- eol_size = (lim > beg && lim[-1] == eolbyte);
- eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
- tail_size = lim - eol_size - beg;
-
- if (tail_size > 0)
- {
- pr_sgr_start (line_color);
- fwrite (beg, 1, tail_size, stdout);
- beg += tail_size;
- pr_sgr_end (line_color);
- }
-
- return beg;
-}
-
-static void
-prline (char const *beg, char const *lim, int sep)
-{
- int matching;
- const char *line_color;
- const char *match_color;
-
- if (!only_matching)
- print_line_head (beg, lim, sep);
-
- matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert;
-
- if (color_option)
- {
- line_color = (((sep == SEP_CHAR_SELECTED)
- ^ (out_invert && (color_option < 0)))
- ? selected_line_color : context_line_color);
- match_color = (sep == SEP_CHAR_SELECTED
- ? selected_match_color : context_match_color);
- }
- else
- line_color = match_color = NULL; /* Shouldn't be used. */
-
- if ((only_matching && matching)
- || (color_option && (*line_color || *match_color)))
- {
- /* We already know that non-matching lines have no match (to colorize). */
- if (matching && (only_matching || *match_color))
- beg = print_line_middle (beg, lim, line_color, match_color);
-
- if (!only_matching && *line_color)
- {
- /* This code is exercised at least when grep is invoked like this:
- echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
- beg = print_line_tail (beg, lim, line_color);
- }
- }
-
- if (!only_matching && lim > beg)
- fwrite (beg, 1, lim - beg, stdout);
-
- if (ferror (stdout))
- {
- write_error_seen = 1;
- error (EXIT_TROUBLE, 0, _("write error"));
- }
-
- lastout = lim;
-
- if (line_buffered)
- fflush (stdout);
-}
-
-/* Print pending lines of trailing context prior to LIM. Trailing context ends
- at the next matching line when OUTLEFT is 0. */
-static void
-prpending (char const *lim)
-{
- if (!lastout)
- lastout = bufbeg;
- while (pending > 0 && lastout < lim)
- {
- char const *nl = memchr (lastout, eolbyte, lim - lastout);
- size_t match_size;
- --pending;
- if (outleft
- || ((execute (lastout, nl + 1 - lastout,
- &match_size, NULL) == (size_t) -1)
- == !out_invert))
- prline (lastout, nl + 1, SEP_CHAR_REJECTED);
- else
- pending = 0;
- }
-}
-
-/* Print the lines between BEG and LIM. Deal with context crap.
- If NLINESP is non-null, store a count of lines between BEG and LIM. */
-static void
-prtext (char const *beg, char const *lim, intmax_t *nlinesp)
-{
- static int used; /* avoid printing SEP_STR_GROUP before any output */
- char const *bp, *p;
- char eol = eolbyte;
- intmax_t i, n;
-
- if (!out_quiet && pending > 0)
- prpending (beg);
-
- p = beg;
-
- if (!out_quiet)
- {
- /* Deal with leading context crap. */
-
- bp = lastout ? lastout : bufbeg;
- for (i = 0; i < out_before; ++i)
- if (p > bp)
- do
- --p;
- while (p[-1] != eol);
-
- /* We print the SEP_STR_GROUP separator only if our output is
- discontiguous from the last output in the file. */
- if ((out_before || out_after) && used && p != lastout && group_separator)
- {
- pr_sgr_start_if (sep_color);
- fputs (group_separator, stdout);
- pr_sgr_end_if (sep_color);
- fputc ('\n', stdout);
- }
-
- while (p < beg)
- {
- char const *nl = memchr (p, eol, beg - p);
- nl++;
- prline (p, nl, SEP_CHAR_REJECTED);
- p = nl;
- }
- }
-
- if (nlinesp)
- {
- /* Caller wants a line count. */
- for (n = 0; p < lim && n < outleft; n++)
- {
- char const *nl = memchr (p, eol, lim - p);
- nl++;
- if (!out_quiet)
- prline (p, nl, SEP_CHAR_SELECTED);
- p = nl;
- }
- *nlinesp = n;
-
- /* relying on it that this function is never called when outleft = 0. */
- after_last_match = bufoffset - (buflim - p);
- }
- else if (!out_quiet)
- prline (beg, lim, SEP_CHAR_SELECTED);
-
- pending = out_quiet ? 0 : out_after;
- used = 1;
-}
-
-/* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there
- is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the
- length of the match and return the offset of the start of the match. */
-static size_t
-do_execute (char const *buf, size_t size, size_t *match_size,
- char const *start_ptr)
-{
- size_t result;
- const char *line_next;
-
- /* With the current implementation, using --ignore-case with a multi-byte
- character set is very inefficient when applied to a large buffer
- containing many matches. We can avoid much of the wasted effort
- by matching line-by-line.
-
- FIXME: this is just an ugly workaround, and it doesn't really
- belong here. Also, PCRE is always using this same per-line
- matching algorithm. Either we fix -i, or we should refactor
- this code---for example, we could add another function pointer
- to struct matcher to split the buffer passed to execute. It would
- perform the memchr if line-by-line matching is necessary, or just
- return buf + size otherwise. */
- if (! (execute == Fexecute || execute == Pexecute)
- || MB_CUR_MAX == 1 || !match_icase)
- return execute (buf, size, match_size, start_ptr);
-
- for (line_next = buf; line_next < buf + size; )
- {
- const char *line_buf = line_next;
- const char *line_end = memchr (line_buf, eolbyte,
- (buf + size) - line_buf);
- if (line_end == NULL)
- line_next = line_end = buf + size;
- else
- line_next = line_end + 1;
-
- if (start_ptr && start_ptr >= line_end)
- continue;
-
- result = execute (line_buf, line_next - line_buf, match_size, start_ptr);
- if (result != (size_t) -1)
- return (line_buf - buf) + result;
- }
-
- return (size_t) -1;
-}
-
-/* Scan the specified portion of the buffer, matching lines (or
- between matching lines if OUT_INVERT is true). Return a count of
- lines printed. */
-static intmax_t
-grepbuf (char const *beg, char const *lim)
-{
- intmax_t nlines, n;
- char const *p;
- size_t match_offset;
- size_t match_size;
-
- nlines = 0;
- p = beg;
- while ((match_offset = do_execute (p, lim - p, &match_size,
- NULL)) != (size_t) -1)
- {
- char const *b = p + match_offset;
- char const *endp = b + match_size;
- /* Avoid matching the empty line at the end of the buffer. */
- if (b == lim)
- break;
- if (!out_invert)
- {
- prtext (b, endp, NULL);
- nlines++;
- outleft--;
- if (!outleft || done_on_match)
- {
- if (exit_on_match)
- exit (EXIT_SUCCESS);
- after_last_match = bufoffset - (buflim - endp);
- return nlines;
- }
- }
- else if (p < b)
- {
- prtext (p, b, &n);
- nlines += n;
- outleft -= n;
- if (!outleft)
- return nlines;
- }
- p = endp;
- }
- if (out_invert && p < lim)
- {
- prtext (p, lim, &n);
- nlines += n;
- outleft -= n;
- }
- return nlines;
-}
-
-/* Search a given file. Normally, return a count of lines printed;
- but if the file is a directory and we search it recursively, then
- return -2 if there was a match, and -1 otherwise. */
-static intmax_t
-grep (int fd, struct stat const *st)
-{
- intmax_t nlines, i;
- int not_text;
- size_t residue, save;
- char oldc;
- char *beg;
- char *lim;
- char eol = eolbyte;
-
- if (! reset (fd, st))
- return 0;
-
- totalcc = 0;
- lastout = 0;
- totalnl = 0;
- outleft = max_count;
- after_last_match = 0;
- pending = 0;
-
- nlines = 0;
- residue = 0;
- save = 0;
-
- if (! fillbuf (save, st))
- {
- suppressible_error (filename, errno);
- return 0;
- }
-
- not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
- || binary_files == WITHOUT_MATCH_BINARY_FILES)
- && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
- if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
- return 0;
- done_on_match += not_text;
- out_quiet += not_text;
-
- for (;;)
- {
- lastnl = bufbeg;
- if (lastout)
- lastout = bufbeg;
-
- beg = bufbeg + save;
-
- /* no more data to scan (eof) except for maybe a residue -> break */
- if (beg == buflim)
- break;
-
- /* Determine new residue (the length of an incomplete line at the end of
- the buffer, 0 means there is no incomplete last line). */
- oldc = beg[-1];
- beg[-1] = eol;
- /* FIXME: use rawmemrchr if/when it exists, since we have ensured
- that this use of memrchr is guaranteed never to return NULL. */
- lim = memrchr (beg - 1, eol, buflim - beg + 1);
- ++lim;
- beg[-1] = oldc;
- if (lim == beg)
- lim = beg - residue;
- beg -= residue;
- residue = buflim - lim;
-
- if (beg < lim)
- {
- if (outleft)
- nlines += grepbuf (beg, lim);
- if (pending)
- prpending (lim);
- if ((!outleft && !pending)
- || (nlines && done_on_match && !out_invert))
- goto finish_grep;
- }
-
- /* The last OUT_BEFORE lines at the end of the buffer will be needed as
- leading context if there is a matching line at the begin of the
- next data. Make beg point to their begin. */
- i = 0;
- beg = lim;
- while (i < out_before && beg > bufbeg && beg != lastout)
- {
- ++i;
- do
- --beg;
- while (beg[-1] != eol);
- }
-
- /* detect if leading context is discontinuous from last printed line. */
- if (beg != lastout)
- lastout = 0;
-
- /* Handle some details and read more data to scan. */
- save = residue + lim - beg;
- if (out_byte)
- totalcc = add_count (totalcc, buflim - bufbeg - save);
- if (out_line)
- nlscan (beg);
- if (! fillbuf (save, st))
- {
- suppressible_error (filename, errno);
- goto finish_grep;
- }
- }
- if (residue)
- {
- *buflim++ = eol;
- if (outleft)
- nlines += grepbuf (bufbeg + save - residue, buflim);
- if (pending)
- prpending (buflim);
- }
-
- finish_grep:
- done_on_match -= not_text;
- out_quiet -= not_text;
- if ((not_text & ~out_quiet) && nlines != 0)
- printf (_("Binary file %s matches\n"), filename);
- return nlines;
-}
-
-static int
-grepdirent (FTS *fts, FTSENT *ent, int command_line)
-{
- int follow, dirdesc;
- struct stat *st = ent->fts_statp;
- command_line &= ent->fts_level == FTS_ROOTLEVEL;
-
- if (ent->fts_info == FTS_DP)
- {
- if (directories == RECURSE_DIRECTORIES && command_line)
- out_file &= ~ (2 * !no_filenames);
- return 1;
- }
-
- if (skipped_file (ent->fts_name, command_line,
- (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
- || ent->fts_info == FTS_DNR)))
- {
- fts_set (fts, ent, FTS_SKIP);
- return 1;
- }
-
- filename = ent->fts_path + filename_prefix_len;
- follow = (fts->fts_options & FTS_LOGICAL
- || (fts->fts_options & FTS_COMFOLLOW && command_line));
-
- switch (ent->fts_info)
- {
- case FTS_D:
- if (directories == RECURSE_DIRECTORIES)
- {
- out_file |= 2 * !no_filenames;
- return 1;
- }
- fts_set (fts, ent, FTS_SKIP);
- break;
-
- case FTS_DC:
- if (!suppress_errors)
- error (0, 0, _("warning: %s: %s"), filename,
- _("recursive directory loop"));
- return 1;
-
- case FTS_DNR:
- case FTS_ERR:
- case FTS_NS:
- suppressible_error (filename, ent->fts_errno);
- return 1;
-
- case FTS_DEFAULT:
- case FTS_NSOK:
- if (devices == SKIP_DEVICES
- || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
- {
- struct stat st1;
- if (! st->st_mode)
- {
- /* The file type is not already known. Get the file status
- before opening, since opening might have side effects
- on a device. */
- int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
- if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
- {
- suppressible_error (filename, errno);
- return 1;
- }
- st = &st1;
- }
- if (is_device_mode (st->st_mode))
- return 1;
- }
- break;
-
- case FTS_F:
- case FTS_SLNONE:
- break;
-
- case FTS_SL:
- case FTS_W:
- return 1;
-
- default:
- abort ();
- }
-
- dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
- ? fts->fts_cwd_fd
- : AT_FDCWD);
- return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
-}
-
-static int
-grepfile (int dirdesc, char const *name, int follow, int command_line)
-{
- int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW));
- if (desc < 0)
- {
- if (follow || (errno != ELOOP && errno != EMLINK))
- suppressible_error (filename, errno);
- return 1;
- }
- return grepdesc (desc, command_line);
-}
-
-static int
-grepdesc (int desc, int command_line)
-{
- intmax_t count;
- int status = 1;
- struct stat st;
-
- /* Get the file status, possibly for the second time. This catches
- a race condition if the directory entry changes after the
- directory entry is read and before the file is opened. For
- example, normally DESC is a directory only at the top level, but
- there is an exception if some other process substitutes a
- directory for a non-directory while 'grep' is running. */
- if (fstat (desc, &st) != 0)
- {
- suppressible_error (filename, errno);
- goto closeout;
- }
-
- if (desc != STDIN_FILENO && command_line
- && skipped_file (filename, 1, S_ISDIR (st.st_mode)))
- goto closeout;
-
- if (desc != STDIN_FILENO
- && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
- {
- /* Traverse the directory starting with its full name, because
- unfortunately fts provides no way to traverse the directory
- starting from its file descriptor. */
-
- FTS *fts;
- FTSENT *ent;
- int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
- char *fts_arg[2];
-
- /* Close DESC now, to conserve file descriptors if the race
- condition occurs many times in a deep recursion. */
- if (close (desc) != 0)
- suppressible_error (filename, errno);
-
- fts_arg[0] = (char *) filename;
- fts_arg[1] = NULL;
- fts = fts_open (fts_arg, opts, NULL);
-
- if (!fts)
- xalloc_die ();
- while ((ent = fts_read (fts)))
- status &= grepdirent (fts, ent, command_line);
- if (errno)
- suppressible_error (filename, errno);
- if (fts_close (fts) != 0)
- suppressible_error (filename, errno);
- return status;
- }
- if (desc != STDIN_FILENO
- && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
- || ((devices == SKIP_DEVICES
- || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
- && is_device_mode (st.st_mode))))
- goto closeout;
-
- /* If there is a regular file on stdout and the current file refers
- to the same i-node, we have to report the problem and skip it.
- Otherwise when matching lines from some other input reach the
- disk before we open this file, we can end up reading and matching
- those lines and appending them to the file from which we're reading.
- Then we'd have what appears to be an infinite loop that'd terminate
- only upon filling the output file system or reaching a quota.
- However, there is no risk of an infinite loop if grep is generating
- no output, i.e., with --silent, --quiet, -q.
- Similarly, with any of these:
- --max-count=N (-m) (for N >= 2)
- --files-with-matches (-l)
- --files-without-match (-L)
- there is no risk of trouble.
- For --max-count=1, grep stops after printing the first match,
- so there is no risk of malfunction. But even --max-count=2, with
- input==output, while there is no risk of infloop, there is a race
- condition that could result in "alternate" output. */
- if (!out_quiet && list_files == 0 && 1 < max_count
- && S_ISREG (out_stat.st_mode) && out_stat.st_ino
- && SAME_INODE (st, out_stat))
- {
- if (! suppress_errors)
- error (0, 0, _("input file %s is also the output"), quote (filename));
- errseen = 1;
- goto closeout;
- }
-
-#if defined SET_BINARY
- /* Set input to binary mode. Pipes are simulated with files
- on DOS, so this includes the case of "foo | grep bar". */
- if (!isatty (desc))
- SET_BINARY (desc);
-#endif
-
- count = grep (desc, &st);
- if (count < 0)
- status = count + 2;
- else
- {
- if (count_matches)
- {
- if (out_file)
- {
- print_filename ();
- if (filename_mask)
- print_sep (SEP_CHAR_SELECTED);
- else
- fputc (0, stdout);
- }
- printf ("%" PRIdMAX "\n", count);
- }
-
- status = !count;
- if (list_files == 1 - 2 * status)
- {
- print_filename ();
- fputc ('\n' & filename_mask, stdout);
- }
-
- if (desc == STDIN_FILENO)
- {
- off_t required_offset = outleft ? bufoffset : after_last_match;
- if (required_offset != bufoffset
- && lseek (desc, required_offset, SEEK_SET) < 0
- && S_ISREG (st.st_mode))
- suppressible_error (filename, errno);
- }
- }
-
- closeout:
- if (desc != STDIN_FILENO && close (desc) != 0)
- suppressible_error (filename, errno);
- return status;
-}
-
-static int
-grep_command_line_arg (char const *arg)
-{
- if (STREQ (arg, "-"))
- {
- filename = label ? label : _("(standard input)");
- return grepdesc (STDIN_FILENO, 1);
- }
- else
- {
- filename = arg;
- return grepfile (AT_FDCWD, arg, 1, 1);
- }
-}
-
-_Noreturn void usage (int);
-void
-usage (int status)
-{
- if (status != 0)
- {
- fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
- program_name);
- fprintf (stderr, _("Try '%s --help' for more information.\n"),
- program_name);
- }
- else
- {
- printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
- printf (_("Search for PATTERN in each FILE or standard input.\n"));
- printf (_("PATTERN is, by default, a basic regular expression (BRE).\n"));
- printf (_("\
-Example: %s -i 'hello world' menu.h main.c\n\
-\n\
-Regexp selection and interpretation:\n"), program_name);
- printf (_("\
- -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\
- -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\
- -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\
- -P, --perl-regexp PATTERN is a Perl regular expression\n"));
- /* -X is undocumented on purpose. */
- printf (_("\
- -e, --regexp=PATTERN use PATTERN for matching\n\
- -f, --file=FILE obtain PATTERN from FILE\n\
- -i, --ignore-case ignore case distinctions\n\
- -w, --word-regexp force PATTERN to match only whole words\n\
- -x, --line-regexp force PATTERN to match only whole lines\n\
- -z, --null-data a data line ends in 0 byte, not newline\n"));
- printf (_("\
-\n\
-Miscellaneous:\n\
- -s, --no-messages suppress error messages\n\
- -v, --invert-match select non-matching lines\n\
- -V, --version display version information and exit\n\
- --help display this help text and exit\n"));
- printf (_("\
-\n\
-Output control:\n\
- -m, --max-count=NUM stop after NUM matches\n\
- -b, --byte-offset print the byte offset with output lines\n\
- -n, --line-number print line number with output lines\n\
- --line-buffered flush output on every line\n\
- -H, --with-filename print the file name for each match\n\
- -h, --no-filename suppress the file name prefix on output\n\
- --label=LABEL use LABEL as the standard input file name prefix\n\
-"));
- printf (_("\
- -o, --only-matching show only the part of a line matching PATTERN\n\
- -q, --quiet, --silent suppress all normal output\n\
- --binary-files=TYPE assume that binary files are TYPE;\n\
- TYPE is 'binary', 'text', or 'without-match'\n\
- -a, --text equivalent to --binary-files=text\n\
-"));
- printf (_("\
- -I equivalent to --binary-files=without-match\n\
- -d, --directories=ACTION how to handle directories;\n\
- ACTION is 'read', 'recurse', or 'skip'\n\
- -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
- ACTION is 'read' or 'skip'\n\
- -r, --recursive like --directories=recurse\n\
- -R, --dereference-recursive likewise, but follow all symlinks\n\
-"));
- printf (_("\
- --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
- --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\
- --exclude-from=FILE skip files matching any file pattern from FILE\n\
- --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
-"));
- printf (_("\
- -L, --files-without-match print only names of FILEs containing no match\n\
- -l, --files-with-matches print only names of FILEs containing matches\n\
- -c, --count print only a count of matching lines per FILE\n\
- -T, --initial-tab make tabs line up (if needed)\n\
- -Z, --null print 0 byte after FILE name\n"));
- printf (_("\
-\n\
-Context control:\n\
- -B, --before-context=NUM print NUM lines of leading context\n\
- -A, --after-context=NUM print NUM lines of trailing context\n\
- -C, --context=NUM print NUM lines of output context\n\
-"));
- printf (_("\
- -NUM same as --context=NUM\n\
- --color[=WHEN],\n\
- --colour[=WHEN] use markers to highlight the matching strings;\n\
- WHEN is 'always', 'never', or 'auto'\n\
- -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
- -u, --unix-byte-offsets report offsets as if CRs were not there\n\
- (MSDOS/Windows)\n\
-\n"));
- printf (_("\
-'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\
-Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"));
- printf (_("\
-When FILE is -, read standard input. With no FILE, read . if a command-line\n\
--r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\
-Exit status is 0 if any line is selected, 1 otherwise;\n\
-if any error occurs and -q is not given, the exit status is 2.\n"));
- printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT);
- printf (_("GNU Grep home page: <%s>\n"),
- "http://www.gnu.org/software/grep/");
- fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"),
- stdout);
-
- }
- exit (status);
-}
-
-/* Pattern compilers and matchers. */
-
-static void
-Gcompile (char const *pattern, size_t size)
-{
- GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES);
-}
-
-static void
-Ecompile (char const *pattern, size_t size)
-{
- GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
-}
static void
Acompile (char const *pattern, size_t size)
@@ -1679,829 +19,19 @@ PAcompile (char const *pattern, size_t size)
GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK);
}
-struct matcher
-{
- char const name[16];
- compile_fp_t compile;
- execute_fp_t execute;
-};
-static struct matcher const matchers[] = {
- { "grep", Gcompile, EGexecute },
- { "egrep", Ecompile, EGexecute },
- { "fgrep", Fcompile, Fexecute },
- { "awk", Acompile, EGexecute },
- { "gawk", GAcompile, EGexecute },
+struct matcher const matchers[] = {
+ { "grep", Gcompile, EGexecute },
+ { "egrep", Ecompile, EGexecute },
+ { "awk", Acompile, EGexecute },
+ { "gawk", GAcompile, EGexecute },
{ "posixawk", PAcompile, EGexecute },
- { "perl", Pcompile, Pexecute },
- { "", NULL, NULL },
+ { "fgrep", Fcompile, Fexecute },
+ { "perl", Pcompile, Pexecute },
+ { NULL, NULL, NULL },
};
-/* Set the matcher to M if available. Exit in case of conflicts or if
- M is not available. */
-static void
-setmatcher (char const *m)
-{
- struct matcher const *p;
-
- if (matcher && !STREQ (matcher, m))
- error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
-
- for (p = matchers; p->compile; p++)
- if (STREQ (m, p->name))
- {
- matcher = p->name;
- compile = p->compile;
- execute = p->execute;
- return;
- }
-
- error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
-}
-
-/* Find the white-space-separated options specified by OPTIONS, and
- using BUF to store copies of these options, set ARGV[0], ARGV[1],
- etc. to the option copies. Return the number N of options found.
- Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0]
- etc. Backslash can be used to escape whitespace (and backslashes). */
-static size_t
-prepend_args (char const *options, char *buf, char **argv)
-{
- char const *o = options;
- char *b = buf;
- size_t n = 0;
-
- for (;;)
- {
- while (c_isspace (to_uchar (*o)))
- o++;
- if (!*o)
- return n;
- if (argv)
- argv[n] = b;
- n++;
-
- do
- if ((*b++ = *o++) == '\\' && *o)
- b[-1] = *o++;
- while (*o && ! c_isspace (to_uchar (*o)));
-
- *b++ = '\0';
- }
-}
-
-/* Prepend the whitespace-separated options in OPTIONS to the argument
- vector of a main program with argument count *PARGC and argument
- vector *PARGV. Return the number of options prepended. */
-static int
-prepend_default_options (char const *options, int *pargc, char ***pargv)
-{
- if (options && *options)
- {
- char *buf = xmalloc (strlen (options) + 1);
- size_t prepended = prepend_args (options, buf, NULL);
- int argc = *pargc;
- char *const *argv = *pargv;
- char **pp;
- enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
- if (MAX_ARGS - argc < prepended)
- xalloc_die ();
- pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
- *pargc = prepended + argc;
- *pargv = pp;
- *pp++ = *argv++;
- pp += prepend_args (options, buf, pp);
- while ((*pp++ = *argv++))
- continue;
- return prepended;
- }
-
- return 0;
-}
-
-/* Get the next non-digit option from ARGC and ARGV.
- Return -1 if there are no more options.
- Process any digit options that were encountered on the way,
- and store the resulting integer into *DEFAULT_CONTEXT. */
-static int
-get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
-{
- static int prev_digit_optind = -1;
- int this_digit_optind, was_digit;
- char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
- char *p = buf;
- int opt;
-
- was_digit = 0;
- this_digit_optind = optind;
- while (1)
- {
- opt = getopt_long (argc, (char **) argv, short_options,
- long_options, NULL);
- if ( ! ('0' <= opt && opt <= '9'))
- break;
-
- if (prev_digit_optind != this_digit_optind || !was_digit)
- {
- /* Reset to start another context length argument. */
- p = buf;
- }
- else
- {
- /* Suppress trivial leading zeros, to avoid incorrect
- diagnostic on strings like 00000000000. */
- p -= buf[0] == '0';
- }
-
- if (p == buf + sizeof buf - 4)
- {
- /* Too many digits. Append "..." to make context_length_arg
- complain about "X...", where X contains the digits seen
- so far. */
- strcpy (p, "...");
- p += 3;
- break;
- }
- *p++ = opt;
-
- was_digit = 1;
- prev_digit_optind = this_digit_optind;
- this_digit_optind = optind;
- }
- if (p != buf)
- {
- *p = '\0';
- context_length_arg (buf, default_context);
- }
-
- return opt;
-}
-
-/* Parse GREP_COLORS. The default would look like:
- GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
- with boolean capabilities (ne and rv) unset (i.e., omitted).
- No character escaping is needed or supported. */
-static void
-parse_grep_colors (void)
-{
- const char *p;
- char *q;
- char *name;
- char *val;
-
- p = getenv ("GREP_COLORS"); /* Plural! */
- if (p == NULL || *p == '\0')
- return;
-
- /* Work off a writable copy. */
- q = xstrdup (p);
-
- name = q;
- val = NULL;
- /* From now on, be well-formed or you're gone. */
- for (;;)
- if (*q == ':' || *q == '\0')
- {
- char c = *q;
- struct color_cap const *cap;
-
- *q++ = '\0'; /* Terminate name or val. */
- /* Empty name without val (empty cap)
- * won't match and will be ignored. */
- for (cap = color_dict; cap->name; cap++)
- if (STREQ (cap->name, name))
- break;
- /* If name unknown, go on for forward compatibility. */
- if (cap->var && val)
- *(cap->var) = val;
- if (cap->fct)
- cap->fct ();
- if (c == '\0')
- return;
- name = q;
- val = NULL;
- }
- else if (*q == '=')
- {
- if (q == name || val)
- return;
- *q++ = '\0'; /* Terminate name. */
- val = q; /* Can be the empty string. */
- }
- else if (val == NULL)
- q++; /* Accumulate name. */
- else if (*q == ';' || (*q >= '0' && *q <= '9'))
- q++; /* Accumulate val. Protect the terminal from being sent crap. */
- else
- return;
-}
-
-#define MBRTOWC(pwc, s, n, ps) \
- (MB_CUR_MAX == 1 \
- ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
- : mbrtowc (pwc, s, n, ps))
-#define WCRTOMB(s, wc, ps) \
- (MB_CUR_MAX == 1 \
- ? (*(s) = wctob ((wint_t) (wc)), 1) \
- : wcrtomb (s, wc, ps))
-
-/* Change a pattern for fgrep into grep. */
-static void
-fgrep_to_grep_pattern (size_t len, char const *keys,
- size_t *new_len, char **new_keys)
-{
- char *p = *new_keys = xnmalloc (len + 1, 2);
- mbstate_t mb_state = { 0 };
- size_t n;
-
- for (; len; keys += n, len -= n)
- {
- wchar_t wc;
- n = MBRTOWC (&wc, keys, len, &mb_state);
- switch (n)
- {
- case (size_t) -2:
- n = len;
- /* Fall through. */
- default:
- p = mempcpy (p, keys, n);
- break;
-
- case (size_t) -1:
- memset (&mb_state, 0, sizeof mb_state);
- /* Fall through. */
- case 1:
- *p = '\\';
- p += strchr ("$*.[\\^", *keys) != NULL;
- /* Fall through. */
- case 0:
- *p++ = *keys;
- n = 1;
- break;
- }
- }
-
- *new_len = p - *new_keys;
-}
-
-/* If the newline-separated regular expressions, KEYS (with length, LEN
- and no trailing NUL byte), are amenable to transformation into
- otherwise equivalent case-ignoring ones, perform the transformation,
- put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
- and return true. Otherwise, return false. */
-
-static bool
-trivial_case_ignore (size_t len, char const *keys,
- size_t *new_len, char **new_keys)
-{
- /* FIXME: consider removing the following restriction:
- Reject if KEYS contain ASCII '\\' or '['. */
- if (memchr (keys, '\\', len) || memchr (keys, '[', len))
- return false;
-
- /* Worst case is that each byte B of KEYS is ASCII alphabetic and
- CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each
- occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which
- requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is
- bounded above by the constant expression CASE_FOLDED_BUFSIZE *
- MB_LEN_MAX + 3. */
- *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3);
- char *p = *new_keys;
-
- mbstate_t mb_state = { 0 };
- while (len)
- {
- bool initial_state = mbsinit (&mb_state) != 0;
- wchar_t wc;
- size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
- /* For an invalid, incomplete or L'\0', skip this optimization. */
- if ((size_t) -2 <= n)
- {
- skip_case_ignore_optimization:
- free (*new_keys);
- return false;
- }
-
- char const *orig = keys;
- keys += n;
- len -= n;
-
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- int nfolded = case_folded_counterparts (wc, folded);
- if (nfolded <= 0)
- {
- memcpy (p, orig, n);
- p += n;
- }
- else if (! initial_state)
- goto skip_case_ignore_optimization;
- else
- {
- *p++ = '[';
- memcpy (p, orig, n);
- p += n;
-
- int i = 0;
- do
- {
- size_t nbytes = WCRTOMB (p, folded[i], &mb_state);
- if (nbytes == (size_t) -1)
- goto skip_case_ignore_optimization;
- p += nbytes;
- }
- while (++i < nfolded);
-
- if (! mbsinit (&mb_state))
- goto skip_case_ignore_optimization;
-
- *p++ = ']';
- }
- }
-
- *new_len = p - *new_keys;
-
- return true;
-}
-
-int
-main (int argc, char **argv)
-{
- char *keys;
- size_t keycc, oldcc, keyalloc;
- int with_filenames;
- size_t cc;
- int opt, status, prepended;
- int prev_optind, last_recursive;
- int fread_errno;
- intmax_t default_context;
- FILE *fp;
- exit_failure = EXIT_TROUBLE;
- initialize_main (&argc, &argv);
- set_program_name (argv[0]);
- program_name = argv[0];
-
- keys = NULL;
- keycc = 0;
- with_filenames = 0;
- eolbyte = '\n';
- filename_mask = ~0;
-
- max_count = INTMAX_MAX;
-
- /* The value -1 means to use DEFAULT_CONTEXT. */
- out_after = out_before = -1;
- /* Default before/after context: changed by -C/-NUM options */
- default_context = 0;
- /* Changed by -o option */
- only_matching = 0;
-
- /* Internationalization. */
-#if defined HAVE_SETLOCALE
- setlocale (LC_ALL, "");
-#endif
-#if defined ENABLE_NLS
- bindtextdomain (PACKAGE, LOCALEDIR);
- textdomain (PACKAGE);
-#endif
-
- exit_failure = EXIT_TROUBLE;
- atexit (clean_up_stdout);
-
- last_recursive = 0;
- prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
- compile = matchers[0].compile;
- execute = matchers[0].execute;
-
- while (prev_optind = optind,
- (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
- switch (opt)
- {
- case 'A':
- context_length_arg (optarg, &out_after);
- break;
-
- case 'B':
- context_length_arg (optarg, &out_before);
- break;
-
- case 'C':
- /* Set output match context, but let any explicit leading or
- trailing amount specified with -A or -B stand. */
- context_length_arg (optarg, &default_context);
- break;
-
- case 'D':
- if (STREQ (optarg, "read"))
- devices = READ_DEVICES;
- else if (STREQ (optarg, "skip"))
- devices = SKIP_DEVICES;
- else
- error (EXIT_TROUBLE, 0, _("unknown devices method"));
- break;
-
- case 'E':
- setmatcher ("egrep");
- break;
-
- case 'F':
- setmatcher ("fgrep");
- break;
-
- case 'P':
- setmatcher ("perl");
- break;
-
- case 'G':
- setmatcher ("grep");
- break;
-
- case 'X': /* undocumented on purpose */
- setmatcher (optarg);
- break;
-
- case 'H':
- with_filenames = 1;
- no_filenames = 0;
- break;
-
- case 'I':
- binary_files = WITHOUT_MATCH_BINARY_FILES;
- break;
-
- case 'T':
- align_tabs = 1;
- break;
-
- case 'U':
-#if defined HAVE_DOS_FILE_CONTENTS
- dos_use_file_type = DOS_BINARY;
-#endif
- break;
-
- case 'u':
-#if defined HAVE_DOS_FILE_CONTENTS
- dos_report_unix_offset = 1;
-#endif
- break;
-
- case 'V':
- show_version = 1;
- break;
-
- case 'a':
- binary_files = TEXT_BINARY_FILES;
- break;
-
- case 'b':
- out_byte = 1;
- break;
-
- case 'c':
- count_matches = 1;
- break;
-
- case 'd':
- directories = XARGMATCH ("--directories", optarg,
- directories_args, directories_types);
- if (directories == RECURSE_DIRECTORIES)
- last_recursive = prev_optind;
- break;
-
- case 'e':
- cc = strlen (optarg);
- keys = xrealloc (keys, keycc + cc + 1);
- strcpy (&keys[keycc], optarg);
- keycc += cc;
- keys[keycc++] = '\n';
- break;
-
- case 'f':
- fp = STREQ (optarg, "-") ? stdin : fopen (optarg, "r");
- if (!fp)
- error (EXIT_TROUBLE, errno, "%s", optarg);
- for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
- ;
- keys = xrealloc (keys, keyalloc);
- oldcc = keycc;
- while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
- {
- keycc += cc;
- if (keycc == keyalloc - 1)
- keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
- }
- fread_errno = errno;
- if (ferror (fp))
- error (EXIT_TROUBLE, fread_errno, "%s", optarg);
- if (fp != stdin)
- fclose (fp);
- /* Append final newline if file ended in non-newline. */
- if (oldcc != keycc && keys[keycc - 1] != '\n')
- keys[keycc++] = '\n';
- break;
-
- case 'h':
- with_filenames = 0;
- no_filenames = 1;
- break;
-
- case 'i':
- case 'y': /* For old-timers . . . */
- match_icase = 1;
- break;
-
- case 'L':
- /* Like -l, except list files that don't contain matches.
- Inspired by the same option in Hume's gre. */
- list_files = -1;
- break;
-
- case 'l':
- list_files = 1;
- break;
-
- case 'm':
- switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
- {
- case LONGINT_OK:
- case LONGINT_OVERFLOW:
- break;
-
- default:
- error (EXIT_TROUBLE, 0, _("invalid max count"));
- }
- break;
-
- case 'n':
- out_line = 1;
- break;
-
- case 'o':
- only_matching = 1;
- break;
-
- case 'q':
- exit_on_match = 1;
- exit_failure = 0;
- break;
-
- case 'R':
- fts_options = basic_fts_options | FTS_LOGICAL;
- /* Fall through. */
- case 'r':
- directories = RECURSE_DIRECTORIES;
- last_recursive = prev_optind;
- break;
-
- case 's':
- suppress_errors = 1;
- break;
-
- case 'v':
- out_invert = 1;
- break;
-
- case 'w':
- match_words = 1;
- break;
-
- case 'x':
- match_lines = 1;
- break;
-
- case 'Z':
- filename_mask = 0;
- break;
-
- case 'z':
- eolbyte = '\0';
- break;
-
- case BINARY_FILES_OPTION:
- if (STREQ (optarg, "binary"))
- binary_files = BINARY_BINARY_FILES;
- else if (STREQ (optarg, "text"))
- binary_files = TEXT_BINARY_FILES;
- else if (STREQ (optarg, "without-match"))
- binary_files = WITHOUT_MATCH_BINARY_FILES;
- else
- error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
- break;
-
- case COLOR_OPTION:
- if (optarg)
- {
- if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
- || !strcasecmp (optarg, "force"))
- color_option = 1;
- else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
- || !strcasecmp (optarg, "none"))
- color_option = 0;
- else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
- || !strcasecmp (optarg, "if-tty"))
- color_option = 2;
- else
- show_help = 1;
- }
- else
- color_option = 2;
- break;
-
- case EXCLUDE_OPTION:
- case INCLUDE_OPTION:
- if (!excluded_patterns)
- excluded_patterns = new_exclude ();
- add_exclude (excluded_patterns, optarg,
- (EXCLUDE_WILDCARDS
- | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)));
- break;
- case EXCLUDE_FROM_OPTION:
- if (!excluded_patterns)
- excluded_patterns = new_exclude ();
- if (add_exclude_file (add_exclude, excluded_patterns, optarg,
- EXCLUDE_WILDCARDS, '\n') != 0)
- {
- error (EXIT_TROUBLE, errno, "%s", optarg);
- }
- break;
-
- case EXCLUDE_DIRECTORY_OPTION:
- if (!excluded_directory_patterns)
- excluded_directory_patterns = new_exclude ();
- add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS);
- break;
-
- case GROUP_SEPARATOR_OPTION:
- group_separator = optarg;
- break;
-
- case LINE_BUFFERED_OPTION:
- line_buffered = 1;
- break;
-
- case LABEL_OPTION:
- label = optarg;
- break;
-
- case 0:
- /* long options */
- break;
-
- default:
- usage (EXIT_TROUBLE);
- break;
-
- }
-
- if (color_option == 2)
- color_option = isatty (STDOUT_FILENO) && should_colorize ();
- init_colorize ();
-
- /* POSIX says that -q overrides -l, which in turn overrides the
- other output options. */
- if (exit_on_match)
- list_files = 0;
- if (exit_on_match | list_files)
- {
- count_matches = 0;
- done_on_match = 1;
- }
- out_quiet = count_matches | done_on_match;
-
- if (out_after < 0)
- out_after = default_context;
- if (out_before < 0)
- out_before = default_context;
-
- if (color_option)
- {
- /* Legacy. */
- char *userval = getenv ("GREP_COLOR");
- if (userval != NULL && *userval != '\0')
- selected_match_color = context_match_color = userval;
-
- /* New GREP_COLORS has priority. */
- parse_grep_colors ();
- }
-
- if (show_version)
- {
- version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
- (char *) NULL);
- exit (EXIT_SUCCESS);
- }
-
- if (show_help)
- usage (EXIT_SUCCESS);
-
- struct stat tmp_stat;
- if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
- out_stat = tmp_stat;
-
- if (keys)
- {
- if (keycc == 0)
- {
- /* No keys were specified (e.g. -f /dev/null). Match nothing. */
- out_invert ^= 1;
- match_lines = match_words = 0;
- }
- else
- /* Strip trailing newline. */
- --keycc;
- }
- else if (optind < argc)
- {
- /* A copy must be made in case of an xrealloc() or free() later. */
- keycc = strlen (argv[optind]);
- keys = xmemdup (argv[optind++], keycc + 1);
- }
- else
- usage (EXIT_TROUBLE);
-
- /* If case-insensitive fgrep in a multibyte locale, improve
- performance by using grep instead. */
- if (match_icase && compile == Fcompile && MB_CUR_MAX > 1)
- {
- size_t new_keycc;
- char *new_keys;
- fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
- free (keys);
- keys = new_keys;
- keycc = new_keycc;
- matcher = "grep";
- compile = Gcompile;
- execute = EGexecute;
- }
-
- /* Case-insensitive matching is expensive in multibyte locales
- because a few characters may change size when converted to upper
- or lower case. To accommodate those, search the input one line
- at a time, rather than using the much more efficient buffer search.
-
- Try to convert a regular expression 'foo' (ignoring case) to an
- equivalent regular expression '[fF][oO][oO]' (where case matters).
- Not only does this avoid the expensive requirement to read and
- process a line at a time, it also allows use of the kwset engine,
- a win in non-UTF-8 multibyte locales. */
- if (match_icase)
- {
- size_t new_keycc;
- char *new_keys;
- /* It is not possible with -F, not useful with -P (pcre) and there is no
- point when there is no regexp. It also depends on which constructs
- appear in the regexp. See trivial_case_ignore for those details. */
- if (keycc
- && ! (compile == Fcompile || compile == Pcompile)
- && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys))
- {
- match_icase = 0;
- free (keys);
- keys = new_keys;
- keycc = new_keycc;
- }
- }
-
-#if MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- build_mbclen_cache ();
-#endif
-
- compile (keys, keycc);
- free (keys);
-
- if ((argc - optind > 1 && !no_filenames) || with_filenames)
- out_file = 1;
-
-#ifdef SET_BINARY
- /* Output is set to binary mode because we shouldn't convert
- NL to CR-LF pairs, especially when grepping binary files. */
- if (!isatty (1))
- SET_BINARY (1);
-#endif
-
- if (max_count == 0)
- exit (EXIT_FAILURE);
-
- if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
- devices = READ_DEVICES;
-
- if (optind < argc)
- {
- status = 1;
- do
- status &= grep_command_line_arg (argv[optind]);
- while (++optind < argc);
- }
- else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
- {
- /* Grep through ".", omitting leading "./" from diagnostics. */
- filename_prefix_len = 2;
- status = grep_command_line_arg (".");
- }
- else
- status = grep_command_line_arg ("-");
-
- /* We register via atexit() to test stdout. */
- exit (errseen ? EXIT_TROUBLE : status);
-}
-/* vim:set shiftwidth=2: */
+const char before_options[] =
+N_("PATTERN is, by default, a basic regular expression (BRE).\n");
+const char after_options[] =
+N_("'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\
+Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n");
diff --git a/src/grep.h b/src/grep.h
index 4935872..4d642a1 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -20,6 +20,23 @@
#ifndef GREP_GREP_H
#define GREP_GREP_H 1
+/* Function pointer types. */
+typedef void (*compile_fp_t) (char const *, size_t);
+typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *);
+
+/* grep.c expects the matchers vector to be terminated by an entry
+ with a NULL name, and to contain at least one entry. */
+struct matcher
+{
+ const char *name;
+ compile_fp_t compile;
+ execute_fp_t execute;
+};
+extern const struct matcher matchers[];
+
+extern const char before_options[];
+extern const char after_options[];
+
/* The following flags are exported from grep for the matchers
to look at. */
extern int match_icase; /* -i */
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..e429380
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,2478 @@
+/* grep.c - main driver file for grep.
+ Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written July 1992 by Mike Haertel. */
+
+#include <config.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "mbsupport.h"
+#include <wchar.h>
+#include <wctype.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include "system.h"
+
+#include "argmatch.h"
+#include "c-ctype.h"
+#include "closeout.h"
+#include "colorize.h"
+#include "dfa.h"
+#include "error.h"
+#include "exclude.h"
+#include "exitfail.h"
+#include "fcntl-safer.h"
+#include "fts_.h"
+#include "getopt.h"
+#include "grep.h"
+#include "intprops.h"
+#include "progname.h"
+#include "propername.h"
+#include "quote.h"
+#include "safe-read.h"
+#include "search.h"
+#include "version-etc.h"
+#include "xalloc.h"
+#include "xstrtol.h"
+
+#define SEP_CHAR_SELECTED ':'
+#define SEP_CHAR_REJECTED '-'
+#define SEP_STR_GROUP "--"
+
+#define AUTHORS \
+ proper_name ("Mike Haertel"), \
+ _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")
+
+/* When stdout is connected to a regular file, save its stat
+ information here, so that we can automatically skip it, thus
+ avoiding a potential (racy) infinite loop. */
+static struct stat out_stat;
+
+/* if non-zero, display usage information and exit */
+static int show_help;
+
+/* If non-zero, print the version on standard output and exit. */
+static int show_version;
+
+/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */
+static int suppress_errors;
+
+/* If nonzero, use color markers. */
+static int color_option;
+
+/* If nonzero, show only the part of a line matching the expression. */
+static int only_matching;
+
+/* If nonzero, make sure first content char in a line is on a tab stop. */
+static int align_tabs;
+
+/* The group separator used when context is requested. */
+static const char *group_separator = SEP_STR_GROUP;
+
+/* The context and logic for choosing default --color screen attributes
+ (foreground and background colors, etc.) are the following.
+ -- There are eight basic colors available, each with its own
+ nominal luminosity to the human eye and foreground/background
+ codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
+ magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
+ yellow [89 %, 33/43], and white [100 %, 37/47]).
+ -- Sometimes, white as a background is actually implemented using
+ a shade of light gray, so that a foreground white can be visible
+ on top of it (but most often not).
+ -- Sometimes, black as a foreground is actually implemented using
+ a shade of dark gray, so that it can be visible on top of a
+ background black (but most often not).
+ -- Sometimes, more colors are available, as extensions.
+ -- Other attributes can be selected/deselected (bold [1/22],
+ underline [4/24], standout/inverse [7/27], blink [5/25], and
+ invisible/hidden [8/28]). They are sometimes implemented by
+ using colors instead of what their names imply; e.g., bold is
+ often achieved by using brighter colors. In practice, only bold
+ is really available to us, underline sometimes being mapped by
+ the terminal to some strange color choice, and standout best
+ being left for use by downstream programs such as less(1).
+ -- We cannot assume that any of the extensions or special features
+ are available for the purpose of choosing defaults for everyone.
+ -- The most prevalent default terminal backgrounds are pure black
+ and pure white, and are not necessarily the same shades of
+ those as if they were selected explicitly with SGR sequences.
+ Some terminals use dark or light pictures as default background,
+ but those are covered over by an explicit selection of background
+ color with an SGR sequence; their users will appreciate their
+ background pictures not be covered like this, if possible.
+ -- Some uses of colors attributes is to make some output items
+ more understated (e.g., context lines); this cannot be achieved
+ by changing the background color.
+ -- For these reasons, the grep color defaults should strive not
+ to change the background color from its default, unless it's
+ for a short item that should be highlighted, not understated.
+ -- The grep foreground color defaults (without an explicitly set
+ background) should provide enough contrast to be readable on any
+ terminal with either a black (dark) or white (light) background.
+ This only leaves red, magenta, green, and cyan (and their bold
+ counterparts) and possibly bold blue. */
+/* The color strings used for matched text.
+ The user can overwrite them using the deprecated
+ environment variable GREP_COLOR or the new GREP_COLORS. */
+static const char *selected_match_color = "01;31"; /* bold red */
+static const char *context_match_color = "01;31"; /* bold red */
+
+/* Other colors. Defaults look damn good. */
+static const char *filename_color = "35"; /* magenta */
+static const char *line_num_color = "32"; /* green */
+static const char *byte_num_color = "32"; /* green */
+static const char *sep_color = "36"; /* cyan */
+static const char *selected_line_color = ""; /* default color pair */
+static const char *context_line_color = ""; /* default color pair */
+
+/* Select Graphic Rendition (SGR, "\33[...m") strings. */
+/* Also Erase in Line (EL) to Right ("\33[K") by default. */
+/* Why have EL to Right after SGR?
+ -- The behavior of line-wrapping when at the bottom of the
+ terminal screen and at the end of the current line is often
+ such that a new line is introduced, entirely cleared with
+ the current background color which may be different from the
+ default one (see the boolean back_color_erase terminfo(5)
+ capability), thus scrolling the display by one line.
+ The end of this new line will stay in this background color
+ even after reverting to the default background color with
+ "\33[m", unless it is explicitly cleared again with "\33[K"
+ (which is the behavior the user would instinctively expect
+ from the whole thing). There may be some unavoidable
+ background-color flicker at the end of this new line because
+ of this (when timing with the monitor's redraw is just right).
+ -- The behavior of HT (tab, "\t") is usually the same as that of
+ Cursor Forward Tabulation (CHT) with a default parameter
+ of 1 ("\33[I"), i.e., it performs pure movement to the next
+ tab stop, without any clearing of either content or screen
+ attributes (including background color); try
+ printf 'asdfqwerzxcv\rASDF\tZXCV\n'
+ in a bash(1) shell to demonstrate this. This is not what the
+ user would instinctively expect of HT (but is ok for CHT).
+ The instinctive behavior would include clearing the terminal
+ cells that are skipped over by HT with blank cells in the
+ current screen attributes, including background color;
+ the boolean dest_tabs_magic_smso terminfo(5) capability
+ indicates this saner behavior for HT, but only some rare
+ terminals have it (although it also indicates a special
+ glitch with standout mode in the Teleray terminal for which
+ it was initially introduced). The remedy is to add "\33[K"
+ after each SGR sequence, be it START (to fix the behavior
+ of any HT after that before another SGR) or END (to fix the
+ behavior of an HT in default background color that would
+ follow a line-wrapping at the bottom of the screen in another
+ background color, and to complement doing it after START).
+ Piping grep's output through a pager such as less(1) avoids
+ any HT problems since the pager performs tab expansion.
+
+ Generic disadvantages of this remedy are:
+ -- Some very rare terminals might support SGR but not EL (nobody
+ will use "grep --color" on a terminal that does not support
+ SGR in the first place).
+ -- Having these extra control sequences might somewhat complicate
+ the task of any program trying to parse "grep --color"
+ output in order to extract structuring information from it.
+ A specific disadvantage to doing it after SGR START is:
+ -- Even more possible background color flicker (when timing
+ with the monitor's redraw is just right), even when not at the
+ bottom of the screen.
+ There are no additional disadvantages specific to doing it after
+ SGR END.
+
+ It would be impractical for GNU grep to become a full-fledged
+ terminal program linked against ncurses or the like, so it will
+ not detect terminfo(5) capabilities. */
+static const char *sgr_start = "\33[%sm\33[K";
+static const char *sgr_end = "\33[m\33[K";
+
+/* SGR utility functions. */
+static void
+pr_sgr_start (char const *s)
+{
+ if (*s)
+ print_start_colorize (sgr_start, s);
+}
+static void
+pr_sgr_end (char const *s)
+{
+ if (*s)
+ print_end_colorize (sgr_end);
+}
+static void
+pr_sgr_start_if (char const *s)
+{
+ if (color_option)
+ pr_sgr_start (s);
+}
+static void
+pr_sgr_end_if (char const *s)
+{
+ if (color_option)
+ pr_sgr_end (s);
+}
+
+struct color_cap
+ {
+ const char *name;
+ const char **var;
+ void (*fct) (void);
+ };
+
+static void
+color_cap_mt_fct (void)
+{
+ /* Our caller just set selected_match_color. */
+ context_match_color = selected_match_color;
+}
+
+static void
+color_cap_rv_fct (void)
+{
+ /* By this point, it was 1 (or already -1). */
+ color_option = -1; /* That's still != 0. */
+}
+
+static void
+color_cap_ne_fct (void)
+{
+ sgr_start = "\33[%sm";
+ sgr_end = "\33[m";
+}
+
+/* For GREP_COLORS. */
+static const struct color_cap color_dict[] =
+ {
+ { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
+ { "ms", &selected_match_color, NULL }, /* selected matched text */
+ { "mc", &context_match_color, NULL }, /* context matched text */
+ { "fn", &filename_color, NULL }, /* filename */
+ { "ln", &line_num_color, NULL }, /* line number */
+ { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
+ { "se", &sep_color, NULL }, /* separator */
+ { "sl", &selected_line_color, NULL }, /* selected lines */
+ { "cx", &context_line_color, NULL }, /* context lines */
+ { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
+ { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
+ { NULL, NULL, NULL }
+ };
+
+static struct exclude *excluded_patterns;
+static struct exclude *excluded_directory_patterns;
+/* Short options. */
+static char const short_options[] =
+"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
+
+/* Non-boolean long options that have no corresponding short equivalents. */
+enum
+{
+ BINARY_FILES_OPTION = CHAR_MAX + 1,
+ COLOR_OPTION,
+ INCLUDE_OPTION,
+ EXCLUDE_OPTION,
+ EXCLUDE_FROM_OPTION,
+ LINE_BUFFERED_OPTION,
+ LABEL_OPTION,
+ EXCLUDE_DIRECTORY_OPTION,
+ GROUP_SEPARATOR_OPTION
+};
+
+/* Long options equivalences. */
+static struct option const long_options[] =
+{
+ {"basic-regexp", no_argument, NULL, 'G'},
+ {"extended-regexp", no_argument, NULL, 'E'},
+ {"fixed-regexp", no_argument, NULL, 'F'},
+ {"fixed-strings", no_argument, NULL, 'F'},
+ {"perl-regexp", no_argument, NULL, 'P'},
+ {"after-context", required_argument, NULL, 'A'},
+ {"before-context", required_argument, NULL, 'B'},
+ {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
+ {"byte-offset", no_argument, NULL, 'b'},
+ {"context", required_argument, NULL, 'C'},
+ {"color", optional_argument, NULL, COLOR_OPTION},
+ {"colour", optional_argument, NULL, COLOR_OPTION},
+ {"count", no_argument, NULL, 'c'},
+ {"devices", required_argument, NULL, 'D'},
+ {"directories", required_argument, NULL, 'd'},
+ {"exclude", required_argument, NULL, EXCLUDE_OPTION},
+ {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
+ {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
+ {"file", required_argument, NULL, 'f'},
+ {"files-with-matches", no_argument, NULL, 'l'},
+ {"files-without-match", no_argument, NULL, 'L'},
+ {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
+ {"help", no_argument, &show_help, 1},
+ {"include", required_argument, NULL, INCLUDE_OPTION},
+ {"ignore-case", no_argument, NULL, 'i'},
+ {"initial-tab", no_argument, NULL, 'T'},
+ {"label", required_argument, NULL, LABEL_OPTION},
+ {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
+ {"line-number", no_argument, NULL, 'n'},
+ {"line-regexp", no_argument, NULL, 'x'},
+ {"max-count", required_argument, NULL, 'm'},
+
+ {"no-filename", no_argument, NULL, 'h'},
+ {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
+ {"no-messages", no_argument, NULL, 's'},
+ {"null", no_argument, NULL, 'Z'},
+ {"null-data", no_argument, NULL, 'z'},
+ {"only-matching", no_argument, NULL, 'o'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"recursive", no_argument, NULL, 'r'},
+ {"dereference-recursive", no_argument, NULL, 'R'},
+ {"regexp", required_argument, NULL, 'e'},
+ {"invert-match", no_argument, NULL, 'v'},
+ {"silent", no_argument, NULL, 'q'},
+ {"text", no_argument, NULL, 'a'},
+ {"binary", no_argument, NULL, 'U'},
+ {"unix-byte-offsets", no_argument, NULL, 'u'},
+ {"version", no_argument, NULL, 'V'},
+ {"with-filename", no_argument, NULL, 'H'},
+ {"word-regexp", no_argument, NULL, 'w'},
+ {0, 0, 0, 0}
+};
+
+/* Define flags declared in grep.h. */
+int match_icase;
+int match_words;
+int match_lines;
+unsigned char eolbyte;
+
+static char const *matcher;
+
+/* For error messages. */
+/* The input file name, or (if standard input) "-" or a --label argument. */
+static char const *filename;
+static size_t filename_prefix_len;
+static int errseen;
+static int write_error_seen;
+
+enum directories_type
+ {
+ READ_DIRECTORIES = 2,
+ RECURSE_DIRECTORIES,
+ SKIP_DIRECTORIES
+ };
+
+/* How to handle directories. */
+static char const *const directories_args[] =
+{
+ "read", "recurse", "skip", NULL
+};
+static enum directories_type const directories_types[] =
+{
+ READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
+};
+ARGMATCH_VERIFY (directories_args, directories_types);
+
+static enum directories_type directories = READ_DIRECTORIES;
+
+enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
+static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
+
+/* How to handle devices. */
+static enum
+ {
+ READ_COMMAND_LINE_DEVICES,
+ READ_DEVICES,
+ SKIP_DEVICES
+ } devices = READ_COMMAND_LINE_DEVICES;
+
+static int grepfile (int, char const *, int, int);
+static int grepdesc (int, int);
+#if defined HAVE_DOS_FILE_CONTENTS
+static int undossify_input (char *, size_t);
+#endif
+
+static int
+is_device_mode (mode_t m)
+{
+ return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m);
+}
+
+/* Return nonzero if ST->st_size is defined. Assume the file is not a
+ symbolic link. */
+static int
+usable_st_size (struct stat const *st)
+{
+ return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st);
+}
+
+/* Functions we'll use to search. */
+static compile_fp_t compile;
+static execute_fp_t execute;
+
+/* Like error, but suppress the diagnostic if requested. */
+static void
+suppressible_error (char const *mesg, int errnum)
+{
+ if (! suppress_errors)
+ error (0, errnum, "%s", mesg);
+ errseen = 1;
+}
+
+/* If there has already been a write error, don't bother closing
+ standard output, as that might elicit a duplicate diagnostic. */
+static void
+clean_up_stdout (void)
+{
+ if (! write_error_seen)
+ close_stdout ();
+}
+
+/* Return 1 if a file is known to be binary for the purpose of 'grep'.
+ BUF, of size BUFSIZE, is the initial buffer read from the file with
+ descriptor FD and status ST. */
+static int
+file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
+{
+ #ifndef SEEK_HOLE
+ enum { SEEK_HOLE = SEEK_END };
+ #endif
+
+ /* If -z, test only whether the initial buffer contains '\200';
+ knowing about holes won't help. */
+ if (! eolbyte)
+ return memchr (buf, '\200', bufsize) != 0;
+
+ /* If the initial buffer contains a null byte, guess that the file
+ is binary. */
+ if (memchr (buf, '\0', bufsize))
+ return 1;
+
+ /* If the file has holes, it must contain a null byte somewhere. */
+ if (SEEK_HOLE != SEEK_END && usable_st_size (st))
+ {
+ off_t cur = bufsize;
+ if (O_BINARY || fd == STDIN_FILENO)
+ {
+ cur = lseek (fd, 0, SEEK_CUR);
+ if (cur < 0)
+ return 0;
+ }
+
+ /* Look for a hole after the current location. */
+ off_t hole_start = lseek (fd, cur, SEEK_HOLE);
+ if (0 <= hole_start)
+ {
+ if (lseek (fd, cur, SEEK_SET) < 0)
+ suppressible_error (filename, errno);
+ if (hole_start < st->st_size)
+ return 1;
+ }
+ }
+
+ /* Guess that the file does not contain binary data. */
+ return 0;
+}
+
+/* Convert STR to a nonnegative integer, storing the result in *OUT.
+ STR must be a valid context length argument; report an error if it
+ isn't. Silently ceiling *OUT at the maximum value, as that is
+ practically equivalent to infinity for grep's purposes. */
+static void
+context_length_arg (char const *str, intmax_t *out)
+{
+ switch (xstrtoimax (str, 0, 10, out, ""))
+ {
+ case LONGINT_OK:
+ case LONGINT_OVERFLOW:
+ if (0 <= *out)
+ break;
+ /* Fall through. */
+ default:
+ error (EXIT_TROUBLE, 0, "%s: %s", str,
+ _("invalid context length argument"));
+ }
+}
+
+/* Return nonzero if the file with NAME should be skipped.
+ If COMMAND_LINE is nonzero, it is a command-line argument.
+ If IS_DIR is nonzero, it is a directory. */
+static int
+skipped_file (char const *name, int command_line, int is_dir)
+{
+ return (is_dir
+ ? (directories == SKIP_DIRECTORIES
+ || (! (command_line && filename_prefix_len != 0)
+ && excluded_directory_patterns
+ && excluded_file_name (excluded_directory_patterns, name)))
+ : (excluded_patterns
+ && excluded_file_name (excluded_patterns, name)));
+}
+
+/* Hairy buffering mechanism for grep. The intent is to keep
+ all reads aligned on a page boundary and multiples of the
+ page size, unless a read yields a partial page. */
+
+static char *buffer; /* Base of buffer. */
+static size_t bufalloc; /* Allocated buffer size, counting slop. */
+#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
+static int bufdesc; /* File descriptor. */
+static char *bufbeg; /* Beginning of user-visible stuff. */
+static char *buflim; /* Limit of user-visible stuff. */
+static size_t pagesize; /* alignment of memory pages */
+static off_t bufoffset; /* Read offset; defined on regular files. */
+static off_t after_last_match; /* Pointer after last matching line that
+ would have been output if we were
+ outputting characters. */
+
+/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
+ an integer or a pointer. Both args must be free of side effects. */
+#define ALIGN_TO(val, alignment) \
+ ((size_t) (val) % (alignment) == 0 \
+ ? (val) \
+ : (val) + ((alignment) - (size_t) (val) % (alignment)))
+
+/* Reset the buffer for a new file, returning zero if we should skip it.
+ Initialize on the first time through. */
+static int
+reset (int fd, struct stat const *st)
+{
+ if (! pagesize)
+ {
+ pagesize = getpagesize ();
+ if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
+ abort ();
+ bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1;
+ buffer = xmalloc (bufalloc);
+ }
+
+ bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
+ bufbeg[-1] = eolbyte;
+ bufdesc = fd;
+
+ if (S_ISREG (st->st_mode))
+ {
+ if (fd != STDIN_FILENO)
+ bufoffset = 0;
+ else
+ {
+ bufoffset = lseek (fd, 0, SEEK_CUR);
+ if (bufoffset < 0)
+ {
+ suppressible_error (_("lseek failed"), errno);
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/* Read new stuff into the buffer, saving the specified
+ amount of old stuff. When we're done, 'bufbeg' points
+ to the beginning of the buffer contents, and 'buflim'
+ points just after the end. Return zero if there's an error.
+
+ SAVE is the number of bytes immediately before 'buflim' to keep.
+ ST is the status of the file being read; its size is used only as a
+ hint to limit how far the buffer grows. */
+static int
+fillbuf (size_t save, struct stat const *st)
+{
+ ssize_t fillsize;
+ int cc = 1; /* Return value; cleared if the read fails. */
+ char *readbuf;
+ size_t readsize;
+
+ /* Offset from start of buffer to start of old stuff
+ that we want to save. */
+ size_t saved_offset = buflim - save - buffer;
+
+ /* If at least one page is still free after 'buflim', read in place. */
+ if (pagesize <= buffer + bufalloc - buflim)
+ {
+ readbuf = buflim;
+ bufbeg = buflim - save;
+ }
+ else
+ {
+ /* Otherwise move the saved bytes toward the start of the buffer,
+ allocating a larger buffer first if the current one is too small. */
+ size_t minsize = save + pagesize;
+ size_t newsize;
+ size_t newalloc;
+ char *newbuf;
+
+ /* Grow newsize until it is at least as great as minsize. */
+ for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2)
+ if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2)
+ xalloc_die ();
+
+ /* Try not to allocate more memory than the file size indicates,
+ as that might cause unnecessary memory exhaustion if the file
+ is large. However, do not use the original file size as a
+ heuristic if we've already read past the file end, as most
+ likely the file is growing. */
+ if (usable_st_size (st))
+ {
+ off_t to_be_read = st->st_size - bufoffset;
+ off_t maxsize_off = save + to_be_read;
+ if (0 <= to_be_read && to_be_read <= maxsize_off
+ && maxsize_off == (size_t) maxsize_off
+ && minsize <= (size_t) maxsize_off
+ && (size_t) maxsize_off < newsize)
+ newsize = maxsize_off;
+ }
+
+ /* Add enough room so that the buffer is aligned and has room
+ for byte sentinels fore and aft. */
+ newalloc = newsize + pagesize + 1;
+
+ newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
+ readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
+ bufbeg = readbuf - save;
+ /* memmove, not memcpy: when the buffer is reused the source and
+ destination ranges may overlap. */
+ memmove (bufbeg, buffer + saved_offset, save);
+ bufbeg[-1] = eolbyte;
+ if (newbuf != buffer)
+ {
+ free (buffer);
+ buffer = newbuf;
+ }
+ }
+
+ /* Read as much as fits after readbuf, rounded down to whole pages. */
+ readsize = buffer + bufalloc - readbuf;
+ readsize -= readsize % pagesize;
+
+ fillsize = safe_read (bufdesc, readbuf, readsize);
+ /* On a read failure, behave as if nothing was read and return 0. */
+ if (fillsize < 0)
+ fillsize = cc = 0;
+ bufoffset += fillsize;
+#if defined HAVE_DOS_FILE_CONTENTS
+ if (fillsize)
+ fillsize = undossify_input (readbuf, fillsize);
+#endif
+ buflim = readbuf + fillsize;
+ return cc;
+}
+
+/* Flags controlling the style of output. */
+static enum
+{
+ BINARY_BINARY_FILES,
+ TEXT_BINARY_FILES,
+ WITHOUT_MATCH_BINARY_FILES
+} binary_files; /* How to handle binary files. */
+
+static int filename_mask; /* If zero, output a null byte after file
+ names instead of a separator or newline. */
+static int out_quiet; /* Suppress all normal output. */
+static int out_invert; /* Print nonmatching stuff. */
+static int out_file; /* Print filenames. */
+static int out_line; /* Print line numbers. */
+static int out_byte; /* Print byte offsets. */
+static intmax_t out_before; /* Lines of leading context. */
+static intmax_t out_after; /* Lines of trailing context. */
+static int count_matches; /* Count matching lines. */
+static int list_files; /* List matching files. */
+static int no_filenames; /* Suppress file names. */
+static intmax_t max_count; /* Stop after outputting this many
+ lines from an input file. */
+static int line_buffered; /* If nonzero, use line buffering, i.e.
+ fflush stdout after every line output. */
+static char *label = NULL; /* If non-NULL, the file name reported for
+ standard input. */
+
+
+/* Internal variables to keep track of byte count, context, etc.
+ They are reinitialized by grep () for each file searched. */
+static uintmax_t totalcc; /* Total character count before bufbeg. */
+static char const *lastnl; /* Pointer after last newline counted. */
+static char const *lastout; /* Pointer after last character output;
+ NULL if no character has been output
+ or if it's conceptually before bufbeg. */
+static uintmax_t totalnl; /* Total newline count before lastnl. */
+static intmax_t outleft; /* Maximum number of lines still to be output
+ for the current file; starts at max_count. */
+static intmax_t pending; /* Pending lines of output.
+ Always kept 0 if out_quiet is true. */
+static int done_on_match; /* Stop scanning file on first match. */
+static int exit_on_match; /* Exit on first match. */
+
+#if defined HAVE_DOS_FILE_CONTENTS
+# include "dosbuf.c"
+#endif
+
+/* Return A + B, where both are counts of input bytes or lines.
+   If the sum does not fit in uintmax_t, report the problem through
+   error () with status EXIT_TROUBLE.  */
+static uintmax_t
+add_count (uintmax_t a, uintmax_t b)
+{
+  uintmax_t total = a;
+
+  total += b;
+  /* Unsigned addition wraps around, so a wrapped total is smaller
+     than the operand it started from.  */
+  if (a > total)
+    error (EXIT_TROUBLE, 0, _("input is too large to count"));
+  return total;
+}
+
+/* Count the end-of-line bytes between 'lastnl' and LIM, add that
+   count to 'totalnl', and advance 'lastnl' to LIM.  */
+static void
+nlscan (char const *lim)
+{
+  size_t count = 0;
+  char const *p = lastnl;
+
+  while (p < lim && (p = memchr (p, eolbyte, lim - p)) != NULL)
+    {
+      count++;
+      p++;                      /* Resume just past the byte found.  */
+    }
+
+  totalnl = add_count (totalnl, count);
+  lastnl = lim;
+}
+
+/* Print the current filename (the global 'filename') to stdout,
+ bracketed by the filename_color start/end sequences when
+ pr_sgr_start_if/pr_sgr_end_if decide to emit them. */
+static void
+print_filename (void)
+{
+ pr_sgr_start_if (filename_color);
+ fputs (filename, stdout);
+ pr_sgr_end_if (filename_color);
+}
+
+/* Print a character separator: write the single character SEP to
+ stdout, bracketed by the sep_color start/end sequences when
+ pr_sgr_start_if/pr_sgr_end_if decide to emit them. */
+static void
+print_sep (char sep)
+{
+ pr_sgr_start_if (sep_color);
+ fputc (sep, stdout);
+ pr_sgr_end_if (sep_color);
+}
+
+/* Print POS (a line number or a byte offset) in decimal to stdout,
+   using COLOR for the optional color sequences.  When 'align_tabs'
+   is set, pad on the left with spaces to at least MIN_WIDTH
+   characters.  */
+static void
+print_offset (uintmax_t pos, int min_width, const char *color)
+{
+  /* Format by hand instead of relying on printf, since uintmax_t may
+     be longer than long, and long long is not portable.  The digits
+     are produced least-significant first, filling the array from its
+     end toward its start.  */
+  char digits[sizeof pos * CHAR_BIT];
+  char *const end = digits + sizeof digits;
+  char *cur = end;
+
+  do
+    {
+      cur--;
+      *cur = '0' + pos % 10;
+      min_width--;
+      pos /= 10;
+    }
+  while (pos != 0);
+
+  /* Padding maximizes the probability of alignment across lines.  */
+  if (align_tabs)
+    for (; min_width > 0; min_width--)
+      *--cur = ' ';
+
+  pr_sgr_start_if (color);
+  fwrite (cur, 1, end - cur, stdout);
+  pr_sgr_end_if (color);
+}
+
+/* Print a whole line head (filename, line, byte) for the line that
+ spans BEG up to LIM. Each part is printed only if the corresponding
+ out_file / out_line / out_byte flag is set, and the parts are
+ separated and terminated by SEP. */
+static void
+print_line_head (char const *beg, char const *lim, int sep)
+{
+ /* Nonzero when a separator is still owed after the previous part. */
+ int pending_sep = 0;
+
+ if (out_file)
+ {
+ print_filename ();
+ /* With a zero filename_mask the name is followed by a null byte
+ rather than by SEP. */
+ if (filename_mask)
+ pending_sep = 1;
+ else
+ fputc (0, stdout);
+ }
+
+ if (out_line)
+ {
+ /* Bring the running line count up to date, unless this line has
+ already been counted. */
+ if (lastnl < lim)
+ {
+ nlscan (beg);
+ totalnl = add_count (totalnl, 1);
+ lastnl = lim;
+ }
+ if (pending_sep)
+ print_sep (sep);
+ print_offset (totalnl, 4, line_num_color);
+ pending_sep = 1;
+ }
+
+ if (out_byte)
+ {
+ uintmax_t pos = add_count (totalcc, beg - bufbeg);
+#if defined HAVE_DOS_FILE_CONTENTS
+ pos = dossified_pos (pos);
+#endif
+ if (pending_sep)
+ print_sep (sep);
+ print_offset (pos, 6, byte_num_color);
+ pending_sep = 1;
+ }
+
+ if (pending_sep)
+ {
+ /* This assumes sep is one column wide.
+ Try doing this any other way with Unicode
+ (and its combining and wide characters)
+ filenames and you're wasting your efforts. */
+ if (align_tabs)
+ fputs ("\t\b", stdout);
+
+ print_sep (sep);
+ }
+}
+
+/* Output the matched parts of the line that spans BEG up to LIM.
+ With 'only_matching', print each non-empty match on a line of its
+ own, preceded by a line head, and return LIM. Otherwise print the
+ text before each non-empty match using LINE_COLOR and the match
+ itself using MATCH_COLOR, and return a pointer to the first byte of
+ the line that has not been printed yet. */
+static const char *
+print_line_middle (const char *beg, const char *lim,
+ const char *line_color, const char *match_color)
+{
+ size_t match_size;
+ size_t match_offset;
+ const char *cur = beg;
+ /* Where unprinted text starts when one or more empty matches have
+ been skipped since the last non-empty match; NULL otherwise. */
+ const char *mid = NULL;
+
+ while (cur < lim
+ && ((match_offset = execute (beg, lim - beg, &match_size,
+ beg + (cur - beg))) != (size_t) -1))
+ {
+ char const *b = beg + match_offset;
+
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (b == lim)
+ break;
+
+ /* Avoid hanging on grep --color "" foo */
+ if (match_size == 0)
+ {
+ /* Make minimal progress; there may be further non-empty matches. */
+ /* XXX - Could really advance by one whole multi-octet character. */
+ match_size = 1;
+ if (!mid)
+ mid = cur;
+ }
+ else
+ {
+ /* This function is called on a matching line only,
+ but is it selected or rejected/context? */
+ if (only_matching)
+ print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
+ : SEP_CHAR_SELECTED));
+ else
+ {
+ pr_sgr_start (line_color);
+ /* Include any text skipped over by earlier empty matches. */
+ if (mid)
+ {
+ cur = mid;
+ mid = NULL;
+ }
+ fwrite (cur, sizeof (char), b - cur, stdout);
+ }
+
+ pr_sgr_start_if (match_color);
+ fwrite (b, sizeof (char), match_size, stdout);
+ pr_sgr_end_if (match_color);
+ if (only_matching)
+ fputs ("\n", stdout);
+ }
+ cur = b + match_size;
+ }
+
+ if (only_matching)
+ cur = lim;
+ else if (mid)
+ cur = mid;
+
+ return cur;
+}
+
+/* Print, using LINE_COLOR, the part of the line from BEG up to LIM
+   that precedes its line terminator: a trailing 'eolbyte', and a
+   '\r' just before that position, are left unprinted.  Return a
+   pointer to the first byte that was not printed.  */
+static const char *
+print_line_tail (const char *beg, const char *lim, const char *line_color)
+{
+  const char *end = lim;
+
+  /* Step back over the end-of-line byte, then over a carriage return.  */
+  if (end > beg && end[-1] == eolbyte)
+    end--;
+  if (end > beg && end[-1] == '\r')
+    end--;
+
+  if (end == beg)
+    return beg;
+
+  pr_sgr_start (line_color);
+  fwrite (beg, 1, end - beg, stdout);
+  pr_sgr_end (line_color);
+  return end;
+}
+
+/* Print the line that spans BEG up to LIM. SEP is SEP_CHAR_SELECTED
+ for a selected line; any other value marks a context line. The
+ line head is printed here unless 'only_matching' is set, in which
+ case print_line_middle prints one per match. Afterwards record LIM
+ in 'lastout', report a write error on stdout through error () with
+ EXIT_TROUBLE, and flush stdout if 'line_buffered' is set. */
+static void
+prline (char const *beg, char const *lim, int sep)
+{
+ int matching;
+ const char *line_color;
+ const char *match_color;
+
+ if (!only_matching)
+ print_line_head (beg, lim, sep);
+
+ /* Whether the line contains a match: selected lines do, unless the
+ sense is inverted by out_invert. */
+ matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert;
+
+ if (color_option)
+ {
+ line_color = (((sep == SEP_CHAR_SELECTED)
+ ^ (out_invert && (color_option < 0)))
+ ? selected_line_color : context_line_color);
+ match_color = (sep == SEP_CHAR_SELECTED
+ ? selected_match_color : context_match_color);
+ }
+ else
+ line_color = match_color = NULL; /* Shouldn't be used. */
+
+ if ((only_matching && matching)
+ || (color_option && (*line_color || *match_color)))
+ {
+ /* We already know that non-matching lines have no match (to colorize). */
+ if (matching && (only_matching || *match_color))
+ beg = print_line_middle (beg, lim, line_color, match_color);
+
+ if (!only_matching && *line_color)
+ {
+ /* This code is exercised at least when grep is invoked like this:
+ echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
+ beg = print_line_tail (beg, lim, line_color);
+ }
+ }
+
+ /* Whatever the helpers above did not print goes out uncolored. */
+ if (!only_matching && lim > beg)
+ fwrite (beg, 1, lim - beg, stdout);
+
+ if (ferror (stdout))
+ {
+ write_error_seen = 1;
+ error (EXIT_TROUBLE, 0, _("write error"));
+ }
+
+ lastout = lim;
+
+ if (line_buffered)
+ fflush (stdout);
+}
+
+/* Print pending lines of trailing context prior to LIM. Trailing context ends
+ at the next matching line when OUTLEFT is 0. Printing starts at
+ 'lastout' (or at 'bufbeg' if nothing has been output yet) and
+ consumes one unit of 'pending' per line. */
+static void
+prpending (char const *lim)
+{
+ if (!lastout)
+ lastout = bufbeg;
+ while (pending > 0 && lastout < lim)
+ {
+ /* NOTE(review): nl is used without a NULL check, so this relies on
+ an end-of-line byte existing between lastout and LIM - confirm
+ against the callers. */
+ char const *nl = memchr (lastout, eolbyte, lim - lastout);
+ size_t match_size;
+ --pending;
+ /* While output lines remain, every pending line is context.
+ Otherwise stop at the first line that would itself be selected. */
+ if (outleft
+ || ((execute (lastout, nl + 1 - lastout,
+ &match_size, NULL) == (size_t) -1)
+ == !out_invert))
+ prline (lastout, nl + 1, SEP_CHAR_REJECTED);
+ else
+ pending = 0;
+ }
+}
+
+/* Print the lines between BEG and LIM. Deal with context crap.
+ If NLINESP is non-null, store a count of lines between BEG and LIM.
+ In that case the text is handled line by line and at most 'outleft'
+ lines are processed; otherwise BEG..LIM is printed as a single
+ selected line. Nothing is printed when 'out_quiet' is set. */
+static void
+prtext (char const *beg, char const *lim, intmax_t *nlinesp)
+{
+ static int used; /* avoid printing SEP_STR_GROUP before any output */
+ char const *bp, *p;
+ char eol = eolbyte;
+ intmax_t i, n;
+
+ /* Flush trailing context owed from an earlier match first. */
+ if (!out_quiet && pending > 0)
+ prpending (beg);
+
+ p = beg;
+
+ if (!out_quiet)
+ {
+ /* Deal with leading context crap. */
+
+ /* Back P up by at most out_before lines, but not past BP. */
+ bp = lastout ? lastout : bufbeg;
+ for (i = 0; i < out_before; ++i)
+ if (p > bp)
+ do
+ --p;
+ while (p[-1] != eol);
+
+ /* We print the SEP_STR_GROUP separator only if our output is
+ discontiguous from the last output in the file. */
+ if ((out_before || out_after) && used && p != lastout && group_separator)
+ {
+ pr_sgr_start_if (sep_color);
+ fputs (group_separator, stdout);
+ pr_sgr_end_if (sep_color);
+ fputc ('\n', stdout);
+ }
+
+ /* Print the leading context lines, one at a time. */
+ while (p < beg)
+ {
+ char const *nl = memchr (p, eol, beg - p);
+ nl++;
+ prline (p, nl, SEP_CHAR_REJECTED);
+ p = nl;
+ }
+ }
+
+ if (nlinesp)
+ {
+ /* Caller wants a line count. */
+ for (n = 0; p < lim && n < outleft; n++)
+ {
+ char const *nl = memchr (p, eol, lim - p);
+ nl++;
+ if (!out_quiet)
+ prline (p, nl, SEP_CHAR_SELECTED);
+ p = nl;
+ }
+ *nlinesp = n;
+
+ /* relying on it that this function is never called when outleft = 0. */
+ after_last_match = bufoffset - (buflim - p);
+ }
+ else if (!out_quiet)
+ prline (beg, lim, SEP_CHAR_SELECTED);
+
+ /* Arrange for trailing context to follow what was just handled. */
+ pending = out_quiet ? 0 : out_after;
+ used = 1;
+}
+
+/* Run the matcher, EXECUTE, over the SIZE bytes at BUF, passing
+   START_PTR through to it.  Return (size_t) -1 if nothing matches;
+   otherwise store the length of the match in *MATCH_SIZE and return
+   the offset of its start relative to BUF.  */
+static size_t
+do_execute (char const *buf, size_t size, size_t *match_size,
+            char const *start_ptr)
+{
+  /* With the current implementation, using --ignore-case with a
+     multi-byte character set is very inefficient when applied to a
+     large buffer containing many matches.  Much of the wasted effort
+     is avoided by matching line by line in that case.
+
+     FIXME: this is just an ugly workaround, and it doesn't really
+     belong here.  Also, PCRE is always using this same per-line
+     matching algorithm.  Either we fix -i, or we should refactor
+     this code---for example, we could add another function pointer
+     to struct matcher to split the buffer passed to execute.  It would
+     perform the memchr if line-by-line matching is necessary, or just
+     return buf + size otherwise.  */
+  int line_by_line = (MB_CUR_MAX != 1 && match_icase
+                      && matcher
+                      && (STREQ (matcher, "fgrep")
+                          || STREQ (matcher, "perl")));
+  char const *buf_end = buf + size;
+  char const *line;
+
+  if (! line_by_line)
+    return execute (buf, size, match_size, start_ptr);
+
+  line = buf;
+  while (line < buf_end)
+    {
+      /* EOL is where this line's text stops; NEXT is where the
+         following line starts.  They coincide only for an
+         unterminated final line.  */
+      char const *eol = memchr (line, eolbyte, buf_end - line);
+      char const *next = eol ? eol + 1 : buf_end;
+      if (! eol)
+        eol = buf_end;
+
+      /* Lines that lie wholly before START_PTR are not searched.  */
+      if (! (start_ptr && start_ptr >= eol))
+        {
+          size_t offset = execute (line, next - line, match_size, start_ptr);
+          if (offset != (size_t) -1)
+            return (line - buf) + offset;
+        }
+
+      line = next;
+    }
+
+  return (size_t) -1;
+}
+
+/* Scan the specified portion of the buffer, matching lines (or
+ between matching lines if OUT_INVERT is true). Return a count of
+ lines printed. BEG and LIM delimit the portion to scan. 'outleft'
+ is decremented for each line counted, and scanning stops early once
+ it reaches zero. */
+static intmax_t
+grepbuf (char const *beg, char const *lim)
+{
+ intmax_t nlines, n;
+ char const *p;
+ size_t match_offset;
+ size_t match_size;
+
+ nlines = 0;
+ p = beg;
+ while ((match_offset = do_execute (p, lim - p, &match_size,
+ NULL)) != (size_t) -1)
+ {
+ char const *b = p + match_offset;
+ char const *endp = b + match_size;
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (b == lim)
+ break;
+ if (!out_invert)
+ {
+ /* Normal sense: the match B..ENDP is what gets output. */
+ prtext (b, endp, NULL);
+ nlines++;
+ outleft--;
+ if (!outleft || done_on_match)
+ {
+ if (exit_on_match)
+ exit (EXIT_SUCCESS);
+ after_last_match = bufoffset - (buflim - endp);
+ return nlines;
+ }
+ }
+ else if (p < b)
+ {
+ /* Inverted sense: output the text P..B that precedes the match. */
+ prtext (p, b, &n);
+ nlines += n;
+ outleft -= n;
+ if (!outleft)
+ return nlines;
+ }
+ p = endp;
+ }
+ /* Inverted sense: whatever follows the last match is output too. */
+ if (out_invert && p < lim)
+ {
+ prtext (p, lim, &n);
+ nlines += n;
+ outleft -= n;
+ }
+ return nlines;
+}
+
+/* Search a given file. Normally, return a count of lines printed;
+ but if the file is a directory and we search it recursively, then
+ return -2 if there was a match, and -1 otherwise.
+
+ FD is the open descriptor to read and ST its file status. Zero is
+ also returned when the buffer cannot be reset, when the first read
+ fails, or when a binary file is to be treated as non-matching.
+
+ NOTE(review): no statement in this function returns a negative
+ value, so the -2/-1 sentence above looks stale - confirm. */
+static intmax_t
+grep (int fd, struct stat const *st)
+{
+ intmax_t nlines, i;
+ int not_text;
+ size_t residue, save;
+ char oldc;
+ char *beg;
+ char *lim;
+ char eol = eolbyte;
+
+ if (! reset (fd, st))
+ return 0;
+
+ /* Reinitialize the per-file bookkeeping. */
+ totalcc = 0;
+ lastout = 0;
+ totalnl = 0;
+ outleft = max_count;
+ after_last_match = 0;
+ pending = 0;
+
+ nlines = 0;
+ residue = 0;
+ save = 0;
+
+ if (! fillbuf (save, st))
+ {
+ suppressible_error (filename, errno);
+ return 0;
+ }
+
+ /* Decide from the first buffer-full whether to treat the file as
+ binary. */
+ not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
+ || binary_files == WITHOUT_MATCH_BINARY_FILES)
+ && file_is_binary (bufbeg, buflim - bufbeg, fd, st));
+ if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
+ return 0;
+ /* For a binary file, stop at the first match and print no lines.
+ Both adjustments are undone at finish_grep. */
+ done_on_match += not_text;
+ out_quiet += not_text;
+
+ for (;;)
+ {
+ lastnl = bufbeg;
+ if (lastout)
+ lastout = bufbeg;
+
+ beg = bufbeg + save;
+
+ /* no more data to scan (eof) except for maybe a residue -> break */
+ if (beg == buflim)
+ break;
+
+ /* Determine new residue (the length of an incomplete line at the end of
+ the buffer, 0 means there is no incomplete last line). */
+ oldc = beg[-1];
+ beg[-1] = eol;
+ /* FIXME: use rawmemrchr if/when it exists, since we have ensured
+ that this use of memrchr is guaranteed never to return NULL. */
+ lim = memrchr (beg - 1, eol, buflim - beg + 1);
+ ++lim;
+ beg[-1] = oldc;
+ if (lim == beg)
+ lim = beg - residue;
+ beg -= residue;
+ residue = buflim - lim;
+
+ /* Scan the complete lines BEG..LIM, if there are any. */
+ if (beg < lim)
+ {
+ if (outleft)
+ nlines += grepbuf (beg, lim);
+ if (pending)
+ prpending (lim);
+ if ((!outleft && !pending)
+ || (nlines && done_on_match && !out_invert))
+ goto finish_grep;
+ }
+
+ /* The last OUT_BEFORE lines at the end of the buffer will be needed as
+ leading context if there is a matching line at the begin of the
+ next data. Make beg point to their begin. */
+ i = 0;
+ beg = lim;
+ while (i < out_before && beg > bufbeg && beg != lastout)
+ {
+ ++i;
+ do
+ --beg;
+ while (beg[-1] != eol);
+ }
+
+ /* detect if leading context is discontinuous from last printed line. */
+ if (beg != lastout)
+ lastout = 0;
+
+ /* Handle some details and read more data to scan. */
+ save = residue + lim - beg;
+ if (out_byte)
+ totalcc = add_count (totalcc, buflim - bufbeg - save);
+ if (out_line)
+ nlscan (beg);
+ if (! fillbuf (save, st))
+ {
+ suppressible_error (filename, errno);
+ goto finish_grep;
+ }
+ }
+ /* At end of input, terminate an unfinished final line with an
+ end-of-line byte and scan it as well. */
+ if (residue)
+ {
+ *buflim++ = eol;
+ if (outleft)
+ nlines += grepbuf (bufbeg + save - residue, buflim);
+ if (pending)
+ prpending (buflim);
+ }
+
+ finish_grep:
+ done_on_match -= not_text;
+ out_quiet -= not_text;
+ if ((not_text & ~out_quiet) && nlines != 0)
+ printf (_("Binary file %s matches\n"), filename);
+ return nlines;
+}
+
+/* Handle ENT, an entry returned by fts_read for the traversal FTS.
+ COMMAND_LINE is nonzero if the traversal started from a command-line
+ argument; it is treated as zero for entries below the root level.
+ Set the global 'filename' for entries that are not skipped. Return
+ 1 for entries that are skipped, are directories, or cannot be
+ examined; otherwise return the result of grepfile on the entry. */
+static int
+grepdirent (FTS *fts, FTSENT *ent, int command_line)
+{
+ int follow, dirdesc;
+ struct stat *st = ent->fts_statp;
+ command_line &= ent->fts_level == FTS_ROOTLEVEL;
+
+ /* FTS_DP: clear the out_file bit that the FTS_D case below sets. */
+ if (ent->fts_info == FTS_DP)
+ {
+ if (directories == RECURSE_DIRECTORIES && command_line)
+ out_file &= ~ (2 * !no_filenames);
+ return 1;
+ }
+
+ if (skipped_file (ent->fts_name, command_line,
+ (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
+ || ent->fts_info == FTS_DNR)))
+ {
+ fts_set (fts, ent, FTS_SKIP);
+ return 1;
+ }
+
+ filename = ent->fts_path + filename_prefix_len;
+ follow = (fts->fts_options & FTS_LOGICAL
+ || (fts->fts_options & FTS_COMFOLLOW && command_line));
+
+ switch (ent->fts_info)
+ {
+ case FTS_D:
+ /* When recursing, turn on file name output (unless no_filenames)
+ and let the traversal continue into the directory. */
+ if (directories == RECURSE_DIRECTORIES)
+ {
+ out_file |= 2 * !no_filenames;
+ return 1;
+ }
+ fts_set (fts, ent, FTS_SKIP);
+ break;
+
+ case FTS_DC:
+ if (!suppress_errors)
+ error (0, 0, _("warning: %s: %s"), filename,
+ _("recursive directory loop"));
+ return 1;
+
+ case FTS_DNR:
+ case FTS_ERR:
+ case FTS_NS:
+ suppressible_error (filename, ent->fts_errno);
+ return 1;
+
+ case FTS_DEFAULT:
+ case FTS_NSOK:
+ if (devices == SKIP_DEVICES
+ || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
+ {
+ struct stat st1;
+ if (! st->st_mode)
+ {
+ /* The file type is not already known. Get the file status
+ before opening, since opening might have side effects
+ on a device. */
+ int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
+ if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
+ {
+ suppressible_error (filename, errno);
+ return 1;
+ }
+ st = &st1;
+ }
+ if (is_device_mode (st->st_mode))
+ return 1;
+ }
+ break;
+
+ case FTS_F:
+ case FTS_SLNONE:
+ break;
+
+ case FTS_SL:
+ case FTS_W:
+ return 1;
+
+ default:
+ abort ();
+ }
+
+ /* Open relative to the traversal's directory descriptor only when
+ FTS_CWDFD is set and FTS_NOCHDIR is not. */
+ dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
+ ? fts->fts_cwd_fd
+ : AT_FDCWD);
+ return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
+}
+
+/* Open the file NAME, interpreted relative to the directory
+   descriptor DIRDESC, and search it with grepdesc.  If FOLLOW is
+   zero the file is opened with O_NOFOLLOW, and an ELOOP or EMLINK
+   failure of that open is not reported.  COMMAND_LINE is passed
+   through to grepdesc.  Return 1 if the file cannot be opened,
+   otherwise the result of grepdesc.  */
+static int
+grepfile (int dirdesc, char const *name, int follow, int command_line)
+{
+  int desc = openat_safer (dirdesc, name,
+                           O_RDONLY | (follow ? 0 : O_NOFOLLOW));
+  if (desc < 0)
+    {
+      if (follow || (errno != ELOOP && errno != EMLINK))
+        suppressible_error (filename, errno);
+      return 1;
+    }
+  return grepdesc (desc, command_line);
+}
+
+/* Search the file open on descriptor DESC, whose name is in the
+ global 'filename'. COMMAND_LINE is nonzero if the file was named on
+ the command line. A directory is traversed with fts when
+ 'directories' is RECURSE_DIRECTORIES. DESC is closed before
+ returning unless it is STDIN_FILENO. Return 0 if grep counted at
+ least one line, and 1 if it counted none or the file was skipped or
+ could not be examined; for a traversed directory, return the bitwise
+ AND of the grepdirent results. */
+static int
+grepdesc (int desc, int command_line)
+{
+ intmax_t count;
+ int status = 1;
+ struct stat st;
+
+ /* Get the file status, possibly for the second time. This catches
+ a race condition if the directory entry changes after the
+ directory entry is read and before the file is opened. For
+ example, normally DESC is a directory only at the top level, but
+ there is an exception if some other process substitutes a
+ directory for a non-directory while 'grep' is running. */
+ if (fstat (desc, &st) != 0)
+ {
+ suppressible_error (filename, errno);
+ goto closeout;
+ }
+
+ if (desc != STDIN_FILENO && command_line
+ && skipped_file (filename, 1, S_ISDIR (st.st_mode)))
+ goto closeout;
+
+ if (desc != STDIN_FILENO
+ && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
+ {
+ /* Traverse the directory starting with its full name, because
+ unfortunately fts provides no way to traverse the directory
+ starting from its file descriptor. */
+
+ FTS *fts;
+ FTSENT *ent;
+ int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
+ char *fts_arg[2];
+
+ /* Close DESC now, to conserve file descriptors if the race
+ condition occurs many times in a deep recursion. */
+ if (close (desc) != 0)
+ suppressible_error (filename, errno);
+
+ fts_arg[0] = (char *) filename;
+ fts_arg[1] = NULL;
+ fts = fts_open (fts_arg, opts, NULL);
+
+ if (!fts)
+ xalloc_die ();
+ while ((ent = fts_read (fts)))
+ status &= grepdirent (fts, ent, command_line);
+ /* NOTE(review): this relies on fts_read leaving errno at 0 when
+ the traversal simply ends - confirm against the fts in use. */
+ if (errno)
+ suppressible_error (filename, errno);
+ if (fts_close (fts) != 0)
+ suppressible_error (filename, errno);
+ return status;
+ }
+ if (desc != STDIN_FILENO
+ && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
+ || ((devices == SKIP_DEVICES
+ || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
+ && is_device_mode (st.st_mode))))
+ goto closeout;
+
+ /* If there is a regular file on stdout and the current file refers
+ to the same i-node, we have to report the problem and skip it.
+ Otherwise when matching lines from some other input reach the
+ disk before we open this file, we can end up reading and matching
+ those lines and appending them to the file from which we're reading.
+ Then we'd have what appears to be an infinite loop that'd terminate
+ only upon filling the output file system or reaching a quota.
+ However, there is no risk of an infinite loop if grep is generating
+ no output, i.e., with --silent, --quiet, -q.
+ Similarly, with any of these:
+ --max-count=N (-m) (for N >= 2)
+ --files-with-matches (-l)
+ --files-without-match (-L)
+ there is no risk of trouble.
+ For --max-count=1, grep stops after printing the first match,
+ so there is no risk of malfunction. But even --max-count=2, with
+ input==output, while there is no risk of infloop, there is a race
+ condition that could result in "alternate" output. */
+ if (!out_quiet && list_files == 0 && 1 < max_count
+ && S_ISREG (out_stat.st_mode) && out_stat.st_ino
+ && SAME_INODE (st, out_stat))
+ {
+ if (! suppress_errors)
+ error (0, 0, _("input file %s is also the output"), quote (filename));
+ errseen = 1;
+ goto closeout;
+ }
+
+#if defined SET_BINARY
+ /* Set input to binary mode. Pipes are simulated with files
+ on DOS, so this includes the case of "foo | grep bar". */
+ if (!isatty (desc))
+ SET_BINARY (desc);
+#endif
+
+ count = grep (desc, &st);
+ if (count < 0)
+ status = count + 2;
+ else
+ {
+ if (count_matches)
+ {
+ if (out_file)
+ {
+ print_filename ();
+ if (filename_mask)
+ print_sep (SEP_CHAR_SELECTED);
+ else
+ fputc (0, stdout);
+ }
+ printf ("%" PRIdMAX "\n", count);
+ }
+
+ status = !count;
+ /* 1 - 2 * status is 1 when lines were counted and -1 when none
+ were, so the name is printed when list_files equals the value
+ that fits the outcome. */
+ if (list_files == 1 - 2 * status)
+ {
+ print_filename ();
+ fputc ('\n' & filename_mask, stdout);
+ }
+
+ /* For standard input, if the search stopped because 'outleft' ran
+ out, seek back to just after the last matching line. A failed
+ seek is reported only for a regular file. */
+ if (desc == STDIN_FILENO)
+ {
+ off_t required_offset = outleft ? bufoffset : after_last_match;
+ if (required_offset != bufoffset
+ && lseek (desc, required_offset, SEEK_SET) < 0
+ && S_ISREG (st.st_mode))
+ suppressible_error (filename, errno);
+ }
+ }
+
+ closeout:
+ if (desc != STDIN_FILENO && close (desc) != 0)
+ suppressible_error (filename, errno);
+ return status;
+}
+
+/* Search the command-line argument ARG and return the result of the
+   search.  The argument "-" means standard input, which is reported
+   under 'label' if that is set and under "(standard input)"
+   otherwise; any other argument is opened as a file name relative to
+   the current directory.  The global 'filename' is set accordingly.  */
+static int
+grep_command_line_arg (char const *arg)
+{
+  if (! STREQ (arg, "-"))
+    {
+      filename = arg;
+      return grepfile (AT_FDCWD, arg, 1, 1);
+    }
+
+  filename = label;
+  if (! filename)
+    filename = _("(standard input)");
+  return grepdesc (STDIN_FILENO, 1);
+}
+
+/* Print usage information and exit with STATUS. If STATUS is nonzero,
+ print a short hint to stderr; otherwise print the full help text to
+ stdout. This function does not return. */
+_Noreturn void usage (int);
+void
+usage (int status)
+{
+ if (status != 0)
+ {
+ fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
+ program_name);
+ fprintf (stderr, _("Try '%s --help' for more information.\n"),
+ program_name);
+ }
+ else
+ {
+ printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
+ printf (_("\
+Search for PATTERN in each FILE or standard input.\n"));
+ fputs (_(before_options), stdout);
+ printf (_("\
+Example: %s -i 'hello world' menu.h main.c\n\
+\n\
+Regexp selection and interpretation:\n"), program_name);
+ /* The matcher selection options are listed only when more than one
+ matcher is compiled in. */
+ if (matchers[1].name)
+ printf (_("\
+ -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\
+ -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\
+ -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\
+ -P, --perl-regexp PATTERN is a Perl regular expression\n"));
+ /* -X is undocumented on purpose. */
+ printf (_("\
+ -e, --regexp=PATTERN use PATTERN for matching\n\
+ -f, --file=FILE obtain PATTERN from FILE\n\
+ -i, --ignore-case ignore case distinctions\n\
+ -w, --word-regexp force PATTERN to match only whole words\n\
+ -x, --line-regexp force PATTERN to match only whole lines\n\
+ -z, --null-data a data line ends in 0 byte, not newline\n"));
+ printf (_("\
+\n\
+Miscellaneous:\n\
+ -s, --no-messages suppress error messages\n\
+ -v, --invert-match select non-matching lines\n\
+ -V, --version display version information and exit\n\
+ --help display this help text and exit\n"));
+ printf (_("\
+\n\
+Output control:\n\
+ -m, --max-count=NUM stop after NUM matches\n\
+ -b, --byte-offset print the byte offset with output lines\n\
+ -n, --line-number print line number with output lines\n\
+ --line-buffered flush output on every line\n\
+ -H, --with-filename print the file name for each match\n\
+ -h, --no-filename suppress the file name prefix on output\n\
+ --label=LABEL use LABEL as the standard input file name prefix\n\
+"));
+ printf (_("\
+ -o, --only-matching show only the part of a line matching PATTERN\n\
+ -q, --quiet, --silent suppress all normal output\n\
+ --binary-files=TYPE assume that binary files are TYPE;\n\
+ TYPE is 'binary', 'text', or 'without-match'\n\
+ -a, --text equivalent to --binary-files=text\n\
+"));
+ printf (_("\
+ -I equivalent to --binary-files=without-match\n\
+ -d, --directories=ACTION how to handle directories;\n\
+ ACTION is 'read', 'recurse', or 'skip'\n\
+ -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
+ ACTION is 'read' or 'skip'\n\
+ -r, --recursive like --directories=recurse\n\
+ -R, --dereference-recursive likewise, but follow all symlinks\n\
+"));
+ printf (_("\
+ --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
+ --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\
+ --exclude-from=FILE skip files matching any file pattern from FILE\n\
+ --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
+"));
+ printf (_("\
+ -L, --files-without-match print only names of FILEs containing no match\n\
+ -l, --files-with-matches print only names of FILEs containing matches\n\
+ -c, --count print only a count of matching lines per FILE\n\
+ -T, --initial-tab make tabs line up (if needed)\n\
+ -Z, --null print 0 byte after FILE name\n"));
+ printf (_("\
+\n\
+Context control:\n\
+ -B, --before-context=NUM print NUM lines of leading context\n\
+ -A, --after-context=NUM print NUM lines of trailing context\n\
+ -C, --context=NUM print NUM lines of output context\n\
+"));
+ printf (_("\
+ -NUM same as --context=NUM\n\
+ --color[=WHEN],\n\
+ --colour[=WHEN] use markers to highlight the matching strings;\n\
+ WHEN is 'always', 'never', or 'auto'\n\
+ -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
+ -u, --unix-byte-offsets report offsets as if CRs were not there\n\
+ (MSDOS/Windows)\n\
+\n"));
+ fputs (_(after_options), stdout);
+ printf (_("\
+When FILE is -, read standard input. With no FILE, read . if a command-line\n\
+-r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\
+Exit status is 0 if any line is selected, 1 otherwise;\n\
+if any error occurs and -q is not given, the exit status is 2.\n"));
+ printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT);
+ printf (_("GNU Grep home page: <%s>\n"),
+ "http://www.gnu.org/software/grep/");
+ fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"),
+ stdout);
+
+ }
+ exit (status);
+}
+
+/* Select the matcher.  With a NULL M, install the default (first)
+   matcher's functions, recording its name only if it is the sole
+   matcher available.  With a non-NULL M, select the matcher of that
+   name.  Report a fatal error if M conflicts with a matcher that was
+   selected earlier, or if M names no available matcher.  */
+static void
+setmatcher (char const *m)
+{
+  unsigned int idx;
+
+  if (!m)
+    {
+      compile = matchers[0].compile;
+      execute = matchers[0].execute;
+      if (!matchers[1].name)
+        matcher = matchers[0].name;
+      return;
+    }
+
+  if (matcher)
+    {
+      /* A matcher is already chosen; asking for the same one again
+         is harmless.  */
+      if (STREQ (matcher, m))
+        return;
+
+      if (!matchers[1].name)
+        error (EXIT_TROUBLE, 0, _("%s can only use the %s pattern syntax"),
+               program_name, matcher);
+      else
+        error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
+      return;
+    }
+
+  /* Look M up in the table of available matchers.  */
+  for (idx = 0; matchers[idx].name; idx++)
+    {
+      if (! STREQ (m, matchers[idx].name))
+        continue;
+      compile = matchers[idx].compile;
+      execute = matchers[idx].execute;
+      matcher = m;
+      return;
+    }
+
+  error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
+}
+
+/* Split OPTIONS into its white-space-separated words.  Copy each
+   word, terminated by a null byte, into consecutive positions of
+   BUF, and, when ARGV is not NULL, store pointers to the copies in
+   ARGV[0], ARGV[1], and so on.  Return the number N of words found;
+   ARGV[N] is left untouched.  A backslash makes the character after
+   it (white space or another backslash, for instance) part of the
+   word.  */
+static size_t
+prepend_args (char const *options, char *buf, char **argv)
+{
+  char const *src = options;
+  char *dst = buf;
+  size_t count = 0;
+
+  for (;;)
+    {
+      /* Skip the white space before the next word.  */
+      while (c_isspace (to_uchar (*src)))
+        src++;
+      if (*src == '\0')
+        return count;
+
+      if (argv)
+        argv[count] = dst;
+      count++;
+
+      /* Copy one word; a backslash that is not the last character
+         is replaced by the character following it.  */
+      do
+        {
+          char c = *src++;
+          if (c == '\\' && *src)
+            c = *src++;
+          *dst++ = c;
+        }
+      while (*src != '\0' && ! c_isspace (to_uchar (*src)));
+
+      *dst++ = '\0';
+    }
+}
+
+/* Insert the whitespace-separated words of OPTIONS into the argument
+   vector *PARGV (of length *PARGC) right after its first element,
+   replacing *PARGV by a newly allocated vector and updating *PARGC.
+   Return the number of words inserted, which is 0 when OPTIONS is
+   NULL or empty; in that case nothing is changed.  */
+static int
+prepend_default_options (char const *options, int *pargc, char ***pargv)
+{
+  char *buf;
+  size_t prepended;
+  int argc;
+  char *const *argv;
+  char **pp;
+  enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
+
+  if (! (options && *options))
+    return 0;
+
+  /* The first pass counts the words; no pointers are stored yet.  */
+  buf = xmalloc (strlen (options) + 1);
+  prepended = prepend_args (options, buf, NULL);
+  argc = *pargc;
+  argv = *pargv;
+  if (MAX_ARGS - argc < prepended)
+    xalloc_die ();
+
+  pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
+  *pargc = prepended + argc;
+  *pargv = pp;
+
+  /* New vector: the original first element, then the new words,
+     then the remaining original elements including the final NULL.  */
+  *pp++ = *argv++;
+  pp += prepend_args (options, buf, pp);
+  for (;;)
+    {
+      char *arg = *argv++;
+      *pp++ = arg;
+      if (! arg)
+        break;
+    }
+
+  return prepended;
+}
+
+/* Get the next non-digit option from ARGC and ARGV.
+ Return -1 if there are no more options.
+ Process any digit options that were encountered on the way,
+ and store the resulting integer into *DEFAULT_CONTEXT.
+ The digits are gathered into a string that is handed to
+ context_length_arg for conversion. */
+static int
+get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
+{
+ static int prev_digit_optind = -1;
+ int this_digit_optind, was_digit;
+ /* Room for the digits plus "..." and a terminating null byte. */
+ char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
+ char *p = buf;
+ int opt;
+
+ was_digit = 0;
+ this_digit_optind = optind;
+ while (1)
+ {
+ opt = getopt_long (argc, (char **) argv, short_options,
+ long_options, NULL);
+ if ( ! ('0' <= opt && opt <= '9'))
+ break;
+
+ /* A digit continues the current number only if the previous option
+ was also a digit and came from the same ARGV element. */
+ if (prev_digit_optind != this_digit_optind || !was_digit)
+ {
+ /* Reset to start another context length argument. */
+ p = buf;
+ }
+ else
+ {
+ /* Suppress trivial leading zeros, to avoid incorrect
+ diagnostic on strings like 00000000000. */
+ p -= buf[0] == '0';
+ }
+
+ if (p == buf + sizeof buf - 4)
+ {
+ /* Too many digits. Append "..." to make context_length_arg
+ complain about "X...", where X contains the digits seen
+ so far. */
+ strcpy (p, "...");
+ p += 3;
+ break;
+ }
+ *p++ = opt;
+
+ was_digit = 1;
+ prev_digit_optind = this_digit_optind;
+ this_digit_optind = optind;
+ }
+ /* Convert whatever digits were collected. */
+ if (p != buf)
+ {
+ *p = '\0';
+ context_length_arg (buf, default_context);
+ }
+
+ return opt;
+}
+
+/* Parse GREP_COLORS. The default would look like:
+ GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
+ with boolean capabilities (ne and rv) unset (i.e., omitted).
+ No character escaping is needed or supported.
+ Capabilities are looked up in color_dict; parsing stops silently at
+ the first malformed item, keeping the settings applied before it. */
+static void
+parse_grep_colors (void)
+{
+ const char *p;
+ char *q;
+ char *name;
+ char *val;
+
+ p = getenv ("GREP_COLORS"); /* Plural! */
+ if (p == NULL || *p == '\0')
+ return;
+
+ /* Work off a writable copy. The copy is never freed here, because
+ the values stored through cap->var point into it. */
+ q = xstrdup (p);
+
+ name = q;
+ val = NULL;
+ /* From now on, be well-formed or you're gone. */
+ for (;;)
+ if (*q == ':' || *q == '\0')
+ {
+ char c = *q;
+ struct color_cap const *cap;
+
+ *q++ = '\0'; /* Terminate name or val. */
+ /* Empty name without val (empty cap)
+ * won't match and will be ignored. */
+ for (cap = color_dict; cap->name; cap++)
+ if (STREQ (cap->name, name))
+ break;
+ /* If name unknown, go on for forward compatibility. */
+ if (cap->var && val)
+ *(cap->var) = val;
+ if (cap->fct)
+ cap->fct ();
+ if (c == '\0')
+ return;
+ name = q;
+ val = NULL;
+ }
+ else if (*q == '=')
+ {
+ /* Reject an empty name and a second '=' within one item. */
+ if (q == name || val)
+ return;
+ *q++ = '\0'; /* Terminate name. */
+ val = q; /* Can be the empty string. */
+ }
+ else if (val == NULL)
+ q++; /* Accumulate name. */
+ else if (*q == ';' || (*q >= '0' && *q <= '9'))
+ q++; /* Accumulate val. Protect the terminal from being sent crap. */
+ else
+ return;
+}
+
+#define MBRTOWC(pwc, s, n, ps) /* mbrtowc, except that when MB_CUR_MAX == 1 one byte is converted with btowc and the result is 1 */ \
+ (MB_CUR_MAX == 1 \
+ ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+ : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) /* wcrtomb, except that when MB_CUR_MAX == 1 wctob (WC) is stored in *S and the result is 1 */ \
+ (MB_CUR_MAX == 1 \
+ ? (*(s) = wctob ((wint_t) (wc)), 1) \
+ : wcrtomb (s, wc, ps))
+
+/* Change a pattern for fgrep into grep: store in *NEW_KEYS a copy of KEYS (LEN bytes), allocated with xnmalloc, in which single-byte $ * . [ \ ^ are preceded by a backslash, and store its length in *NEW_LEN. */
+static void
+fgrep_to_grep_pattern (size_t len, char const *keys,
+ size_t *new_len, char **new_keys)
+{
+ char *p = *new_keys = xnmalloc (len + 1, 2); /* two bytes per input byte: room for a backslash before each */
+ mbstate_t mb_state = { 0 };
+ size_t n;
+
+ for (; len; keys += n, len -= n)
+ {
+ wchar_t wc;
+ n = MBRTOWC (&wc, keys, len, &mb_state);
+ switch (n)
+ {
+ case (size_t) -2:
+ n = len; /* incomplete sequence at the end: copy all remaining bytes unchanged */
+ /* Fall through. */
+ default: /* any other length (a character of N bytes): copied unchanged */
+ p = mempcpy (p, keys, n);
+ break;
+
+ case (size_t) -1:
+ memset (&mb_state, 0, sizeof mb_state); /* invalid sequence: reset the state and handle one byte */
+ /* Fall through. */
+ case 1:
+ *p = '\\'; /* write a backslash tentatively ... */
+ p += strchr ("$*.[\\^", *keys) != NULL; /* ... and keep it only before one of these; NOTE(review): strchr also matches a NUL byte here */
+ /* Fall through. */
+ case 0: /* mbrtowc result for a NUL character: copied without a backslash */
+ *p++ = *keys;
+ n = 1;
+ break;
+ }
+ }
+
+ *new_len = p - *new_keys;
+}
+
+/* If the newline-separated regular expressions, KEYS (with length, LEN
+ and no trailing NUL byte), are amenable to transformation into
+ otherwise equivalent case-ignoring ones, perform the transformation,
+ put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
+ and return true. Otherwise, return false (nothing is left allocated in that case). */
+
+static bool
+trivial_case_ignore (size_t len, char const *keys,
+ size_t *new_len, char **new_keys)
+{
+ /* FIXME: consider removing the following restriction:
+ Reject if KEYS contain ASCII '\\' or '['. */
+ if (memchr (keys, '\\', len) || memchr (keys, '[', len))
+ return false;
+
+ /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+ CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each
+ occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which
+ requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is
+ bounded above by the constant expression CASE_FOLDED_BUFSIZE *
+ MB_LEN_MAX + 3. */
+ *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3);
+ char *p = *new_keys;
+
+ mbstate_t mb_state = { 0 };
+ while (len)
+ {
+ bool initial_state = mbsinit (&mb_state) != 0;
+ wchar_t wc;
+ size_t n = MBRTOWC (&wc, keys, len, &mb_state);
+
+ /* For an invalid, incomplete or L'\0', skip this optimization. mbrtowc yields 0 for L'\0'; it must be tested explicitly, or KEYS and LEN would never advance. */
+ if (n == 0 || (size_t) -2 <= n)
+ {
+ skip_case_ignore_optimization:
+ free (*new_keys);
+ return false;
+ }
+
+ char const *orig = keys;
+ keys += n;
+ len -= n;
+
+ wchar_t folded[CASE_FOLDED_BUFSIZE];
+ int nfolded = case_folded_counterparts (wc, folded);
+ if (nfolded <= 0) /* no case counterparts: copy the character unchanged */
+ {
+ memcpy (p, orig, n);
+ p += n;
+ }
+ else if (! initial_state) /* a foldable character that began in a non-initial shift state */
+ goto skip_case_ignore_optimization;
+ else
+ {
+ *p++ = '['; /* emit [ original counterparts... ] */
+ memcpy (p, orig, n);
+ p += n;
+
+ int i = 0;
+ do
+ {
+ size_t nbytes = WCRTOMB (p, folded[i], &mb_state);
+ if (nbytes == (size_t) -1)
+ goto skip_case_ignore_optimization;
+ p += nbytes;
+ }
+ while (++i < nfolded);
+
+ if (! mbsinit (&mb_state)) /* the bracket must end in the initial shift state */
+ goto skip_case_ignore_optimization;
+
+ *p++ = ']';
+ }
+ }
+
+ *new_len = p - *new_keys;
+
+ return true;
+}
+
+int
+main (int argc, char **argv)
+{ /* Parse the options, assemble and compile the pattern, then search each operand (or "." / "-" when there is none) and exit with the resulting status. */
+ char *keys;
+ size_t keycc, oldcc, keyalloc;
+ int with_filenames;
+ size_t cc;
+ int opt, status, prepended;
+ int prev_optind, last_recursive;
+ int fread_errno;
+ intmax_t default_context;
+ FILE *fp;
+ exit_failure = EXIT_TROUBLE;
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ program_name = argv[0];
+
+ keys = NULL;
+ keycc = 0;
+ with_filenames = 0;
+ eolbyte = '\n';
+ filename_mask = ~0;
+
+ max_count = INTMAX_MAX;
+
+ /* The value -1 means to use DEFAULT_CONTEXT. */
+ out_after = out_before = -1;
+ /* Default before/after context: changed by -C/-NUM options */
+ default_context = 0;
+ /* Changed by -o option */
+ only_matching = 0;
+
+ /* Internationalization. */
+#if defined HAVE_SETLOCALE
+ setlocale (LC_ALL, "");
+#endif
+#if defined ENABLE_NLS
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+#endif
+
+ exit_failure = EXIT_TROUBLE; /* same value as assigned at the top of main */
+ atexit (clean_up_stdout);
+
+ last_recursive = 0;
+ prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); /* count of arguments inserted after argv[0], or 0 */
+ setmatcher (NULL);
+
+ while (prev_optind = optind, /* remember where the option about to be read starts */
+ (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
+ switch (opt)
+ {
+ case 'A':
+ context_length_arg (optarg, &out_after);
+ break;
+
+ case 'B':
+ context_length_arg (optarg, &out_before);
+ break;
+
+ case 'C':
+ /* Set output match context, but let any explicit leading or
+ trailing amount specified with -A or -B stand. */
+ context_length_arg (optarg, &default_context);
+ break;
+
+ case 'D':
+ if (STREQ (optarg, "read"))
+ devices = READ_DEVICES;
+ else if (STREQ (optarg, "skip"))
+ devices = SKIP_DEVICES;
+ else
+ error (EXIT_TROUBLE, 0, _("unknown devices method"));
+ break;
+
+ case 'E':
+ setmatcher ("egrep");
+ break;
+
+ case 'F':
+ setmatcher ("fgrep");
+ break;
+
+ case 'P':
+ setmatcher ("perl");
+ break;
+
+ case 'G':
+ setmatcher ("grep");
+ break;
+
+ case 'X': /* undocumented on purpose */
+ setmatcher (optarg);
+ break;
+
+ case 'H':
+ with_filenames = 1;
+ no_filenames = 0;
+ break;
+
+ case 'I':
+ binary_files = WITHOUT_MATCH_BINARY_FILES;
+ break;
+
+ case 'T':
+ align_tabs = 1;
+ break;
+
+ case 'U':
+#if defined HAVE_DOS_FILE_CONTENTS
+ dos_use_file_type = DOS_BINARY;
+#endif
+ break;
+
+ case 'u':
+#if defined HAVE_DOS_FILE_CONTENTS
+ dos_report_unix_offset = 1;
+#endif
+ break;
+
+ case 'V':
+ show_version = 1;
+ break;
+
+ case 'a':
+ binary_files = TEXT_BINARY_FILES;
+ break;
+
+ case 'b':
+ out_byte = 1;
+ break;
+
+ case 'c':
+ count_matches = 1;
+ break;
+
+ case 'd':
+ directories = XARGMATCH ("--directories", optarg,
+ directories_args, directories_types);
+ if (directories == RECURSE_DIRECTORIES)
+ last_recursive = prev_optind;
+ break;
+
+ case 'e':
+ cc = strlen (optarg);
+ keys = xrealloc (keys, keycc + cc + 1);
+ strcpy (&keys[keycc], optarg);
+ keycc += cc;
+ keys[keycc++] = '\n'; /* the newline replaces the NUL that strcpy wrote */
+ break;
+
+ case 'f':
+ fp = STREQ (optarg, "-") ? stdin : fopen (optarg, "r");
+ if (!fp)
+ error (EXIT_TROUBLE, errno, "%s", optarg);
+ for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) /* smallest power of two greater than keycc + 1 */
+ ;
+ keys = xrealloc (keys, keyalloc);
+ oldcc = keycc;
+ while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
+ {
+ keycc += cc;
+ if (keycc == keyalloc - 1)
+ keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
+ }
+ fread_errno = errno; /* captured right after the final fread */
+ if (ferror (fp))
+ error (EXIT_TROUBLE, fread_errno, "%s", optarg);
+ if (fp != stdin)
+ fclose (fp);
+ /* Append final newline if file ended in non-newline. */
+ if (oldcc != keycc && keys[keycc - 1] != '\n')
+ keys[keycc++] = '\n';
+ break;
+
+ case 'h':
+ with_filenames = 0;
+ no_filenames = 1;
+ break;
+
+ case 'i':
+ case 'y': /* For old-timers . . . */
+ match_icase = 1;
+ break;
+
+ case 'L':
+ /* Like -l, except list files that don't contain matches.
+ Inspired by the same option in Hume's gre. */
+ list_files = -1;
+ break;
+
+ case 'l':
+ list_files = 1;
+ break;
+
+ case 'm':
+ switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
+ {
+ case LONGINT_OK:
+ case LONGINT_OVERFLOW:
+ break;
+
+ default:
+ error (EXIT_TROUBLE, 0, _("invalid max count"));
+ }
+ break;
+
+ case 'n':
+ out_line = 1;
+ break;
+
+ case 'o':
+ only_matching = 1;
+ break;
+
+ case 'q':
+ exit_on_match = 1;
+ exit_failure = 0;
+ break;
+
+ case 'R':
+ fts_options = basic_fts_options | FTS_LOGICAL;
+ /* Fall through. */
+ case 'r':
+ directories = RECURSE_DIRECTORIES;
+ last_recursive = prev_optind;
+ break;
+
+ case 's':
+ suppress_errors = 1;
+ break;
+
+ case 'v':
+ out_invert = 1;
+ break;
+
+ case 'w':
+ match_words = 1;
+ break;
+
+ case 'x':
+ match_lines = 1;
+ break;
+
+ case 'Z':
+ filename_mask = 0;
+ break;
+
+ case 'z':
+ eolbyte = '\0';
+ break;
+
+ case BINARY_FILES_OPTION:
+ if (STREQ (optarg, "binary"))
+ binary_files = BINARY_BINARY_FILES;
+ else if (STREQ (optarg, "text"))
+ binary_files = TEXT_BINARY_FILES;
+ else if (STREQ (optarg, "without-match"))
+ binary_files = WITHOUT_MATCH_BINARY_FILES;
+ else
+ error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
+ break;
+
+ case COLOR_OPTION:
+ if (optarg)
+ {
+ if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
+ || !strcasecmp (optarg, "force"))
+ color_option = 1;
+ else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
+ || !strcasecmp (optarg, "none"))
+ color_option = 0;
+ else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
+ || !strcasecmp (optarg, "if-tty"))
+ color_option = 2;
+ else
+ show_help = 1; /* unrecognized argument: usage is printed after option parsing */
+ }
+ else
+ color_option = 2;
+ break;
+
+ case EXCLUDE_OPTION:
+ case INCLUDE_OPTION:
+ if (!excluded_patterns)
+ excluded_patterns = new_exclude ();
+ add_exclude (excluded_patterns, optarg,
+ (EXCLUDE_WILDCARDS
+ | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)));
+ break;
+ case EXCLUDE_FROM_OPTION:
+ if (!excluded_patterns)
+ excluded_patterns = new_exclude ();
+ if (add_exclude_file (add_exclude, excluded_patterns, optarg,
+ EXCLUDE_WILDCARDS, '\n') != 0)
+ {
+ error (EXIT_TROUBLE, errno, "%s", optarg);
+ }
+ break;
+
+ case EXCLUDE_DIRECTORY_OPTION:
+ if (!excluded_directory_patterns)
+ excluded_directory_patterns = new_exclude ();
+ add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS);
+ break;
+
+ case GROUP_SEPARATOR_OPTION:
+ group_separator = optarg;
+ break;
+
+ case LINE_BUFFERED_OPTION:
+ line_buffered = 1;
+ break;
+
+ case LABEL_OPTION:
+ label = optarg;
+ break;
+
+ case 0:
+ /* long options */
+ break;
+
+ default:
+ usage (EXIT_TROUBLE);
+ break;
+
+ }
+
+ if (color_option == 2) /* 2 was set for "auto" or a missing argument: decide from the terminal */
+ color_option = isatty (STDOUT_FILENO) && should_colorize ();
+ init_colorize ();
+
+ /* POSIX says that -q overrides -l, which in turn overrides the
+ other output options. */
+ if (exit_on_match)
+ list_files = 0;
+ if (exit_on_match | list_files)
+ {
+ count_matches = 0;
+ done_on_match = 1;
+ }
+ out_quiet = count_matches | done_on_match;
+
+ if (out_after < 0)
+ out_after = default_context;
+ if (out_before < 0)
+ out_before = default_context;
+
+ if (color_option)
+ {
+ /* Legacy. */
+ char *userval = getenv ("GREP_COLOR");
+ if (userval != NULL && *userval != '\0')
+ selected_match_color = context_match_color = userval;
+
+ /* New GREP_COLORS has priority. */
+ parse_grep_colors ();
+ }
+
+ if (show_version)
+ {
+ version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
+ (char *) NULL);
+ exit (EXIT_SUCCESS);
+ }
+
+ if (show_help)
+ usage (EXIT_SUCCESS);
+
+ struct stat tmp_stat;
+ if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
+ out_stat = tmp_stat;
+
+ if (keys)
+ {
+ if (keycc == 0)
+ {
+ /* No keys were specified (e.g. -f /dev/null). Match nothing. */
+ out_invert ^= 1;
+ match_lines = match_words = 0;
+ }
+ else
+ /* Strip trailing newline. */
+ --keycc;
+ }
+ else if (optind < argc)
+ {
+ /* A copy must be made in case of an xrealloc() or free() later. */
+ keycc = strlen (argv[optind]);
+ keys = xmemdup (argv[optind++], keycc + 1);
+ }
+ else
+ usage (EXIT_TROUBLE);
+
+ /* If case-insensitive fgrep in a multibyte locale, improve
+ performance by using grep instead. */
+ if (match_icase && compile == Fcompile && MB_CUR_MAX > 1)
+ {
+ size_t new_keycc;
+ char *new_keys;
+ fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ matcher = "grep";
+ compile = Gcompile;
+ execute = EGexecute;
+ }
+
+ /* Case-insensitive matching is expensive in multibyte locales
+ because a few characters may change size when converted to upper
+ or lower case. To accommodate those, search the input one line
+ at a time, rather than using the much more efficient buffer search.
+
+ Try to convert a regular expression 'foo' (ignoring case) to an
+ equivalent regular expression '[fF][oO][oO]' (where case matters).
+ Not only does this avoid the expensive requirement to read and
+ process a line at a time, it also allows use of the kwset engine,
+ a win in non-UTF-8 multibyte locales. */
+ if (match_icase)
+ {
+ size_t new_keycc;
+ char *new_keys;
+ /* It is not possible with -F, not useful with -P (pcre) and there is no
+ point when there is no regexp. It also depends on which constructs
+ appear in the regexp. See trivial_case_ignore for those details. */
+ if (keycc
+ && ! (matcher
+ && (STREQ (matcher, "fgrep") || STREQ (matcher, "perl")))
+ && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys))
+ {
+ match_icase = 0;
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ }
+ }
+
+#if MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ build_mbclen_cache ();
+#endif
+
+ compile (keys, keycc);
+ free (keys);
+
+ if ((argc - optind > 1 && !no_filenames) || with_filenames)
+ out_file = 1;
+
+#ifdef SET_BINARY
+ /* Output is set to binary mode because we shouldn't convert
+ NL to CR-LF pairs, especially when grepping binary files. */
+ if (!isatty (1))
+ SET_BINARY (1);
+#endif
+
+ if (max_count == 0)
+ exit (EXIT_FAILURE);
+
+ if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
+ devices = READ_DEVICES;
+
+ if (optind < argc)
+ {
+ status = 1;
+ do
+ status &= grep_command_line_arg (argv[optind]); /* becomes 0 as soon as any operand yields 0 */
+ while (++optind < argc);
+ }
+ else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) /* the last recursion request lies beyond the arguments inserted from GREP_OPTIONS */
+ {
+ /* Grep through ".", omitting leading "./" from diagnostics. */
+ filename_prefix_len = 2;
+ status = grep_command_line_arg (".");
+ }
+ else
+ status = grep_command_line_arg ("-");
+
+ /* We register via atexit() to test stdout. */
+ exit (errseen ? EXIT_TROUBLE : status);
+}
+/* vim:set shiftwidth=2: */
diff --git a/src/search.h b/src/search.h
index 69e3afd..990da73 100644
--- a/src/search.h
+++ b/src/search.h
@@ -52,6 +52,8 @@ extern bool is_mb_middle (const char **, const char *, const char *, size_t);
/* dfasearch.c */
extern void GEAcompile (char const *, size_t, reg_syntax_t);
extern size_t EGexecute (char const *, size_t, size_t *, char const *);
+extern void Gcompile (char const *pattern, size_t size);
+extern void Ecompile (char const *pattern, size_t size);
/* kwsearch.c */
extern void Fcompile (char const *, size_t);
--
1.9.0
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", (continued)
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", Paul Eggert, 2014/04/02
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", Paul Eggert, 2014/04/03
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", Jim Meyering, 2014/04/04
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", Paul Eggert, 2014/04/05
- bug#17157: [PATCH 1/5] Partially revert "dfa: improve port to freestanding DJGPP", Jim Meyering, 2014/04/06
bug#17156: [PATCH 3/5] grep: avoid to re-build a state built previously., Paolo Bonzini, 2014/04/01
bug#17156: [PATCH 5/5] grep: pass a single line to regex, Paolo Bonzini, 2014/04/01
bug#17156: [PATCH 4/5] grep: optimization of DFA by reuse of multi-byte buffers in non-UTF8 locales, Paolo Bonzini, 2014/04/01
bug#17156: [PATCH 2/5] Revert conversion to shell scripts,
Paolo Bonzini <=
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paul Eggert, 2014/04/01
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paolo Bonzini, 2014/04/01
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paul Eggert, 2014/04/01
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paolo Bonzini, 2014/04/01
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paul Eggert, 2014/04/05
- bug#17156: [PATCH 2/5] Revert conversion to shell scripts, Paolo Bonzini, 2014/04/07