From a65ef9e231f75dc9fac66f9e45724cb14f1b34bc Mon Sep 17 00:00:00 2001
From: Jim Meyering
Date: Sat, 27 Jun 2015 08:43:33 -0700
Subject: [PATCH 1/2] tests: add coreutils' perl-driven test framework
* configure.ac: Set the AM_CONDITIONAL variable, HAVE_PERL.
* tests/Coreutils.pm: New file.
* tests/CuSkip.pm: New file.
* tests/CuTmpdir.pm: New file.
* tests/no-perl: New file.
* tests/Makefile.am: Set up to use .pl tests:
(TEST_EXTENSIONS, TESTSUITE_PERL, TESTSUITE_PERL_OPTIONS): Define.
(SH_LOG_COMPILER, PL_LOG_COMPILER): Define.
(EXTRA_DIST): Add the four new file names.
---
configure.ac | 7 +
tests/Coreutils.pm | 620 +++++++++++++++++++++++++++++++++++++++++++++++++++++
tests/CuSkip.pm | 39 ++++
tests/CuTmpdir.pm | 111 ++++++++++
tests/Makefile.am | 24 +++
tests/no-perl | 6 +
6 files changed, 807 insertions(+)
create mode 100644 tests/Coreutils.pm
create mode 100644 tests/CuSkip.pm
create mode 100644 tests/CuTmpdir.pm
create mode 100644 tests/no-perl
diff --git a/configure.ac b/configure.ac
index 4de0f39..afb8cd2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -94,6 +94,13 @@ AC_TYPE_SIZE_T
AC_C_CONST
gl_INIT
+# The test suite needs to know if we have a working perl.
+# FIXME: this is suboptimal. Ideally, we would be able to call gl_PERL
+# with an ACTION-IF-NOT-FOUND argument ...
+cu_have_perl=yes
+case $PERL in *"/missing "*) cu_have_perl=no;; esac
+AM_CONDITIONAL([HAVE_PERL], [test $cu_have_perl = yes])
+
AC_ARG_ENABLE([gcc-warnings],
[AS_HELP_STRING([--enable-gcc-warnings],
[turn on lots of GCC warnings (for developers)])],
diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm
new file mode 100644
index 0000000..bd2088f
--- /dev/null
+++ b/tests/Coreutils.pm
@@ -0,0 +1,620 @@
+package Coreutils;
+# This is a testing framework.
+
+# Copyright (C) 1998-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+use strict;
+use vars qw($VERSION @ISA @EXPORT);
+
+use FileHandle;
+use File::Compare qw(compare);
+
address@hidden = qw(Exporter);
+($VERSION = '$Revision: 1.5 $ ') =~ tr/[0-9].//cd;
address@hidden = qw (run_tests triple_test getlimits);
+
+my $debug = $ENV{DEBUG};
+
+my @Types = qw (IN IN_PIPE OUT ERR AUX CMP EXIT PRE POST OUT_SUBST
+ ERR_SUBST ENV ENV_DEL);
+my %Types = map {$_ => 1} @Types;
+my %Zero_one_type = map {$_ => 1}
+ qw (OUT ERR EXIT PRE POST OUT_SUBST ERR_SUBST ENV);
+my $srcdir = "$ENV{srcdir}";
+my $Global_count = 1;
+
+# When running in a DJGPP environment, make $ENV{SHELL} point to bash.
+# Otherwise, a bad shell might be used (e.g. command.com) and many
+# tests would fail.
+defined $ENV{DJDIR}
+ and $ENV{SHELL} = "$ENV{DJDIR}/bin/bash.exe";
+
+# A file spec: a scalar or a reference to a single-keyed hash
+# ================
+# 'contents' contents only (file name is derived from test name)
+# {filename => 'contents'} filename and contents
+# {filename => undef} filename only -- $(srcdir)/tests/filename must exist
+#
+# FIXME: If there is more than one input file, then you can't specify 'REDIR'.
+# PIPE is still ok.
+#
+# I/O spec: a hash ref with the following properties
+# ================
+# - one key/value pair
+# - the key must be one of these strings: IN, OUT, ERR, AUX, CMP, EXIT
+# - the value must be a file spec
+# {OUT => 'data'} put data in a temp file and compare it to stdout from cmd
+# {OUT => {'filename'=>undef}} compare contents of existing filename to
+# stdout from cmd
+# {OUT => {'filename'=>[$CTOR, $DTOR]}} $CTOR and $DTOR are references to
+# functions, each which is passed the single argument 'filename'.
+# $CTOR must create 'filename'.
+# DTOR may be omitted in which case 'sub{unlink @_[0]}' is used.
+# FIXME: implement this
+# {ERR => ...}
+# Same as for OUT, but compare with stderr, not stdout.
+# {OUT_SUBST => 's/variable_output/expected_output/'}
+# Transform actual standard output before comparing it against expected.
+# This is useful e.g. for programs like du that produce output that
+# varies a lot from system. E.g., an empty file may consume zero file
+# blocks, or more, depending on the OS and on the file system type.
+# {ERR_SUBST => 's/variable_output/expected_output/'}
+# Transform actual stderr output before comparing it against expected.
+# This is useful when verifying that we get a meaningful diagnostic.
+# For example, in rm/fail-2eperm, we have to account for three different
+# diagnostics: Operation not permitted, Not owner, and Permission denied.
+# {EXIT => N} expect exit status of cmd to be N
+# {ENV => 'VAR=val ...'}
+# Prepend 'VAR=val ...' to the command that we execute via 'system'.
+# {ENV_DEL => 'VAR'}
+# Remove VAR from the environment just before running the corresponding
+# command, and restore any value just afterwards.
+#
+# There may be many input file specs. File names from the input specs
+# are concatenated in order on the command line.
+# There may be at most one of the OUT-, ERR-, and EXIT-keyed specs.
+# If the OUT-(or ERR)-keyed hash ref is omitted, then expect no output
+# on stdout (or stderr).
+# If the EXIT-keyed one is omitted, then expect the exit status to be zero.
+
+# FIXME: Make sure that no junkfile is also listed as a
+# non-junkfile (i.e., with undef for contents)
+
+sub _shell_quote ($)
+{
+ my ($string) = @_;
+ $string =~ s/\'/\'\\\'\'/g;
+ return "'$string'";
+}
+
+sub _create_file ($$$$)
+{
+ my ($program_name, $test_name, $file_name, $data) = @_;
+ my $file;
+ if (defined $file_name)
+ {
+ $file = $file_name;
+ }
+ else
+ {
+ $file = "$test_name.$Global_count";
+ ++$Global_count;
+ }
+
+ warn "creating file '$file' with contents '$data'\n" if $debug;
+
+ # The test spec gave a string.
+ # Write it to a temp file and return tempfile name.
+ my $fh = new FileHandle "> $file";
+ die "$program_name: $file: $!\n" if ! $fh;
+ print $fh $data;
+ $fh->close || die "$program_name: $file: $!\n";
+
+ return $file;
+}
+
+sub _compare_files ($$$$$)
+{
+ my ($program_name, $test_name, $in_or_out, $actual, $expected) = @_;
+
+ my $differ = compare ($actual, $expected);
+ if ($differ)
+ {
+ my $info = (defined $in_or_out ? "std$in_or_out " : '');
+ warn "$program_name: test $test_name: ${info}mismatch, comparing "
+ . "$expected (expected) and $actual (actual)\n";
+ # Ignore any failure, discard stderr.
+ system "diff -c $expected $actual 2>/dev/null";
+ }
+
+ return $differ;
+}
+
+sub _process_file_spec ($$$$$)
+{
+ my ($program_name, $test_name, $file_spec, $type, $junk_files) = @_;
+
+ my ($file_name, $contents);
+ if (!ref $file_spec)
+ {
+ ($file_name, $contents) = (undef, $file_spec);
+ }
+ elsif (ref $file_spec eq 'HASH')
+ {
+ my $n = keys %$file_spec;
+ die "$program_name: $test_name: $type spec has $n elements --"
+ . " expected 1\n"
+ if $n != 1;
+ ($file_name, $contents) = each %$file_spec;
+
+ # This happens for the AUX hash in an io_spec like this:
+ # {CMP=> ['zy123utsrqponmlkji', {'@AUX@'=> undef}]},
+ defined $contents
+ or return $file_name;
+ }
+ else
+ {
+ die "$program_name: $test_name: invalid RHS in $type-spec\n"
+ }
+
+ my $is_junk_file = (! defined $file_name
+ || (($type eq 'IN' || $type eq 'AUX' || $type eq 'CMP')
+ && defined $contents));
+ my $file = _create_file ($program_name, $test_name,
+ $file_name, $contents);
+
+ if ($is_junk_file)
+ {
+ push @$junk_files, $file
+ }
+ else
+ {
+ # FIXME: put $srcdir in here somewhere
+ warn "$program_name: $test_name: specified file '$file' does"
+ . " not exist\n"
+ if ! -f "$srcdir/tests/$file";
+ }
+
+ return $file;
+}
+
+sub _at_replace ($$)
+{
+ my ($map, $s) = @_;
+ foreach my $eo (qw (AUX OUT ERR))
+ {
+ my $f = $map->{$eo};
+ $f
+ and $s =~ /address@hidden@/
+ and $s =~ s/address@hidden@/$f/g;
+ }
+ return $s;
+}
+
+sub getlimits()
+{
+ my $NV;
+ open $NV, "getlimits |" or die "Error running getlimits\n";
+ my %limits = map {split /=|\n/} <$NV>;
+ return \%limits;
+}
+
+# FIXME: cleanup on interrupt
+# FIXME: extract 'do_1_test' function
+
+# FIXME: having to include $program_name here is an expedient kludge.
+# Library code doesn't 'die'.
+sub run_tests ($$$$$)
+{
+ my ($program_name, $prog, $t_spec, $save_temps, $verbose) = @_;
+
+ # To indicate that $prog is a shell built-in, you'd make it a string 'ref'.
+ # E.g., call run_tests ($prog, \$prog, address@hidden, $save_temps, $verbose);
+ # If it's a ref, invoke it via "env":
+ my @prog = ref $prog ? (qw(env --), $$prog) : $prog;
+
+ # Warn about empty t_spec.
+ # FIXME
+
+ # Remove all temp files upon interrupt.
+ # FIXME
+
+ # Verify that test names are distinct.
+ my $bad_test_name = 0;
+ my %seen;
+ my %seen_8dot3;
+ my $t;
+ foreach $t (@$t_spec)
+ {
+ my $test_name = $t->[0];
+ if ($seen{$test_name})
+ {
+ warn "$program_name: $test_name: duplicate test name\n";
+ $bad_test_name = 1;
+ }
+ $seen{$test_name} = 1;
+
+ if (0)
+ {
+ my $t8 = lc substr $test_name, 0, 8;
+ if ($seen_8dot3{$t8})
+ {
+ warn "$program_name: 8.3 test name conflict: "
+ . "$test_name, $seen_8dot3{$t8}\n";
+ $bad_test_name = 1;
+ }
+ $seen_8dot3{$t8} = $test_name;
+ }
+
+ # The test name may be no longer than 30 bytes.
+ # Yes, this is an arbitrary limit. If it causes trouble,
+ # consider removing it.
+ my $max = 30;
+ if ($max < length $test_name)
+ {
+ warn "$program_name: $test_name: test name is too long (> $max)\n";
+ $bad_test_name = 1;
+ }
+ }
+ return 1 if $bad_test_name;
+
+ # FIXME check exit status
+ system (@prog, '--version') if $verbose;
+
+ my @junk_files;
+ my $fail = 0;
+ foreach my $tt (@$t_spec)
+ {
+ my @post_compare;
+ my @dummy = @$tt;
+ my $t = address@hidden;
+ my $test_name = shift @$t;
+ my $expect = {};
+ my ($pre, $post);
+
+ # FIXME: maybe don't reset this.
+ $Global_count = 1;
+ my @args;
+ my $io_spec;
+ my %seen_type;
+ my @env_delete;
+ my $env_prefix = '';
+ my $input_pipe_cmd;
+ foreach $io_spec (@$t)
+ {
+ if (!ref $io_spec)
+ {
+ push @args, $io_spec;
+ next;
+ }
+
+ if (ref $io_spec ne 'HASH')
+ {
+ eval 'use Data::Dumper';
+ die "$program_name: $test_name: invalid entry in test spec; "
+ . "expected HASH-ref,\nbut got this:\n"
+ . Data::Dumper->Dump ([\$io_spec], ['$io_spec']) . "\n";
+ }
+
+ my $n = keys %$io_spec;
+ die "$program_name: $test_name: spec has $n elements --"
+ . " expected 1\n"
+ if $n != 1;
+ my ($type, $val) = each %$io_spec;
+ die "$program_name: $test_name: invalid key '$type' in test spec\n"
+ if ! $Types{$type};
+
+ # Make sure there's no more than one of OUT, ERR, EXIT, etc.
+ die "$program_name: $test_name: more than one $type spec\n"
+ if $Zero_one_type{$type} and $seen_type{$type}++;
+
+ if ($type eq 'PRE' or $type eq 'POST')
+ {
+ $expect->{$type} = $val;
+ next;
+ }
+
+ if ($type eq 'CMP')
+ {
+ my $t = ref $val;
+ $t && $t eq 'ARRAY'
+ or die "$program_name: $test_name: invalid CMP spec\n";
+ @$val == 2
+ or die "$program_name: $test_name: invalid CMP list; must have"
+ . " exactly 2 elements\n";
+ my @cmp_files;
+ foreach my $e (@$val)
+ {
+ my $r = ref $e;
+ $r && $r ne 'HASH'
+ and die "$program_name: $test_name: invalid element ($r)"
+ . " in CMP list; only scalars and hash references "
+ . "are allowed\n";
+ if ($r && $r eq 'HASH')
+ {
+ my $n = keys %$e;
+ $n == 1
+ or die "$program_name: $test_name: CMP spec has $n "
+ . "elements -- expected 1\n";
+
+ # Replace any '@AUX@' in the key of %$e.
+ my ($ff, $val) = each %$e;
+ my $new_ff = _at_replace $expect, $ff;
+ if ($new_ff ne $ff)
+ {
+ $e->{$new_ff} = $val;
+ delete $e->{$ff};
+ }
+ }
+ my $cmp_file = _process_file_spec ($program_name, $test_name,
+ $e, $type, address@hidden);
+ push @cmp_files, $cmp_file;
+ }
+ push @post_compare, address@hidden;
+
+ $expect->{$type} = $val;
+ next;
+ }
+
+ if ($type eq 'EXIT')
+ {
+ die "$program_name: $test_name: invalid EXIT code\n"
+ if $val !~ /^\d+$/;
+ # FIXME: make sure $data is numeric
+ $expect->{EXIT} = $val;
+ next;
+ }
+
+ if ($type =~ /^(OUT|ERR)_SUBST$/)
+ {
+ $expect->{RESULT_SUBST} ||= {};
+ $expect->{RESULT_SUBST}->{$1} = $val;
+ next;
+ }
+
+ if ($type eq 'ENV')
+ {
+ $env_prefix = "$val ";
+ next;
+ }
+
+ if ($type eq 'ENV_DEL')
+ {
+ push @env_delete, $val;
+ next;
+ }
+
+ my $file = _process_file_spec ($program_name, $test_name, $val,
+ $type, address@hidden);
+
+ if ($type eq 'IN' || $type eq 'IN_PIPE')
+ {
+ my $quoted_file = _shell_quote $file;
+ if ($type eq 'IN_PIPE')
+ {
+ defined $input_pipe_cmd
+ and die "$program_name: $test_name: only one input"
+ . " may be specified with IN_PIPE\n";
+ $input_pipe_cmd = "cat $quoted_file |";
+ }
+ else
+ {
+ push @args, $quoted_file;
+ }
+ }
+ elsif ($type eq 'AUX' || $type eq 'OUT' || $type eq 'ERR')
+ {
+ $expect->{$type} = $file;
+ }
+ else
+ {
+ die "$program_name: $test_name: invalid type: $type\n"
+ }
+ }
+
+ # Expect an exit status of zero if it's not specified.
+ $expect->{EXIT} ||= 0;
+
+ # Allow ERR to be omitted -- in that case, expect no error output.
+ foreach my $eo (qw (OUT ERR))
+ {
+ if (!exists $expect->{$eo})
+ {
+ $expect->{$eo} = _create_file ($program_name, $test_name,
+ undef, '');
+ push @junk_files, $expect->{$eo};
+ }
+ }
+
+ # FIXME: Does it ever make sense to specify a filename *and* contents
+ # in OUT or ERR spec?
+
+ # FIXME: this is really suboptimal...
+ my @new_args;
+ foreach my $a (@args)
+ {
+ $a = _at_replace $expect, $a;
+ push @new_args, $a;
+ }
+ @args = @new_args;
+
+ warn "$test_name...\n" if $verbose;
+ &{$expect->{PRE}} if $expect->{PRE};
+ my %actual;
+ $actual{OUT} = "$test_name.O";
+ $actual{ERR} = "$test_name.E";
+ push @junk_files, $actual{OUT}, $actual{ERR};
+ my @cmd = (@prog, @args, "> $actual{OUT}", "2> $actual{ERR}");
+ $env_prefix
+ and unshift @cmd, $env_prefix;
+ defined $input_pipe_cmd
+ and unshift @cmd, $input_pipe_cmd;
+ my $cmd_str = join (' ', @cmd);
+
+ # Delete from the environment any symbols specified by syntax
+ # like this: {ENV_DEL => 'TZ'}.
+ my %pushed_env;
+ foreach my $env_sym (@env_delete)
+ {
+ my $val = delete $ENV{$env_sym};
+ defined $val
+ and $pushed_env{$env_sym} = $val;
+ }
+
+ warn "Running command: '$cmd_str'\n" if $debug;
+ my $rc = 0xffff & system $cmd_str;
+
+ # Restore any environment setting we changed via a deletion.
+ foreach my $env_sym (keys %pushed_env)
+ {
+ $ENV{$env_sym} = $pushed_env{$env_sym};
+ }
+
+ if ($rc == 0xff00)
+ {
+ warn "$program_name: test $test_name failed: command failed:\n"
+ . " '$cmd_str': $!\n";
+ $fail = 1;
+ goto cleanup;
+ }
+ $rc >>= 8 if $rc > 0x80;
+ if ($expect->{EXIT} != $rc)
+ {
+ warn "$program_name: test $test_name failed: exit status mismatch:"
+ . " expected $expect->{EXIT}, got $rc\n";
+ $fail = 1;
+ goto cleanup;
+ }
+
+ my %actual_data;
+ # Record actual stdout and stderr contents, if POST may need them.
+ if ($expect->{POST})
+ {
+ foreach my $eo (qw (OUT ERR))
+ {
+ my $out_file = $actual{$eo};
+ open IN, $out_file
+ or (warn
+ "$program_name: cannot open $out_file for reading: $!\n"),
+ $fail = 1, next;
+ $actual_data{$eo} = ;
+ close IN
+ or (warn "$program_name: failed to read $out_file: $!\n"),
+ $fail = 1;
+ }
+ }
+
+ foreach my $eo (qw (OUT ERR))
+ {
+ my $subst_expr = $expect->{RESULT_SUBST}->{$eo};
+ if (defined $subst_expr)
+ {
+ my $out = $actual{$eo};
+ my $orig = "$out.orig";
+
+ # Move $out aside (to $orig), then recreate $out
+ # by transforming each line of $orig via $subst_expr.
+ rename $out, $orig
+ or (warn "$program_name: cannot rename $out to $orig: $!\n"),
+ $fail = 1, next;
+ open IN, $orig
+ or (warn "$program_name: cannot open $orig for reading: $!\n"),
+ $fail = 1, (unlink $orig), next;
+ unlink $orig
+ or (warn "$program_name: cannot unlink $orig: $!\n"),
+ $fail = 1;
+ open OUT, ">$out"
+ or (warn "$program_name: cannot open $out for writing: $!\n"),
+ $fail = 1, next;
+ while (defined (my $line = ))
+ {
+ eval "\$_ = \$line; $subst_expr; \$line = \$_";
+ print OUT $line;
+ }
+ close IN;
+ close OUT
+ or (warn "$program_name: failed to write $out: $!\n"),
+ $fail = 1, next;
+ }
+
+ my $eo_lower = lc $eo;
+ _compare_files ($program_name, $test_name, $eo_lower,
+ $actual{$eo}, $expect->{$eo})
+ and $fail = 1;
+ }
+
+ foreach my $pair (@post_compare)
+ {
+ my ($expected, $actual) = @$pair;
+ _compare_files $program_name, $test_name, undef, $actual, $expected
+ and $fail = 1;
+ }
+
+ cleanup:
+ $expect->{POST}
+ and &{$expect->{POST}} ($actual_data{OUT}, $actual_data{ERR});
+
+ }
+
+ # FIXME: maybe unlink files inside the big foreach loop?
+ unlink @junk_files if ! $save_temps;
+
+ return $fail;
+}
+
+# For each test in @$TESTS, generate two additional tests,
+# one using stdin, the other using a pipe. I.e., given this one
+# ['idem-0', {IN=>''}, {OUT=>''}],
+# generate these:
+# ['idem-0.r', '<', {IN=>''}, {OUT=>''}],
+# ['idem-0.p', {IN_PIPE=>''}, {OUT=>''}],
+# Generate new tests only if there is exactly one input spec.
+# The returned list of tests contains each input test, followed
+# by zero or two derived tests.
+sub triple_test($)
+{
+ my ($tests) = @_;
+ my @new;
+ foreach my $t (@$tests)
+ {
+ push @new, $t;
+
+ my @in;
+ my @args;
+ my @list_of_hash;
+ foreach my $e (@$t)
+ {
+ !ref $e
+ and push (@args, $e), next;
+
+ ref $e && ref $e eq 'HASH'
+ or (warn "$0: $t->[0]: unexpected entry type\n"), next;
+ defined $e->{IN}
+ and (push @in, $e->{IN}), next;
+ push @list_of_hash, $e;
+ }
+ # Add variants IFF there is exactly one input file.
+ @in == 1
+ or next;
+ shift @args; # discard test name
+ push @new, ["$t->[0].r", @args, '<', {IN => $in[0]}, @list_of_hash];
+ push @new, ["$t->[0].p", @args, {IN_PIPE => $in[0]}, @list_of_hash];
+ }
+ return @new;
+}
+
+## package return
+1;
diff --git a/tests/CuSkip.pm b/tests/CuSkip.pm
new file mode 100644
index 0000000..a25688a
--- /dev/null
+++ b/tests/CuSkip.pm
@@ -0,0 +1,39 @@
+package CuSkip;
+# Skip a test: emit diag to log and to stderr, and exit 77
+
+# Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+use strict;
+use warnings;
+
+our $ME = $0 || "??>";
+
+# Emit a diagnostic both to stderr and to $stderr_fileno_.
+# FIXME: don't hard-code that value (9), since it's already defined in init.cfg.
+sub skip ($)
+{
+ my ($msg) = @_;
+ my $stderr_fileno_ = 9;
+ warn $msg;
+ open FH, ">&$stderr_fileno_"
+ or warn "$ME: failed to dup stderr\n";
+ print FH $msg;
+ close FH
+ or warn "$ME: failed to close FD $stderr_fileno_\n";
+ exit 77;
+}
+
+1;
diff --git a/tests/CuTmpdir.pm b/tests/CuTmpdir.pm
new file mode 100644
index 0000000..fd65556
--- /dev/null
+++ b/tests/CuTmpdir.pm
@@ -0,0 +1,111 @@
+package CuTmpdir;
+# create, then chdir into a temporary sub-directory
+
+# Copyright (C) 2007-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+use strict;
+use warnings;
+
+use File::Temp;
+use File::Find;
+
+our $ME = $0 || "??>";
+
+my $dir;
+
+sub skip_test($)
+{
+ warn "$ME: skipping test: unsafe working directory name: '$_[0]'\n";
+ exit 77;
+}
+
+sub chmod_1
+{
+ my $name = $_;
+
+ # Skip symlinks and non-directories.
+ -l $name || !-d _
+ and return;
+
+ chmod 0700, $name;
+}
+
+sub chmod_tree
+{
+ # When tempdir fails, it croaks, which leaves $dir undefined.
+ defined $dir
+ or return;
+
+ # Perform the equivalent of find "$dir" -type d -print0|xargs -0 chmod -R 700.
+ my $options = {untaint => 1, wanted => \&chmod_1};
+ find ($options, $dir);
+}
+
+sub import {
+ my $prefix = $_[1];
+
+ $ME eq '-' && defined $prefix
+ and $ME = $prefix;
+
+ if ($prefix !~ /^\//)
+ {
+ eval 'use Cwd';
+ my $cwd = $@ ? '.' : Cwd::getcwd();
+ $prefix = "$cwd/$prefix";
+ }
+
+ # Untaint for the upcoming mkdir.
+ $prefix =~ m!^(address@hidden/]+)$!
+ or skip_test $prefix;
+ $prefix = $1;
+
+ my $original_pid = $$;
+
+ my $on_sig_remove_tmpdir = sub {
+ my ($sig) = @_;
+ if ($$ == $original_pid and defined $dir)
+ {
+ chmod_tree;
+ # Older versions of File::Temp lack this method.
+ exists &File::Temp::cleanup
+ and &File::Temp::cleanup;
+ }
+ $SIG{$sig} = 'DEFAULT';
+ kill $sig, $$;
+ };
+
+ foreach my $sig (qw (INT TERM HUP))
+ {
+ $SIG{$sig} = $on_sig_remove_tmpdir;
+ }
+
+ $dir = File::Temp::tempdir("$prefix.tmp-XXXX", CLEANUP => 1 );
+ chdir $dir
+ or warn "$ME: failed to chdir to $dir: $!\n";
+}
+
+END {
+ # Move cwd out of the directory we're about to remove.
+ # This is required on some systems, and by some versions of File::Temp.
+ chdir '..'
+ or warn "$ME: failed to chdir to .. from $dir: $!\n";
+
+ my $saved_errno = $?;
+ chmod_tree;
+ $? = $saved_errno;
+}
+
+1;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index b29328b..03cfdbb 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -14,6 +14,26 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+TEST_EXTENSIONS = .sh .pl
+
+if HAVE_PERL
+TESTSUITE_PERL = $(PERL)
+else
+TESTSUITE_PERL = $(SHELL) $(srcdir)/no-perl
+endif
+
+# Options passed to the perl invocations running the perl test scripts.
+TESTSUITE_PERL_OPTIONS = -w -I$(srcdir) -MCoreutils -MCuSkip
+# '$f' is set by the Automake-generated test harness to the path of the
+# current test script stripped of VPATH components, and is used by the
+# CuTmpdir module to determine the name of the temporary files to be
+# used. Note that $f is a shell variable, not a make macro, so the use
+# of '$$f' below is correct, and not a typo.
+TESTSUITE_PERL_OPTIONS += -M"CuTmpdir qw($$f)"
+
+SH_LOG_COMPILER = $(SHELL)
+PL_LOG_COMPILER = $(TESTSUITE_PERL) $(TESTSUITE_PERL_OPTIONS)
+
check_PROGRAMS = get-mb-cur-max dfa-match-aux
AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib \
-I$(top_srcdir)/src
@@ -152,6 +172,9 @@ EXTRA_DIST = \
$(TESTS) \
bre.awk \
bre.tests \
+ Coreutils.pm \
+ CuSkip.pm \
+ CuTmpdir.pm \
envvar-check \
ere.awk \
ere.tests \
@@ -159,6 +182,7 @@ EXTRA_DIST = \
init.sh \
khadafy.lines \
khadafy.regexp \
+ no-perl \
spencer1.awk \
spencer1.tests \
spencer1-locale.awk
diff --git a/tests/no-perl b/tests/no-perl
new file mode 100644
index 0000000..956a826
--- /dev/null
+++ b/tests/no-perl
@@ -0,0 +1,6 @@
+#! /bin/sh
+# Perl is not available, the test should be considered skipped.
+# FD 9 should have been opened by the test suite harness, pointing
+# to the original stderr (usually, the user's terminal).
+echo "test skipped: no usable version of Perl found" >&9
+exit 77
--
2.8.0-rc2
From 083158051ce6ab058e08223c19b52de2adba9f4b Mon Sep 17 00:00:00 2001
From: Jim Meyering
Date: Sat, 16 Jul 2016 10:51:31 -0700
Subject: [PATCH 2/2] grep: print "filename:lineno:" in invalid-regex
diagnostic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Determining the file name and line number is a little tricky because
of the way the regular expressions are all concatenated onto a newline-
separated list. By the time grep would compiling regular expressions,
the origin of each regexp was no longer available.
This patch adds a list of filename,first_lineno pairs, one per input
source, by which we can then map the ordinal regexp number to a
filename,lineno pair for the diagnostic.
* src/dfasearch.c (GEAcompile): When diagnosing an invalid regexp
specified via -f FILE, include the "FILENAME:LINENO: " prefix.
Also, when there are two or more lines with compilation failures,
diagnose all of them, rather than stopping after the first.
* src/grep.h (pattern_file_name): Declare it.
* src/grep.c: (struct FL_pair): Define type.
(fl_pair, n_fl_pair_slots, n_pattern_files, patfile_lineno):
Define globals.
(fl_add, pattern_file_name): Define functions.
(main): Call fl_add for each type of the following: -e argument,
-f argument, command-line-specified (without -e) regexp.
* tests/filename-lineno.pl: New file.
* tests/Makefile.am (TESTS): Add it.
* NEWS (Improvements): Mention this.
Initially reported by Gunnar Wolf in https://bugs.debian.org/525214
Forwarded to grep's bug list by Santiago Ruano Rincón as
http://debbugs.gnu.org/23965
---
NEWS | 5 +--
src/dfasearch.c | 16 +++++++++-
src/grep.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
src/grep.h | 1 +
tests/Makefile.am | 1 +
tests/filename-lineno.pl | 61 +++++++++++++++++++++++++++++++++++
6 files changed, 163 insertions(+), 3 deletions(-)
create mode 100755 tests/filename-lineno.pl
diff --git a/NEWS b/NEWS
index c3e6000..44b6fdf 100644
--- a/NEWS
+++ b/NEWS
@@ -6,12 +6,13 @@ GNU grep NEWS -*- outline -*-
grep can be much faster now when standard output is /dev/null.
-** Improvements
-
grep -F is now typically much faster when many patterns are given,
as it now uses the Aho-Corasick algorithm instead of the
Commentz-Walter algorithm in that case.
+ grep now prints a "FILENAME:LINENO: " prefix when diagnosing an
+ invalid regular expression that was read from an '-f'-specified file.
+
* Noteworthy changes in release 2.25 (2016-04-21) [stable]
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 8052ef0..9096785 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -135,6 +135,7 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
this should be a syntax error. The same for backref, where the
backref should be local to each pattern. */
char const *p = pattern;
+ bool compilation_failed = false;
do
{
size_t len;
@@ -157,12 +158,25 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
char const *err = re_compile_pattern (p, len,
&(patterns[pcount].regexbuf));
if (err)
- error (EXIT_TROUBLE, 0, "%s", err);
+ {
+ /* With patterns specified only on the command line, emit the bare
+ diagnostic. Otherwise, include a filename:lineno: prefix. */
+ size_t lineno;
+ char const *pat_filename = pattern_file_name (pcount + 1, &lineno);
+ if (*pat_filename == '\0')
+ error (0, 0, "%s", err);
+ else
+ error (0, 0, "%s:%zu: %s", pat_filename, lineno, err);
+ compilation_failed = true;
+ }
pcount++;
p = sep;
}
while (p);
+ if (compilation_failed)
+ exit (EXIT_TROUBLE);
+
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
Then if DFA succeeds we do some hairy stuff using the regex matcher
diff --git a/src/grep.c b/src/grep.c
index 302e4d7..a82da61 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -81,6 +81,85 @@ static bool only_matching;
/* If nonzero, make sure first content char in a line is on a tab stop. */
static bool align_tabs;
+/* See below */
+struct FL_pair
+ {
+ char const *filename;
+ size_t lineno;
+ };
+
+/* A list of lineno,filename pairs corresponding to -f FILENAME
+ arguments. Since we store the concatenation of all patterns in
+ a single array, KEYS, be they from the command line via "-e PAT"
+ or read from one or more -f-specified FILENAMES. Given this
+ invocation, grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there
+ will be three entries in LF_PAIR: {1, x} {6, y} {8, z}, where
+ x, y and z are just place-holders for shell-generated names. */
+static struct FL_pair *fl_pair;
+static size_t n_fl_pair_slots;
+/* Count not only -f-specified files, but also individual -e operands
+ and any command-line argument that serves as a regular expression. */
+static size_t n_pattern_files;
+
+/* Given the concatenation of all patterns, one per line, be they
+ specified via -e, a lone command-line argument or -f, this is the
+ number of the first line of each entity, in that concatenation.
+ It is advanced by fl_add and, when needed, used in pattern_file_name
+ to derive a file-relative line number. */
+static uintmax_t patfile_lineno = 1;
+
+/* Return the number of newline bytes in BUF starting at offset BEG
+ and up to and not including offset END. */
+static size_t _GL_ATTRIBUTE_PURE
+count_nl_bytes (char const *buf, size_t beg, size_t end)
+{
+ char const *p = buf + beg;
+ char const *end_p = buf + end;
+ uintmax_t n = 0;
+ while (true)
+ {
+ p = memchr (p, '\n', end_p - p);
+ if (!p)
+ break;
+ p++;
+ n++;
+ }
+ return n;
+}
+
+/* Append a FILENAME,line-number pair to FL_PAIR. The line number we save
+ with FILENAME is the initial value of the global PATFILE_LINENO.
+ PATFILE_LINENO is then incremented by the number of newlines in BUF
+ from offset BEG up to but not including offset END. */
+static void
+fl_add (char const *buf, size_t beg, size_t end, char const *filename)
+{
+ if (n_fl_pair_slots <= n_pattern_files)
+ fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair);
+
+ fl_pair[n_pattern_files].lineno = patfile_lineno;
+ fl_pair[n_pattern_files].filename = filename;
+ n_pattern_files++;
+ patfile_lineno += count_nl_bytes (buf, beg, end);
+}
+
+/* Map the line number, LINENO, of one of the input patterns to the
+ name of the file from which it came. If it was read from stdin
+ or if it was specified on the command line, return "-". */
+char const * _GL_ATTRIBUTE_PURE
+pattern_file_name (size_t lineno, size_t *new_lineno)
+{
+ size_t i;
+ for (i = 1; i < n_pattern_files; i++)
+ {
+ if (lineno < fl_pair[i].lineno)
+ break;
+ }
+
+ *new_lineno = lineno - fl_pair[i - 1].lineno + 1;
+ return fl_pair[i - 1].filename;
+}
+
#if HAVE_ASAN
/* Record the starting address and length of the sole poisoned region,
so that we can unpoison it later, just before each following read. */
@@ -2381,6 +2460,7 @@ main (int argc, char **argv)
strcpy (&keys[keycc], optarg);
keycc += cc;
keys[keycc++] = '\n';
+ fl_add (keys, keycc - cc - 1, keycc, "");
break;
case 'f':
@@ -2405,6 +2485,7 @@ main (int argc, char **argv)
/* Append final newline if file ended in non-newline. */
if (oldcc != keycc && keys[keycc - 1] != '\n')
keys[keycc++] = '\n';
+ fl_add (keys, oldcc, keycc, xstrdup (optarg));
break;
case 'h':
@@ -2645,6 +2726,7 @@ main (int argc, char **argv)
/* A copy must be made in case of an xrealloc() or free() later. */
keycc = strlen (argv[optind]);
keys = xmemdup (argv[optind++], keycc + 1);
+ fl_add (keys, 0, keycc, "");
}
else
usage (EXIT_TROUBLE);
diff --git a/src/grep.h b/src/grep.h
index 75b7ef7..b45992f 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -30,5 +30,6 @@ extern bool match_lines; /* -x */
extern char eolbyte; /* -z */
extern bool buf_has_encoding_errors (char *, size_t);
+extern char const *pattern_file_name (size_t, size_t *);
#endif
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 03cfdbb..77502ca 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -99,6 +99,7 @@ TESTS = \
fedora \
fgrep-infloop \
file \
+ filename-lineno.pl \
fmbtest \
foad1 \
grep-dev-null \
diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl
new file mode 100755
index 0000000..edba286
--- /dev/null
+++ b/tests/filename-lineno.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+# Prior to 2.26, an invalid regexp in a -f-specified file would elicit
+# a diagnostic like "Unmatched [ or [^", with no indication of the
+# file or line number from which the offending regular expression came.
+# With 2.26, now, each such diagnostic has a "FILENAME:LINENO: " prefix.
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+my $prog = 'grep';
+
+# Turn off localization of executable's output.
address@hidden(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+# There are at least two variants of one diagnostic:
+# - Unmatched [, [^, [:, [., or [=
+# - Unmatched [ or [^
+# Transform each to this: "Unmatched [..."
+my $err_subst = {ERR_SUBST => 's/(: Unmatched \[).*/$1.../'};
+
+my @Tests =
+ (
+ # Show that grep now includes filename:lineno in the diagnostic:
+ ['invalid-re', '-f g', {AUX=>{g=>"1\n2\n3\n4[[\n"}}, {EXIT=>2},
+ $err_subst,
+ {ERR => "$prog: g:4: Unmatched [...\n"},
+ ],
+
+ # Show that with two or more errors, grep now prints all diagnostics:
+ ['invalid-re2', '-f g -f h', {EXIT=>2},
+ {AUX=>{g=>"1\n2[[\n3\n4[[\n"}},
+ {AUX=>{h=>"\n\n[[\n"}},
+ $err_subst,
+ {ERR => "$prog: g:2: Unmatched [...\n"
+ . "$prog: g:4: Unmatched [...\n"
+ . "$prog: h:3: Unmatched [...\n"
+ },
+ ],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, address@hidden, $save_temps, $verbose);
+exit $fail;
--
2.8.0-rc2