From a65ef9e231f75dc9fac66f9e45724cb14f1b34bc Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 27 Jun 2015 08:43:33 -0700 Subject: [PATCH 1/2] tests: add coreutils' perl-driven test framework * configure.ac: Set the AM_CONDITIONAL variable, HAVE_PERL. * tests/Coreutils.pm: New file. * tests/CuSkip.pm: New file. * tests/CuTmpdir.pm: New file. * tests/no-perl: New file. * tests/Makefile.am: Set up to use .pl tests: (TEST_EXTENSIONS, TESTSUITE_PERL, TESTSUITE_PERL_OPTIONS): Define. (SH_LOG_COMPILER, PL_LOG_COMPILER): Define. (EXTRA_DIST): Add the four new file names. --- configure.ac | 7 + tests/Coreutils.pm | 620 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/CuSkip.pm | 39 ++++ tests/CuTmpdir.pm | 111 ++++++++++ tests/Makefile.am | 24 +++ tests/no-perl | 6 + 6 files changed, 807 insertions(+) create mode 100644 tests/Coreutils.pm create mode 100644 tests/CuSkip.pm create mode 100644 tests/CuTmpdir.pm create mode 100644 tests/no-perl diff --git a/configure.ac b/configure.ac index 4de0f39..afb8cd2 100644 --- a/configure.ac +++ b/configure.ac @@ -94,6 +94,13 @@ AC_TYPE_SIZE_T AC_C_CONST gl_INIT +# The test suite needs to know if we have a working perl. +# FIXME: this is suboptimal. Ideally, we would be able to call gl_PERL +# with an ACTION-IF-NOT-FOUND argument ... +cu_have_perl=yes +case $PERL in *"/missing "*) cu_have_perl=no;; esac +AM_CONDITIONAL([HAVE_PERL], [test $cu_have_perl = yes]) + AC_ARG_ENABLE([gcc-warnings], [AS_HELP_STRING([--enable-gcc-warnings], [turn on lots of GCC warnings (for developers)])], diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm new file mode 100644 index 0000000..bd2088f --- /dev/null +++ b/tests/Coreutils.pm @@ -0,0 +1,620 @@ +package Coreutils; +# This is a testing framework. + +# Copyright (C) 1998-2015 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; +use vars qw($VERSION @ISA @EXPORT); + +use FileHandle; +use File::Compare qw(compare); + +@ISA = qw(Exporter); +($VERSION = '$Revision: 1.5 $ ') =~ tr/[0-9].//cd; +@EXPORT = qw (run_tests triple_test getlimits); + +my $debug = $ENV{DEBUG}; + +my @Types = qw (IN IN_PIPE OUT ERR AUX CMP EXIT PRE POST OUT_SUBST + ERR_SUBST ENV ENV_DEL); +my %Types = map {$_ => 1} @Types; +my %Zero_one_type = map {$_ => 1} + qw (OUT ERR EXIT PRE POST OUT_SUBST ERR_SUBST ENV); +my $srcdir = "$ENV{srcdir}"; +my $Global_count = 1; + +# When running in a DJGPP environment, make $ENV{SHELL} point to bash. +# Otherwise, a bad shell might be used (e.g. command.com) and many +# tests would fail. +defined $ENV{DJDIR} + and $ENV{SHELL} = "$ENV{DJDIR}/bin/bash.exe"; + +# A file spec: a scalar or a reference to a single-keyed hash +# ================ +# 'contents' contents only (file name is derived from test name) +# {filename => 'contents'} filename and contents +# {filename => undef} filename only -- $(srcdir)/tests/filename must exist +# +# FIXME: If there is more than one input file, then you can't specify 'REDIR'. +# PIPE is still ok. 
+# +# I/O spec: a hash ref with the following properties +# ================ +# - one key/value pair +# - the key must be one of these strings: IN, OUT, ERR, AUX, CMP, EXIT +# - the value must be a file spec +# {OUT => 'data'} put data in a temp file and compare it to stdout from cmd +# {OUT => {'filename'=>undef}} compare contents of existing filename to +# stdout from cmd +# {OUT => {'filename'=>[$CTOR, $DTOR]}} $CTOR and $DTOR are references to +# functions, each which is passed the single argument 'filename'. +# $CTOR must create 'filename'. +# DTOR may be omitted in which case 'sub{unlink @_[0]}' is used. +# FIXME: implement this +# {ERR => ...} +# Same as for OUT, but compare with stderr, not stdout. +# {OUT_SUBST => 's/variable_output/expected_output/'} +# Transform actual standard output before comparing it against expected. +# This is useful e.g. for programs like du that produce output that +# varies a lot from system. E.g., an empty file may consume zero file +# blocks, or more, depending on the OS and on the file system type. +# {ERR_SUBST => 's/variable_output/expected_output/'} +# Transform actual stderr output before comparing it against expected. +# This is useful when verifying that we get a meaningful diagnostic. +# For example, in rm/fail-2eperm, we have to account for three different +# diagnostics: Operation not permitted, Not owner, and Permission denied. +# {EXIT => N} expect exit status of cmd to be N +# {ENV => 'VAR=val ...'} +# Prepend 'VAR=val ...' to the command that we execute via 'system'. +# {ENV_DEL => 'VAR'} +# Remove VAR from the environment just before running the corresponding +# command, and restore any value just afterwards. +# +# There may be many input file specs. File names from the input specs +# are concatenated in order on the command line. +# There may be at most one of the OUT-, ERR-, and EXIT-keyed specs. +# If the OUT-(or ERR)-keyed hash ref is omitted, then expect no output +# on stdout (or stderr). 
+# If the EXIT-keyed one is omitted, then expect the exit status to be zero. + +# FIXME: Make sure that no junkfile is also listed as a +# non-junkfile (i.e., with undef for contents) + +sub _shell_quote ($) +{ + my ($string) = @_; + $string =~ s/\'/\'\\\'\'/g; + return "'$string'"; +} + +sub _create_file ($$$$) +{ + my ($program_name, $test_name, $file_name, $data) = @_; + my $file; + if (defined $file_name) + { + $file = $file_name; + } + else + { + $file = "$test_name.$Global_count"; + ++$Global_count; + } + + warn "creating file '$file' with contents '$data'\n" if $debug; + + # The test spec gave a string. + # Write it to a temp file and return tempfile name. + my $fh = new FileHandle "> $file"; + die "$program_name: $file: $!\n" if ! $fh; + print $fh $data; + $fh->close || die "$program_name: $file: $!\n"; + + return $file; +} + +sub _compare_files ($$$$$) +{ + my ($program_name, $test_name, $in_or_out, $actual, $expected) = @_; + + my $differ = compare ($actual, $expected); + if ($differ) + { + my $info = (defined $in_or_out ? "std$in_or_out " : ''); + warn "$program_name: test $test_name: ${info}mismatch, comparing " + . "$expected (expected) and $actual (actual)\n"; + # Ignore any failure, discard stderr. + system "diff -c $expected $actual 2>/dev/null"; + } + + return $differ; +} + +sub _process_file_spec ($$$$$) +{ + my ($program_name, $test_name, $file_spec, $type, $junk_files) = @_; + + my ($file_name, $contents); + if (!ref $file_spec) + { + ($file_name, $contents) = (undef, $file_spec); + } + elsif (ref $file_spec eq 'HASH') + { + my $n = keys %$file_spec; + die "$program_name: $test_name: $type spec has $n elements --" + . 
" expected 1\n" + if $n != 1; + ($file_name, $contents) = each %$file_spec; + + # This happens for the AUX hash in an io_spec like this: + # {CMP=> ['zy123utsrqponmlkji', {'@AUX@'=> undef}]}, + defined $contents + or return $file_name; + } + else + { + die "$program_name: $test_name: invalid RHS in $type-spec\n" + } + + my $is_junk_file = (! defined $file_name + || (($type eq 'IN' || $type eq 'AUX' || $type eq 'CMP') + && defined $contents)); + my $file = _create_file ($program_name, $test_name, + $file_name, $contents); + + if ($is_junk_file) + { + push @$junk_files, $file + } + else + { + # FIXME: put $srcdir in here somewhere + warn "$program_name: $test_name: specified file '$file' does" + . " not exist\n" + if ! -f "$srcdir/tests/$file"; + } + + return $file; +} + +sub _at_replace ($$) +{ + my ($map, $s) = @_; + foreach my $eo (qw (AUX OUT ERR)) + { + my $f = $map->{$eo}; + $f + and $s =~ /\@$eo\@/ + and $s =~ s/\@$eo\@/$f/g; + } + return $s; +} + +sub getlimits() +{ + my $NV; + open $NV, "getlimits |" or die "Error running getlimits\n"; + my %limits = map {split /=|\n/} <$NV>; + return \%limits; +} + +# FIXME: cleanup on interrupt +# FIXME: extract 'do_1_test' function + +# FIXME: having to include $program_name here is an expedient kludge. +# Library code doesn't 'die'. +sub run_tests ($$$$$) +{ + my ($program_name, $prog, $t_spec, $save_temps, $verbose) = @_; + + # To indicate that $prog is a shell built-in, you'd make it a string 'ref'. + # E.g., call run_tests ($prog, \$prog, \@Tests, $save_temps, $verbose); + # If it's a ref, invoke it via "env": + my @prog = ref $prog ? (qw(env --), $$prog) : $prog; + + # Warn about empty t_spec. + # FIXME + + # Remove all temp files upon interrupt. + # FIXME + + # Verify that test names are distinct. 
+ my $bad_test_name = 0; + my %seen; + my %seen_8dot3; + my $t; + foreach $t (@$t_spec) + { + my $test_name = $t->[0]; + if ($seen{$test_name}) + { + warn "$program_name: $test_name: duplicate test name\n"; + $bad_test_name = 1; + } + $seen{$test_name} = 1; + + if (0) + { + my $t8 = lc substr $test_name, 0, 8; + if ($seen_8dot3{$t8}) + { + warn "$program_name: 8.3 test name conflict: " + . "$test_name, $seen_8dot3{$t8}\n"; + $bad_test_name = 1; + } + $seen_8dot3{$t8} = $test_name; + } + + # The test name may be no longer than 30 bytes. + # Yes, this is an arbitrary limit. If it causes trouble, + # consider removing it. + my $max = 30; + if ($max < length $test_name) + { + warn "$program_name: $test_name: test name is too long (> $max)\n"; + $bad_test_name = 1; + } + } + return 1 if $bad_test_name; + + # FIXME check exit status + system (@prog, '--version') if $verbose; + + my @junk_files; + my $fail = 0; + foreach my $tt (@$t_spec) + { + my @post_compare; + my @dummy = @$tt; + my $t = \@dummy; + my $test_name = shift @$t; + my $expect = {}; + my ($pre, $post); + + # FIXME: maybe don't reset this. + $Global_count = 1; + my @args; + my $io_spec; + my %seen_type; + my @env_delete; + my $env_prefix = ''; + my $input_pipe_cmd; + foreach $io_spec (@$t) + { + if (!ref $io_spec) + { + push @args, $io_spec; + next; + } + + if (ref $io_spec ne 'HASH') + { + eval 'use Data::Dumper'; + die "$program_name: $test_name: invalid entry in test spec; " + . "expected HASH-ref,\nbut got this:\n" + . Data::Dumper->Dump ([\$io_spec], ['$io_spec']) . "\n"; + } + + my $n = keys %$io_spec; + die "$program_name: $test_name: spec has $n elements --" + . " expected 1\n" + if $n != 1; + my ($type, $val) = each %$io_spec; + die "$program_name: $test_name: invalid key '$type' in test spec\n" + if ! $Types{$type}; + + # Make sure there's no more than one of OUT, ERR, EXIT, etc. 
+ die "$program_name: $test_name: more than one $type spec\n" + if $Zero_one_type{$type} and $seen_type{$type}++; + + if ($type eq 'PRE' or $type eq 'POST') + { + $expect->{$type} = $val; + next; + } + + if ($type eq 'CMP') + { + my $t = ref $val; + $t && $t eq 'ARRAY' + or die "$program_name: $test_name: invalid CMP spec\n"; + @$val == 2 + or die "$program_name: $test_name: invalid CMP list; must have" + . " exactly 2 elements\n"; + my @cmp_files; + foreach my $e (@$val) + { + my $r = ref $e; + $r && $r ne 'HASH' + and die "$program_name: $test_name: invalid element ($r)" + . " in CMP list; only scalars and hash references " + . "are allowed\n"; + if ($r && $r eq 'HASH') + { + my $n = keys %$e; + $n == 1 + or die "$program_name: $test_name: CMP spec has $n " + . "elements -- expected 1\n"; + + # Replace any '@AUX@' in the key of %$e. + my ($ff, $val) = each %$e; + my $new_ff = _at_replace $expect, $ff; + if ($new_ff ne $ff) + { + $e->{$new_ff} = $val; + delete $e->{$ff}; + } + } + my $cmp_file = _process_file_spec ($program_name, $test_name, + $e, $type, \@junk_files); + push @cmp_files, $cmp_file; + } + push @post_compare, [@cmp_files]; + + $expect->{$type} = $val; + next; + } + + if ($type eq 'EXIT') + { + die "$program_name: $test_name: invalid EXIT code\n" + if $val !~ /^\d+$/; + # FIXME: make sure $data is numeric + $expect->{EXIT} = $val; + next; + } + + if ($type =~ /^(OUT|ERR)_SUBST$/) + { + $expect->{RESULT_SUBST} ||= {}; + $expect->{RESULT_SUBST}->{$1} = $val; + next; + } + + if ($type eq 'ENV') + { + $env_prefix = "$val "; + next; + } + + if ($type eq 'ENV_DEL') + { + push @env_delete, $val; + next; + } + + my $file = _process_file_spec ($program_name, $test_name, $val, + $type, \@junk_files); + + if ($type eq 'IN' || $type eq 'IN_PIPE') + { + my $quoted_file = _shell_quote $file; + if ($type eq 'IN_PIPE') + { + defined $input_pipe_cmd + and die "$program_name: $test_name: only one input" + . 
" may be specified with IN_PIPE\n"; + $input_pipe_cmd = "cat $quoted_file |"; + } + else + { + push @args, $quoted_file; + } + } + elsif ($type eq 'AUX' || $type eq 'OUT' || $type eq 'ERR') + { + $expect->{$type} = $file; + } + else + { + die "$program_name: $test_name: invalid type: $type\n" + } + } + + # Expect an exit status of zero if it's not specified. + $expect->{EXIT} ||= 0; + + # Allow ERR to be omitted -- in that case, expect no error output. + foreach my $eo (qw (OUT ERR)) + { + if (!exists $expect->{$eo}) + { + $expect->{$eo} = _create_file ($program_name, $test_name, + undef, ''); + push @junk_files, $expect->{$eo}; + } + } + + # FIXME: Does it ever make sense to specify a filename *and* contents + # in OUT or ERR spec? + + # FIXME: this is really suboptimal... + my @new_args; + foreach my $a (@args) + { + $a = _at_replace $expect, $a; + push @new_args, $a; + } + @args = @new_args; + + warn "$test_name...\n" if $verbose; + &{$expect->{PRE}} if $expect->{PRE}; + my %actual; + $actual{OUT} = "$test_name.O"; + $actual{ERR} = "$test_name.E"; + push @junk_files, $actual{OUT}, $actual{ERR}; + my @cmd = (@prog, @args, "> $actual{OUT}", "2> $actual{ERR}"); + $env_prefix + and unshift @cmd, $env_prefix; + defined $input_pipe_cmd + and unshift @cmd, $input_pipe_cmd; + my $cmd_str = join (' ', @cmd); + + # Delete from the environment any symbols specified by syntax + # like this: {ENV_DEL => 'TZ'}. + my %pushed_env; + foreach my $env_sym (@env_delete) + { + my $val = delete $ENV{$env_sym}; + defined $val + and $pushed_env{$env_sym} = $val; + } + + warn "Running command: '$cmd_str'\n" if $debug; + my $rc = 0xffff & system $cmd_str; + + # Restore any environment setting we changed via a deletion. + foreach my $env_sym (keys %pushed_env) + { + $ENV{$env_sym} = $pushed_env{$env_sym}; + } + + if ($rc == 0xff00) + { + warn "$program_name: test $test_name failed: command failed:\n" + . 
" '$cmd_str': $!\n"; + $fail = 1; + goto cleanup; + } + $rc >>= 8 if $rc > 0x80; + if ($expect->{EXIT} != $rc) + { + warn "$program_name: test $test_name failed: exit status mismatch:" + . " expected $expect->{EXIT}, got $rc\n"; + $fail = 1; + goto cleanup; + } + + my %actual_data; + # Record actual stdout and stderr contents, if POST may need them. + if ($expect->{POST}) + { + foreach my $eo (qw (OUT ERR)) + { + my $out_file = $actual{$eo}; + open IN, $out_file + or (warn + "$program_name: cannot open $out_file for reading: $!\n"), + $fail = 1, next; + $actual_data{$eo} = <IN>; + close IN + or (warn "$program_name: failed to read $out_file: $!\n"), + $fail = 1; + } + } + + foreach my $eo (qw (OUT ERR)) + { + my $subst_expr = $expect->{RESULT_SUBST}->{$eo}; + if (defined $subst_expr) + { + my $out = $actual{$eo}; + my $orig = "$out.orig"; + + # Move $out aside (to $orig), then recreate $out + # by transforming each line of $orig via $subst_expr. + rename $out, $orig + or (warn "$program_name: cannot rename $out to $orig: $!\n"), + $fail = 1, next; + open IN, $orig + or (warn "$program_name: cannot open $orig for reading: $!\n"), + $fail = 1, (unlink $orig), next; + unlink $orig + or (warn "$program_name: cannot unlink $orig: $!\n"), + $fail = 1; + open OUT, ">$out" + or (warn "$program_name: cannot open $out for writing: $!\n"), + $fail = 1, next; + while (defined (my $line = <IN>)) + { + eval "\$_ = \$line; $subst_expr; \$line = \$_"; + print OUT $line; + } + close IN; + close OUT + or (warn "$program_name: failed to write $out: $!\n"), + $fail = 1, next; + } + + my $eo_lower = lc $eo; + _compare_files ($program_name, $test_name, $eo_lower, + $actual{$eo}, $expect->{$eo}) + and $fail = 1; + } + + foreach my $pair (@post_compare) + { + my ($expected, $actual) = @$pair; + _compare_files $program_name, $test_name, undef, $actual, $expected + and $fail = 1; + } + + cleanup: + $expect->{POST} + and &{$expect->{POST}} ($actual_data{OUT}, $actual_data{ERR}); + + } + + # FIXME: 
maybe unlink files inside the big foreach loop? + unlink @junk_files if ! $save_temps; + + return $fail; +} + +# For each test in @$TESTS, generate two additional tests, +# one using stdin, the other using a pipe. I.e., given this one +# ['idem-0', {IN=>''}, {OUT=>''}], +# generate these: +# ['idem-0.r', '<', {IN=>''}, {OUT=>''}], +# ['idem-0.p', {IN_PIPE=>''}, {OUT=>''}], +# Generate new tests only if there is exactly one input spec. +# The returned list of tests contains each input test, followed +# by zero or two derived tests. +sub triple_test($) +{ + my ($tests) = @_; + my @new; + foreach my $t (@$tests) + { + push @new, $t; + + my @in; + my @args; + my @list_of_hash; + foreach my $e (@$t) + { + !ref $e + and push (@args, $e), next; + + ref $e && ref $e eq 'HASH' + or (warn "$0: $t->[0]: unexpected entry type\n"), next; + defined $e->{IN} + and (push @in, $e->{IN}), next; + push @list_of_hash, $e; + } + # Add variants IFF there is exactly one input file. + @in == 1 + or next; + shift @args; # discard test name + push @new, ["$t->[0].r", @args, '<', {IN => $in[0]}, @list_of_hash]; + push @new, ["$t->[0].p", @args, {IN_PIPE => $in[0]}, @list_of_hash]; + } + return @new; +} + +## package return +1; diff --git a/tests/CuSkip.pm b/tests/CuSkip.pm new file mode 100644 index 0000000..a25688a --- /dev/null +++ b/tests/CuSkip.pm @@ -0,0 +1,39 @@ +package CuSkip; +# Skip a test: emit diag to log and to stderr, and exit 77 + +# Copyright (C) 2011-2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; +use warnings; + +our $ME = $0 || "<???>"; + +# Emit a diagnostic both to stderr and to $stderr_fileno_. +# FIXME: don't hard-code that value (9), since it's already defined in init.cfg. +sub skip ($) +{ + my ($msg) = @_; + my $stderr_fileno_ = 9; + warn $msg; + open FH, ">&$stderr_fileno_" + or warn "$ME: failed to dup stderr\n"; + print FH $msg; + close FH + or warn "$ME: failed to close FD $stderr_fileno_\n"; + exit 77; +} + +1; diff --git a/tests/CuTmpdir.pm b/tests/CuTmpdir.pm new file mode 100644 index 0000000..fd65556 --- /dev/null +++ b/tests/CuTmpdir.pm @@ -0,0 +1,111 @@ +package CuTmpdir; +# create, then chdir into a temporary sub-directory + +# Copyright (C) 2007-2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; +use warnings; + +use File::Temp; +use File::Find; + +our $ME = $0 || "<???>"; + +my $dir; + +sub skip_test($) +{ + warn "$ME: skipping test: unsafe working directory name: '$_[0]'\n"; + exit 77; +} + +sub chmod_1 +{ + my $name = $_; + + # Skip symlinks and non-directories. + -l $name || !-d _ + and return; + + chmod 0700, $name; +} + +sub chmod_tree +{ + # When tempdir fails, it croaks, which leaves $dir undefined. 
+ defined $dir + or return; + + # Perform the equivalent of find "$dir" -type d -print0|xargs -0 chmod -R 700. + my $options = {untaint => 1, wanted => \&chmod_1}; + find ($options, $dir); +} + +sub import { + my $prefix = $_[1]; + + $ME eq '-' && defined $prefix + and $ME = $prefix; + + if ($prefix !~ /^\//) + { + eval 'use Cwd'; + my $cwd = $@ ? '.' : Cwd::getcwd(); + $prefix = "$cwd/$prefix"; + } + + # Untaint for the upcoming mkdir. + $prefix =~ m!^([-+\@\w./]+)$! + or skip_test $prefix; + $prefix = $1; + + my $original_pid = $$; + + my $on_sig_remove_tmpdir = sub { + my ($sig) = @_; + if ($$ == $original_pid and defined $dir) + { + chmod_tree; + # Older versions of File::Temp lack this method. + exists &File::Temp::cleanup + and &File::Temp::cleanup; + } + $SIG{$sig} = 'DEFAULT'; + kill $sig, $$; + }; + + foreach my $sig (qw (INT TERM HUP)) + { + $SIG{$sig} = $on_sig_remove_tmpdir; + } + + $dir = File::Temp::tempdir("$prefix.tmp-XXXX", CLEANUP => 1 ); + chdir $dir + or warn "$ME: failed to chdir to $dir: $!\n"; +} + +END { + # Move cwd out of the directory we're about to remove. + # This is required on some systems, and by some versions of File::Temp. + chdir '..' + or warn "$ME: failed to chdir to .. from $dir: $!\n"; + + my $saved_errno = $?; + chmod_tree; + $? = $saved_errno; +} + +1; diff --git a/tests/Makefile.am b/tests/Makefile.am index b29328b..03cfdbb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -14,6 +14,26 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +TEST_EXTENSIONS = .sh .pl + +if HAVE_PERL +TESTSUITE_PERL = $(PERL) +else +TESTSUITE_PERL = $(SHELL) $(srcdir)/no-perl +endif + +# Options passed to the perl invocations running the perl test scripts. 
+TESTSUITE_PERL_OPTIONS = -w -I$(srcdir) -MCoreutils -MCuSkip +# '$f' is set by the Automake-generated test harness to the path of the +# current test script stripped of VPATH components, and is used by the +# CuTmpdir module to determine the name of the temporary files to be +# used. Note that $f is a shell variable, not a make macro, so the use +# of '$$f' below is correct, and not a typo. +TESTSUITE_PERL_OPTIONS += -M"CuTmpdir qw($$f)" + +SH_LOG_COMPILER = $(SHELL) +PL_LOG_COMPILER = $(TESTSUITE_PERL) $(TESTSUITE_PERL_OPTIONS) + check_PROGRAMS = get-mb-cur-max dfa-match-aux AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib \ -I$(top_srcdir)/src @@ -152,6 +172,9 @@ EXTRA_DIST = \ $(TESTS) \ bre.awk \ bre.tests \ + Coreutils.pm \ + CuSkip.pm \ + CuTmpdir.pm \ envvar-check \ ere.awk \ ere.tests \ @@ -159,6 +182,7 @@ EXTRA_DIST = \ init.sh \ khadafy.lines \ khadafy.regexp \ + no-perl \ spencer1.awk \ spencer1.tests \ spencer1-locale.awk diff --git a/tests/no-perl b/tests/no-perl new file mode 100644 index 0000000..956a826 --- /dev/null +++ b/tests/no-perl @@ -0,0 +1,6 @@ +#! /bin/sh +# Perl is not available, the test should be considered skipped. +# FD 9 should have been opened by the test suite harness, pointing +# to the original stderr (usually, the user's terminal). +echo "test skipped: no usable version of Perl found" >&9 +exit 77 -- 2.8.0-rc2 From 083158051ce6ab058e08223c19b52de2adba9f4b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 16 Jul 2016 10:51:31 -0700 Subject: [PATCH 2/2] grep: print "filename:lineno:" in invalid-regex diagnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Determining the file name and line number is a little tricky because of the way the regular expressions are all concatenated onto a newline- separated list. By the time grep would be compiling regular expressions, the origin of each regexp was no longer available. 
This patch adds a list of filename,first_lineno pairs, one per input source, by which we can then map the ordinal regexp number to a filename,lineno pair for the diagnostic. * src/dfasearch.c (GEAcompile): When diagnosing an invalid regexp specified via -f FILE, include the "FILENAME:LINENO: " prefix. Also, when there are two or more lines with compilation failures, diagnose all of them, rather than stopping after the first. * src/grep.h (pattern_file_name): Declare it. * src/grep.c: (struct FL_pair): Define type. (fl_pair, n_fl_pair_slots, n_pattern_files, patfile_lineno): Define globals. (fl_add, pattern_file_name): Define functions. (main): Call fl_add for each type of the following: -e argument, -f argument, command-line-specified (without -e) regexp. * tests/filename-lineno.pl: New file. * tests/Makefile.am (TESTS): Add it. * NEWS (Improvements): Mention this. Initially reported by Gunnar Wolf in https://bugs.debian.org/525214 Forwarded to grep's bug list by Santiago Ruano Rincón as http://debbugs.gnu.org/23965 --- NEWS | 5 +-- src/dfasearch.c | 16 +++++++++- src/grep.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ src/grep.h | 1 + tests/Makefile.am | 1 + tests/filename-lineno.pl | 61 +++++++++++++++++++++++++++++++++++ 6 files changed, 163 insertions(+), 3 deletions(-) create mode 100755 tests/filename-lineno.pl diff --git a/NEWS b/NEWS index c3e6000..44b6fdf 100644 --- a/NEWS +++ b/NEWS @@ -6,12 +6,13 @@ GNU grep NEWS -*- outline -*- grep can be much faster now when standard output is /dev/null. -** Improvements - grep -F is now typically much faster when many patterns are given, as it now uses the Aho-Corasick algorithm instead of the Commentz-Walter algorithm in that case. + grep now prints a "FILENAME:LINENO: " prefix when diagnosing an + invalid regular expression that was read from an '-f'-specified file. 
+ * Noteworthy changes in release 2.25 (2016-04-21) [stable] diff --git a/src/dfasearch.c b/src/dfasearch.c index 8052ef0..9096785 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -135,6 +135,7 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) this should be a syntax error. The same for backref, where the backref should be local to each pattern. */ char const *p = pattern; + bool compilation_failed = false; do { size_t len; @@ -157,12 +158,25 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) char const *err = re_compile_pattern (p, len, &(patterns[pcount].regexbuf)); if (err) - error (EXIT_TROUBLE, 0, "%s", err); + { + /* With patterns specified only on the command line, emit the bare + diagnostic. Otherwise, include a filename:lineno: prefix. */ + size_t lineno; + char const *pat_filename = pattern_file_name (pcount + 1, &lineno); + if (*pat_filename == '\0') + error (0, 0, "%s", err); + else + error (0, 0, "%s:%zu: %s", pat_filename, lineno, err); + compilation_failed = true; + } pcount++; p = sep; } while (p); + if (compilation_failed) + exit (EXIT_TROUBLE); + /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher diff --git a/src/grep.c b/src/grep.c index 302e4d7..a82da61 100644 --- a/src/grep.c +++ b/src/grep.c @@ -81,6 +81,85 @@ static bool only_matching; /* If nonzero, make sure first content char in a line is on a tab stop. */ static bool align_tabs; +/* See below */ +struct FL_pair + { + char const *filename; + size_t lineno; + }; + +/* A list of lineno,filename pairs corresponding to -f FILENAME + arguments. Since we store the concatenation of all patterns in + a single array, KEYS, be they from the command line via "-e PAT" + or read from one or more -f-specified FILENAMES. 
Given this + invocation, grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there + will be three entries in FL_PAIR: {1, x} {6, y} {8, z}, where + x, y and z are just place-holders for shell-generated names. */ +static struct FL_pair *fl_pair; +static size_t n_fl_pair_slots; +/* Count not only -f-specified files, but also individual -e operands + and any command-line argument that serves as a regular expression. */ +static size_t n_pattern_files; + +/* Given the concatenation of all patterns, one per line, be they + specified via -e, a lone command-line argument or -f, this is the + number of the first line of each entity, in that concatenation. + It is advanced by fl_add and, when needed, used in pattern_file_name + to derive a file-relative line number. */ +static uintmax_t patfile_lineno = 1; + +/* Return the number of newline bytes in BUF starting at offset BEG + and up to and not including offset END. */ +static size_t _GL_ATTRIBUTE_PURE +count_nl_bytes (char const *buf, size_t beg, size_t end) +{ + char const *p = buf + beg; + char const *end_p = buf + end; + uintmax_t n = 0; + while (true) + { + p = memchr (p, '\n', end_p - p); + if (!p) + break; + p++; + n++; + } + return n; +} + +/* Append a FILENAME,line-number pair to FL_PAIR. The line number we save + with FILENAME is the initial value of the global PATFILE_LINENO. + PATFILE_LINENO is then incremented by the number of newlines in BUF + from offset BEG up to but not including offset END. */ +static void +fl_add (char const *buf, size_t beg, size_t end, char const *filename) +{ + if (n_fl_pair_slots <= n_pattern_files) + fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair); + + fl_pair[n_pattern_files].lineno = patfile_lineno; + fl_pair[n_pattern_files].filename = filename; + n_pattern_files++; + patfile_lineno += count_nl_bytes (buf, beg, end); +} + +/* Map the line number, LINENO, of one of the input patterns to the + name of the file from which it came. 
If it was read from stdin, + return "-"; if it was specified on the command line, return "". */ +char const * _GL_ATTRIBUTE_PURE +pattern_file_name (size_t lineno, size_t *new_lineno) +{ + size_t i; + for (i = 1; i < n_pattern_files; i++) + { + if (lineno < fl_pair[i].lineno) + break; + } + + *new_lineno = lineno - fl_pair[i - 1].lineno + 1; + return fl_pair[i - 1].filename; +} + #if HAVE_ASAN /* Record the starting address and length of the sole poisoned region, so that we can unpoison it later, just before each following read. */ @@ -2381,6 +2460,7 @@ main (int argc, char **argv) strcpy (&keys[keycc], optarg); keycc += cc; keys[keycc++] = '\n'; + fl_add (keys, keycc - cc - 1, keycc, ""); break; case 'f': @@ -2405,6 +2485,7 @@ main (int argc, char **argv) /* Append final newline if file ended in non-newline. */ if (oldcc != keycc && keys[keycc - 1] != '\n') keys[keycc++] = '\n'; + fl_add (keys, oldcc, keycc, xstrdup (optarg)); break; case 'h': @@ -2645,6 +2726,7 @@ main (int argc, char **argv) /* A copy must be made in case of an xrealloc() or free() later. 
*/ keycc = strlen (argv[optind]); keys = xmemdup (argv[optind++], keycc + 1); + fl_add (keys, 0, keycc, ""); } else usage (EXIT_TROUBLE); diff --git a/src/grep.h b/src/grep.h index 75b7ef7..b45992f 100644 --- a/src/grep.h +++ b/src/grep.h @@ -30,5 +30,6 @@ extern bool match_lines; /* -x */ extern char eolbyte; /* -z */ extern bool buf_has_encoding_errors (char *, size_t); +extern char const *pattern_file_name (size_t, size_t *); #endif diff --git a/tests/Makefile.am b/tests/Makefile.am index 03cfdbb..77502ca 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -99,6 +99,7 @@ TESTS = \ fedora \ fgrep-infloop \ file \ + filename-lineno.pl \ fmbtest \ foad1 \ grep-dev-null \ diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl new file mode 100755 index 0000000..edba286 --- /dev/null +++ b/tests/filename-lineno.pl @@ -0,0 +1,61 @@ +#!/usr/bin/perl +# Prior to 2.26, an invalid regexp in a -f-specified file would elicit +# a diagnostic like "Unmatched [ or [^", with no indication of the +# file or line number from which the offending regular expression came. +# With 2.26, now, each such diagnostic has a "FILENAME:LINENO: " prefix. + +# Copyright (C) 2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; + +(my $program_name = $0) =~ s|.*/||; + +my $prog = 'grep'; + +# Turn off localization of executable's output. 
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + +# There are at least two variants of one diagnostic: +# - Unmatched [, [^, [:, [., or [= +# - Unmatched [ or [^ +# Transform each to this: "Unmatched [..." +my $err_subst = {ERR_SUBST => 's/(: Unmatched \[).*/$1.../'}; + +my @Tests = + ( + # Show that grep now includes filename:lineno in the diagnostic: + ['invalid-re', '-f g', {AUX=>{g=>"1\n2\n3\n4[[\n"}}, {EXIT=>2}, + $err_subst, + {ERR => "$prog: g:4: Unmatched [...\n"}, + ], + + # Show that with two or more errors, grep now prints all diagnostics: + ['invalid-re2', '-f g -f h', {EXIT=>2}, + {AUX=>{g=>"1\n2[[\n3\n4[[\n"}}, + {AUX=>{h=>"\n\n[[\n"}}, + $err_subst, + {ERR => "$prog: g:2: Unmatched [...\n" + . "$prog: g:4: Unmatched [...\n" + . "$prog: h:3: Unmatched [...\n" + }, + ], + ); + +my $save_temps = $ENV{DEBUG}; +my $verbose = $ENV{VERBOSE}; + +my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); +exit $fail; -- 2.8.0-rc2