>From e5e601102cd7e57a9e60afd03f52154fd83123fa Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 2 Aug 2018 10:35:15 -0700 Subject: [PATCH] gzip: make the output more reproducible Problem reported by Bernhard M. Wiedemann (Bug#32342). * NEWS: Mention this. * doc/gzip.texi (Overview, Invoking gzip): Document this. * gzip.c (get_input_size_and_time): New function, which implements the change. (treat_stdin, treat_file): Use it. * tests/reproducible: New test. * tests/Makefile.am (TESTS): Add it. --- NEWS | 9 +++++++++ doc/gzip.texi | 18 ++++++++++++------ gzip.c | 35 ++++++++++++++++++++++------------- tests/Makefile.am | 1 + tests/reproducible | 28 ++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 19 deletions(-) create mode 100755 tests/reproducible diff --git a/NEWS b/NEWS index 0e3a126..49c2e9b 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,15 @@ GNU gzip NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Changes in behavior + + Compressed gzip output no longer contains the current time as a + timestamp when the input is not a regular file. Instead, the output + contains a null (zero) timestamp. This makes gzip's behavior more + reproducible when used as part of a pipeline. (As a reminder, even + regular files will use null timestamps after the year 2106, due to a + limitation in the gzip format.) + * Noteworthy changes in release 1.9 (2018-01-07) [stable] diff --git a/doc/gzip.texi b/doc/gzip.texi index 195bab8..a7037ac 100644 --- a/doc/gzip.texi +++ b/doc/gzip.texi @@ -98,11 +98,16 @@ For example, if file names are limited to 14 characters, gzip.msdos.exe is compressed to gzi.msd.exe.gz. Names are not truncated on systems which do not have a limit on file name length. -By default, @command{gzip} keeps the original file name and timestamp in -the compressed file. These are used when decompressing the file with the address@hidden option. 
This is useful when the compressed file name was -truncated or when the timestamp was not preserved after a file -transfer. However, due to limitations in the current @command{gzip} file +By default, @command{gzip} keeps the original file name in the +compressed file. This can be useful when decompressing the file with +@option{-N} if the compressed file name was truncated after a file +transfer. + +If the original is a regular file, @command{gzip} by default keeps its +timestamp in the compressed file. This can be useful when +decompressing the file with @option{-N} if the timestamp was not +preserved after a file transfer. +However, due to limitations in the current @command{gzip} file format, fractional seconds are discarded. Also, timestamps must fall within the range 1970-01-01 00:00:01 through 2106-02-07 06:28:15 @abbr{UTC}, and hosts whose operating systems use 32-bit timestamps @@ -344,7 +349,8 @@ is the default when decompressing. @item --name @itemx -N -When compressing, always save the original file name and timestamp; this +When compressing, always save the original file name, and save +the original timestamp if the original is a regular file; this is the default. When decompressing, restore the original file name and timestamp if present. This option is useful on systems which have a limit on file name length or when the timestamp has been lost after diff --git a/gzip.c b/gzip.c index a023d81..02d5364 100644 --- a/gzip.c +++ b/gzip.c @@ -714,6 +714,25 @@ input_eof () return 0; } +static void +get_input_size_and_time (void) +{ + ifile_size = -1; + time_stamp.tv_nsec = -1; + + /* Record the input file's size and timestamp only if it is a + regular file. Doing this for the timestamp helps to keep gzip's + output more reproducible when it is used as part of a + pipeline. 
*/ + + if (S_ISREG (istat.st_mode)) + { + ifile_size = istat.st_size; + if (!no_time || list) + time_stamp = get_stat_mtime (&istat); + } +} + /* ======================================================================== * Compress or decompress stdin */ @@ -761,15 +780,8 @@ local void treat_stdin() progerror ("standard input"); do_exit (ERROR); } - ifile_size = S_ISREG (istat.st_mode) ? istat.st_size : -1; - time_stamp.tv_nsec = -1; - if (!no_time || list) - { - if (S_ISREG (istat.st_mode)) - time_stamp = get_stat_mtime (&istat); - else - gettime (&time_stamp); - } + + get_input_size_and_time (); clear_bufs(); /* clear input and output buffers */ to_stdout = 1; @@ -941,10 +953,7 @@ local void treat_file(iname) } } - ifile_size = S_ISREG (istat.st_mode) ? istat.st_size : -1; - time_stamp.tv_nsec = -1; - if (!no_time || list) - time_stamp = get_stat_mtime (&istat); + get_input_size_and_time (); /* Generate output file name. For -r and (-t or -l), skip files * without a valid gzip suffix (check done in make_ofname). diff --git a/tests/Makefile.am b/tests/Makefile.am index ebdce5b..691bbf8 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -24,6 +24,7 @@ TESTS = \ memcpy-abuse \ mixed \ null-suffix-clobber \ + reproducible \ stdin \ timestamp \ trailing-nul \ diff --git a/tests/reproducible b/tests/reproducible new file mode 100755 index 0000000..3bb4974 --- /dev/null +++ b/tests/reproducible @@ -0,0 +1,28 @@ +#!/bin/sh +# Ensure that gzip has reproducible output. + +# Copyright 2018 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# limit so don't run it by default. + +. "${srcdir=.}/init.sh"; path_prepend_ .. + +echo a | gzip > exp || fail=1 +sleep 1 +echo a | gzip > out || fail=1 + +compare exp out || fail=1 + +Exit $fail -- 2.17.1