>From b7b4926b2f52cfa2ab7d33742355beaf9a08c695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Sun, 31 Oct 2021 15:38:29 +0000 Subject: [PATCH] copy: avoid SEEK_DATA corruption on ZFS Avoid corruption when using SEEK_DATA on ZFS, as discussed in: https://github.com/openzfs/zfs/issues/11900 * src/copy.c (functional_seek_data): A new function that returns true when we're sure we're not copying from ZFS. Note systems like solaris which doesn't have statfs()->f_type available, will be assumed to not have a usable SEEK_DATA. Most systems in use should have this info available. (infer_scantype): After a file is determined to perhaps be sparse, call functional_seek_data to ensure it's not on ZFS. * init.cfg (seek_data_capable_): Skip on ZFS. * NEWS: Mention the bug avoidance. Addresses https://bugs.gnu.org/51433 --- NEWS | 4 ++++ init.cfg | 12 ++++++++++++ src/copy.c | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 086da03ae..78a73ab05 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,10 @@ GNU coreutils NEWS -*- outline -*- ** Bug fixes + cp will avoid corruption bugs in the ZFS file system by avoiding use + of lseek(..., SEEK_DATA), thus using potentially slower hole detection. + [bug triggered in coreutils-9.0] + chmod -R no longer exits with error status when encountering symlinks. All files would be processed correctly, but the exit status was incorrect. [bug introduced in coreutils-9.0] diff --git a/init.cfg b/init.cfg index b92f717f5..0f97a6712 100644 --- a/init.cfg +++ b/init.cfg @@ -541,6 +541,18 @@ seek_data_capable_() return 1 fi + # Skip on zfs due to various SEEK_DATA issues in its implementation + fstype=$(stat -f -c%t "$@") # Ensure we have f_type, as that's what copy uses + if test -z "$fstype" || test "$fstype" = '?'; then + warn_ 'seek_data_capable_: stat(1) failed: assuming not SEEK_DATA capable' + return 1 + fi + fsname=$(stat -f -c%T "$@") + if test "$fsname" = 'zfs'; then + warn_ 'seek_data_capable_: zfs detected: SEEK_DATA is disabled' + return 1 + fi + # Use timeout if available to skip cases where SEEK_DATA takes a long time. # We saw FreeBSD 9.1 take 35s to return from SEEK_DATA for a 1TiB empty file. # Note lseek() is uninterruptible on FreeBSD 9.1, but it does eventually diff --git a/src/copy.c b/src/copy.c index a6523ed97..6e19b8740 100644 --- a/src/copy.c +++ b/src/copy.c @@ -1080,6 +1080,33 @@ union scan_inference off_t ext_start; }; +/* SEEK_DATA on ZFS has many issues as described at: + https://github.com/openzfs/zfs/issues/11900 + so return FALSE for ZFS (or inability to detect fs type). */ + +#ifdef SEEK_HOLE +# include "fs.h" +# if HAVE_SYS_STATFS_H +# include +# elif HAVE_SYS_VFS_H +# include +# endif +static bool +functional_seek_data (int fd) +{ +# if HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE + struct statfs buf; + if (fstatfs (fd, &buf) == 0) + { + if (buf.f_type != S_MAGIC_ZFS) + return true; + } +# endif + + return false; +} +#endif + /* Return how to scan a file with descriptor FD and stat buffer SB. Store any information gathered into *SCAN. */ static enum scantype @@ -1092,7 +1119,14 @@ infer_scantype (int fd, struct stat const *sb, return PLAIN_SCANTYPE; #ifdef SEEK_HOLE - scan_inference->ext_start = lseek (fd, 0, SEEK_DATA); + if (! functional_seek_data (fd)) + { + errno = ENOTSUP; + scan_inference->ext_start = -1; + } + else + scan_inference->ext_start = lseek (fd, 0, SEEK_DATA); + if (0 <= scan_inference->ext_start || errno == ENXIO) return LSEEK_SCANTYPE; else if (errno != EINVAL && !is_ENOTSUP (errno)) -- 2.26.2