From b7b4926b2f52cfa2ab7d33742355beaf9a08c695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?=
Date: Sun, 31 Oct 2021 15:38:29 +0000
Subject: [PATCH] copy: avoid SEEK_DATA corruption on ZFS
Avoid corruption when using SEEK_DATA on ZFS, as discussed in:
https://github.com/openzfs/zfs/issues/11900
* src/copy.c (functional_seek_data): A new function that
returns true when we're sure we're not copying from ZFS.
Note that systems like Solaris, which don't have statfs()->f_type
available, will be assumed not to have a usable SEEK_DATA.
Most systems in use should have this info available.
(infer_scantype): After a file is determined to perhaps be sparse,
call functional_seek_data to ensure it's not on ZFS.
* init.cfg (seek_data_capable_): Skip on ZFS.
* NEWS: Mention the bug avoidance.
Addresses https://bugs.gnu.org/51433
---
NEWS | 4 ++++
init.cfg | 12 ++++++++++++
src/copy.c | 36 +++++++++++++++++++++++++++++++++++-
3 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/NEWS b/NEWS
index 086da03ae..78a73ab05 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@ GNU coreutils NEWS -*- outline -*-
** Bug fixes
+ cp will avoid corruption bugs in the ZFS file system by avoiding use
+ of lseek(..., SEEK_DATA), thus using potentially slower hole detection.
+ [bug triggered in coreutils-9.0]
+
chmod -R no longer exits with error status when encountering symlinks.
All files would be processed correctly, but the exit status was incorrect.
[bug introduced in coreutils-9.0]
diff --git a/init.cfg b/init.cfg
index b92f717f5..0f97a6712 100644
--- a/init.cfg
+++ b/init.cfg
@@ -541,6 +541,18 @@ seek_data_capable_()
return 1
fi
+ # Skip on zfs due to various SEEK_DATA issues in its implementation
+ fstype=$(stat -f -c%t "$@") # Ensure we have f_type, as that's what copy uses
+ if test -z "$fstype" || test "$fstype" = '?'; then
+ warn_ 'seek_data_capable_: stat(1) failed: assuming not SEEK_DATA capable'
+ return 1
+ fi
+ fsname=$(stat -f -c%T "$@")
+ if test "$fsname" = 'zfs'; then
+ warn_ 'seek_data_capable_: zfs detected: SEEK_DATA is disabled'
+ return 1
+ fi
+
# Use timeout if available to skip cases where SEEK_DATA takes a long time.
# We saw FreeBSD 9.1 take 35s to return from SEEK_DATA for a 1TiB empty file.
# Note lseek() is uninterruptible on FreeBSD 9.1, but it does eventually
diff --git a/src/copy.c b/src/copy.c
index a6523ed97..6e19b8740 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -1080,6 +1080,33 @@ union scan_inference
off_t ext_start;
};
+/* SEEK_DATA on ZFS has many issues as described at:
+ https://github.com/openzfs/zfs/issues/11900
+ so return FALSE for ZFS (or inability to detect fs type). */
+
+#ifdef SEEK_HOLE
+# include "fs.h"
+# if HAVE_SYS_STATFS_H
+# include