bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module


From: Jeff liu
Subject: bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module
Date: Thu, 17 Feb 2011 21:57:14 +0800

Hello All,

This is a first attempt at introducing SEEK_DATA/SEEK_HOLE support to the
extent_scan module for efficient sparse file copying on ZFS.  I have delayed it
for a long time, sorry for that!

Below is the list of code changes:
src/extent-scan.h:  add a new member 'src_total_size' to "struct
extent_scan", since I have to make use of this value to determine whether
a file is sparse or not during the initial scan.  If lseek(fd, 0, SEEK_HOLE)
returns a value equal to or larger than src_total_size, it means the source
file is definitely not a sparse file, or it may be a sparse file but it does
not make sense to proceed with the scan read.
Another change in this file is the signature of extent_scan_init(): as I
mentioned above, it needs to accept the src_total_size variable.
src/extent-scan.c: implement a new extent_scan_read() using
SEEK_DATA/SEEK_HOLE; it will be used if those two values are defined in
<unistd.h>.
src/copy.c: pass src_total_size to extent_scan_init().

In my test environment (Solaris 10, SunOS 5.10 Generic_142910-17), I have tried
a few simple cases, and they work for me.

For now, I am using diff(1) to verify the copy result.  Does anyone know of
utilities that could be used to write the test script?
I sent an email to the ZFS dev mailing list yesterday to ask this question, and
a nice guy suggested that I use ZDB (http://cuddletech.com/blog/?p=407) for
that.  I'm still studying this utility.  I also noticed there is a patch to add
SEEK_HOLE/SEEK_DATA support to the os module in the Python community; please
refer to:
http://bugs.python.org/file19566/z.patch
but it requires a very recent Python build, I think.  So could anyone give some
other advice on this point?

The patch follows; any help with testing and any comments are appreciated!


Thanks,
-Jeff


From: Jie Liu <address@hidden>
Date: Thu, 17 Feb 2011 21:14:23 +0800
Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan module

* src/extent-scan.h: Add src_total_size to struct extent_scan; we need
  to check the SEEK_HOLE result against it for the initial extent scan.
  Modify the extent_scan_init() signature to add size_t src_total_size.
* src/extent-scan.c: Implement a new extent_scan_read() using SEEK_DATA
  and SEEK_HOLE.
* src/copy.c: Pass src_total_size to extent_scan_init().

Signed-off-by: Jie Liu <address@hidden>
---
 src/copy.c        |    2 +-
 src/extent-scan.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/extent-scan.h |    9 +++-
 3 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index 104652d..22b9911 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t 
buf_size,
      We may need this at the end, for a final ftruncate.  */
   off_t dest_pos = 0;
 
-  extent_scan_init (src_fd, &scan);
+  extent_scan_init (src_fd, src_total_size, &scan);
 
   *require_normal_copy = false;
   bool wrote_hole_at_eof = true;
diff --git a/src/extent-scan.c b/src/extent-scan.c
index 1ba59db..ffeab7a 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -32,13 +32,17 @@
 /* Allocate space for struct extent_scan, initialize the entries if
    necessary and return it as the input argument of extent_scan_read().  */
 extern void
-extent_scan_init (int src_fd, struct extent_scan *scan)
+extent_scan_init (int src_fd, size_t src_total_size,
+                  struct extent_scan *scan)
 {
   scan->fd = src_fd;
   scan->ei_count = 0;
   scan->scan_start = 0;
   scan->initial_scan_failed = false;
   scan->hit_final_extent = false;
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
+  scan->src_total_size = src_total_size;
+#endif
 }
 
 #ifdef __linux__
@@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan)
 
   return true;
 }
+#elif defined(SEEK_HOLE) && defined(SEEK_DATA)
+extern bool
+extent_scan_read (struct extent_scan *scan)
+{
+  off_t data_pos, hole_pos;
+  union { struct extent_info ei; char c[4096]; } extent_buf;
+  struct extent_info *ext_info = &extent_buf.ei;
+  enum { count = (sizeof extent_buf / sizeof *ext_info) };
+  verify (count != 0);
+
+  memset (&extent_buf, 0, sizeof extent_buf);
+
+  if (scan->scan_start == 0)
+    {
+# ifdef _PC_MIN_HOLE_SIZE
+      /* Determine whether the underlying file system supports
+         SEEK_HOLE; if not, fall back to the standard copy.  */
+      if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
+        {
+          scan->initial_scan_failed = true;
+          return false;
+        }
+# endif
+
+      /* If we have been compiled on an OS that supports SEEK_HOLE
+         but run on an OS that does not support SEEK_HOLE, we get
+         EINVAL.  If the underlying filesystem does not support the
+         SEEK_HOLE call, we get ENOTSUP, fall back to standard copy
+         in either case.  */
+      hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
+      if (hole_pos < 0)
+        {
+          if (errno == EINVAL || errno == ENOTSUP)
+            scan->initial_scan_failed = true;
+          return false;
+        }
+
+      /* Seek back to position 0 first if we detected a real hole.  */
+      if (hole_pos > 0)
+        {
+          off_t tmp_pos;
+          tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET);
+          if (tmp_pos != (off_t) 0)
+              return false;
+
+          /* The source file is definitely not a sparse file, or it
+             may be a sparse file but SEEK_HOLE returns the source file's
+             total size; fall back to the standard copy too.  */
+          if (hole_pos >= scan->src_total_size)
+            {
+              scan->initial_scan_failed = true;
+              return false;
+            }
+        }
+    }
+
+  unsigned int i = 0;
+  /* If lseek(2) failed and the errno is set to ENXIO, for
+     SEEK_DATA there are no more data regions past the supplied
+     offset.  For SEEK_HOLE, there are no more holes past the 
+     supplied offset.  Set scan->hit_final_extent to true for
+     either case.  */
+  do {
+    data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
+    if (data_pos < 0)
+      {
+        if (errno != ENXIO)
+          return false;
+        else
+          {
+            scan->hit_final_extent = true;
+            return true;
+          }
+      }
+
+    hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
+    if (hole_pos < 0)
+      {
+        if (errno != ENXIO)
+          return false;
+        else
+          {
+            scan->hit_final_extent = true;
+            return true;
+          }
+      }
+
+    ext_info[i].ext_logical = data_pos;
+    ext_info[i].ext_length = hole_pos - data_pos;
+    scan->scan_start = hole_pos;
+    ++i;
+  } while (scan->scan_start < scan->src_total_size && i < count);
+
+  i--;
+  scan->ei_count = i;
+  scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+
+  for (i = 0; i < scan->ei_count; i++)
+    {
+      assert (ext_info[i].ext_logical <= OFF_T_MAX);
+
+      scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
+      scan->ext_info[i].ext_length = ext_info[i].ext_length;
+    }
+
+  return true; 
+}
 #else
 extern bool
 extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 4724b25..a271b95 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -18,7 +18,6 @@
 
 #ifndef EXTENT_SCAN_H
 # define EXTENT_SCAN_H
-
 /* Structure used to store information of each extent.  */
 struct extent_info
 {
@@ -38,6 +37,11 @@ struct extent_scan
   /* File descriptor of extent scan run against.  */
   int fd;
 
+#if defined(SEEK_DATA) && defined(SEEK_HOLE)
+  /* Source file size, i.e, (struct stat) &statbuf.st_size.  */
+  size_t src_total_size;
+#endif
+
   /* Next scan start offset.  */
   off_t scan_start;
 
@@ -55,7 +59,8 @@ struct extent_scan
   struct extent_info *ext_info;
 };
 
-void extent_scan_init (int src_fd, struct extent_scan *scan);
+void extent_scan_init (int src_fd, size_t src_total_size,
+                       struct extent_scan *scan);
 
 bool extent_scan_read (struct extent_scan *scan);
 
-- 
1.7.4

reply via email to

[Prev in Thread] Current Thread [Next in Thread]