[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module
From: |
Jeff liu |
Subject: |
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module |
Date: |
Mon, 18 Apr 2011 22:15:13 +0800 |
Hi All,
Please ignore the current patch; I will submit another patch with a few fixes
soon.
Thanks,
-Jeff
在 2011-2-17,下午9:57, Jeff liu 写道:
> Hello All,
>
> This is a first attempt at adding SEEK_DATA/SEEK_HOLE support to the
> extent_scan module for efficient sparse file copying on ZFS. I have delayed
> it for a long time; sorry for that!
>
> Below is the list of code changes:
> src/extent-scan.h: add a new member 'src_total_size' to "struct
> extent_scan", since I need this value to determine whether
> a file is sparse or not during the initial scan. If the return value of
> lseek(fd, 0, SEEK_HOLE) is equal to or larger than src_total_size, it means
> the source file
> is definitely not a sparse file, or it may be a sparse file for which it does
> not make sense to proceed with the extent scan.
> Another change in this file is the signature of extent_scan_init(): as I
> mentioned above, it needs to accept the src_total_size value.
> src/extent-scan.c: implement a new extent_scan_read() using
> SEEK_DATA/SEEK_HOLE; it will be used if those two values are defined in
> <unistd.h>.
> src/copy.c: pass src_total_size to extent_scan_init().
>
> In my test environment, Solaris 10 (SunOS 5.10 Generic_142910-17), I have
> tried a few simple cases, and they work for me.
>
> For now, I am using diff(1) to verify the copy result. Does anyone know of
> utilities that can be used to write the test script?
> I sent an email to the ZFS dev mailing list yesterday to ask this question,
> and a nice guy suggested that I use ZDB (http://cuddletech.com/blog/?p=407)
> for that. I am
> still studying this utility. I also noticed there is a patch to add
> SEEK_HOLE/SEEK_DATA support to the os module in the Python community; please
> refer to:
> http://bugs.python.org/file19566/z.patch
> but it requires a very recent Python build, I think, so could anyone give
> some other advice on this point?
>
> The patch is shown below; any help with testing and any comments are
> appreciated!!
>
>
> Thanks,
> -Jeff
>
>
> From: Jie Liu <address@hidden>
> Date: Thu, 17 Feb 2011 21:14:23 +0800
> Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan
> module
>
> * src/extent-scan.h: add src_total_size to struct extent_scan; we need
> to check the SEEK_HOLE result against it for the initial extent scan.
> Modify the extent_scan_init() signature to add size_t src_total_size.
> * src/extent-scan.c: implement a new extent_scan_read() through SEEK_DATA
> and SEEK_HOLE.
> * src/copy.c: pass src_total_size to extent_scan_init().
>
> Signed-off-by: Jie Liu <address@hidden>
> ---
> src/copy.c | 2 +-
> src/extent-scan.c | 113
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> src/extent-scan.h | 9 +++-
> 3 files changed, 120 insertions(+), 4 deletions(-)
>
> diff --git a/src/copy.c b/src/copy.c
> index 104652d..22b9911 100644
> --- a/src/copy.c
> +++ b/src/copy.c
> @@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t
> buf_size,
> We may need this at the end, for a final ftruncate. */
> off_t dest_pos = 0;
>
> - extent_scan_init (src_fd, &scan);
> + extent_scan_init (src_fd, src_total_size, &scan);
>
> *require_normal_copy = false;
> bool wrote_hole_at_eof = true;
> diff --git a/src/extent-scan.c b/src/extent-scan.c
> index 1ba59db..ffeab7a 100644
> --- a/src/extent-scan.c
> +++ b/src/extent-scan.c
> @@ -32,13 +32,17 @@
> /* Allocate space for struct extent_scan, initialize the entries if
> necessary and return it as the input argument of extent_scan_read(). */
> extern void
> -extent_scan_init (int src_fd, struct extent_scan *scan)
> +extent_scan_init (int src_fd, size_t src_total_size,
> + struct extent_scan *scan)
> {
> scan->fd = src_fd;
> scan->ei_count = 0;
> scan->scan_start = 0;
> scan->initial_scan_failed = false;
> scan->hit_final_extent = false;
> +#if defined(SEEK_HOLE) && defined(SEEK_DATA)
> + scan->src_total_size = src_total_size;
> +#endif
> }
>
> #ifdef __linux__
> @@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan)
>
> return true;
> }
> +#elif defined(SEEK_HOLE) && defined(SEEK_DATA)
> +extern bool
> +extent_scan_read (struct extent_scan *scan)
> +{
> + off_t data_pos, hole_pos;
> + union { struct extent_info ei; char c[4096]; } extent_buf;
> + struct extent_info *ext_info = &extent_buf.ei;
> + enum { count = (sizeof extent_buf / sizeof *ext_info) };
> + verify (count != 0);
> +
> + memset (&extent_buf, 0, sizeof extent_buf);
> +
> + if (scan->scan_start == 0)
> + {
> +# ifdef _PC_MIN_HOLE_SIZE
> + /* To determine if the underlaying file system support
> + SEEK_HOLE, if not, fall back to the standard copy. */
> + if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
> + {
> + scan->initial_scan_failed = true;
> + return false;
> + }
> +# endif
> +
> + /* If we have been compiled on an OS that supports SEEK_HOLE
> + but run on an OS that does not support SEEK_HOLE, we get
> + EINVAL. If the underlying filesystem does not support the
> + SEEK_HOLE call, we get ENOTSUP, fall back to standard copy
> + in either case. */
> + hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
> + if (hole_pos < 0)
> + {
> + if (errno == EINVAL || errno == ENOTSUP)
> + scan->initial_scan_failed = true;
> + return false;
> + }
> +
> + /* Seek back to position 0 first if we detected a real hole. */
> + if (hole_pos > 0)
> + {
> + off_t tmp_pos;
> + tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET);
> + if (tmp_pos != (off_t) 0)
> + return false;
> +
> + /* The source file is definitely not a sparse file, or it
> + maybe a sparse file but SEEK_HOLE returns the source file's
> + total size, fall back to the standard copy too. */
> + if (hole_pos >= scan->src_total_size)
> + {
> + scan->initial_scan_failed = true;
> + return false;
> + }
> + }
> + }
> +
> + unsigned int i = 0;
> + /* If lseek(2) failed and the errno is set to ENXIO, for
> + SEEK_DATA there are no more data regions past the supplied
> + offset. For SEEK_HOLE, there are no more holes past the
> + supplied offset. Set scan->hit_final_extent to true for
> + either case. */
> + do {
> + data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
> + if (data_pos < 0)
> + {
> + if (errno != ENXIO)
> + return false;
> + else
> + {
> + scan->hit_final_extent = true;
> + return true;
> + }
> + }
> +
> + hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
> + if (hole_pos < 0)
> + {
> + if (errno != ENXIO)
> + return false;
> + else
> + {
> + scan->hit_final_extent = true;
> + return true;
> + }
> + }
> +
> + ext_info[i].ext_logical = data_pos;
> + ext_info[i].ext_length = hole_pos - data_pos;
> + scan->scan_start = hole_pos;
> + ++i;
> + } while (scan->scan_start < scan->src_total_size && i < count);
> +
> + i--;
> + scan->ei_count = i;
> + scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
> +
> + for (i = 0; i < scan->ei_count; i++)
> + {
> + assert (ext_info[i].ext_logical <= OFF_T_MAX);
> +
> + scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
> + scan->ext_info[i].ext_length = ext_info[i].ext_length;
> + }
> +
> + return true;
> +}
> #else
> extern bool
> extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
> diff --git a/src/extent-scan.h b/src/extent-scan.h
> index 4724b25..a271b95 100644
> --- a/src/extent-scan.h
> +++ b/src/extent-scan.h
> @@ -18,7 +18,6 @@
>
> #ifndef EXTENT_SCAN_H
> # define EXTENT_SCAN_H
> -
> /* Structure used to store information of each extent. */
> struct extent_info
> {
> @@ -38,6 +37,11 @@ struct extent_scan
> /* File descriptor of extent scan run against. */
> int fd;
>
> +#if defined(SEEK_DATA) && defined(SEEK_HOLE)
> + /* Source file size, i.e, (struct stat) &statbuf.st_size. */
> + size_t src_total_size;
> +#endif
> +
> /* Next scan start offset. */
> off_t scan_start;
>
> @@ -55,7 +59,8 @@ struct extent_scan
> struct extent_info *ext_info;
> };
>
> -void extent_scan_init (int src_fd, struct extent_scan *scan);
> +void extent_scan_init (int src_fd, size_t src_total_size,
> + struct extent_scan *scan);
>
> bool extent_scan_read (struct extent_scan *scan);
>
> --
> 1.7.4
- bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module,
Jeff liu <=