>From 41e9fbba7e2dc3a9756f85df5dd5074c0680208e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?=
Date: Thu, 2 Oct 2014 14:07:42 +0100 Subject: [PATCH 1/3] cp: make hole detection size independent of I/O size Previously cp would not detect runs of NULs that were smaller than the buffer size used for I/O (currently 128KiB). * src/copy.c (copy_reg): Use an independent hole_size, set to st_blksize, to increase the chances of detecting a representable hole, in a run of NULs read from the input. * tests/cp/sparse.sh: Add test cases for various sparse chunk sizes. * NEWS: Mention the improvement. --- NEWS | 3 + src/copy.c | 114 +++++++++++++++++++++++++++++++++------------------- tests/cp/sparse.sh | 28 +++++++++++++ 3 files changed, 103 insertions(+), 42 deletions(-) diff --git a/NEWS b/NEWS index 1811ae4..785773f 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,9 @@ GNU coreutils NEWS -*- outline -*- ** Improvements + cp will convert smaller runs of NULs in the input to holes, + to reduce allocation in the copy. + mv will try a reflink before falling back to a standard copy, which is more efficient when moving files across BTRFS subvolume boundaries. diff --git a/src/copy.c b/src/copy.c index b7baee4..12af6db 100644 --- a/src/copy.c +++ b/src/copy.c @@ -158,7 +158,7 @@ utimens_symlink (char const *file, struct timespec const *timespec) bytes read. */ static bool sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, - bool make_holes, + size_t hole_size, bool make_holes, char const *src_name, char const *dst_name, uintmax_t max_n_read, off_t *total_n_read, bool *last_write_made_hole) @@ -168,8 +168,6 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, while (max_n_read) { - bool make_hole = false; - ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); if (n_read < 0) { @@ -183,47 +181,77 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, max_n_read -= n_read; *total_n_read += n_read; - if (make_holes) + /* Loop over the input buffer in chunks of hole_size. */ + bool make_hole = false; + size_t csize = make_holes ? hole_size : buf_size; + char *cbuf = buf; + size_t psize = 0; + char *pbuf = buf; + + while (n_read) { - /* Sentinel required by is_nul(). */ - buf[n_read] = '\1'; -#ifdef lint - typedef uintptr_t word; - /* Usually, buf[n_read] is not the byte just before a "word" - (aka uintptr_t) boundary. In that case, the word-oriented - test below (*wp++ == 0) would read some uninitialized bytes - after the sentinel. To avoid false-positive reports about - this condition (e.g., from a tool like valgrind), set the - remaining bytes -- to any value. */ - memset (buf + n_read + 1, 0, sizeof (word) - 1); -#endif + bool prev_hole = make_hole; + csize = MIN (csize, n_read); - if ((make_hole = is_nul (buf, n_read))) + if (make_holes && csize) { - if (lseek (dest_fd, n_read, SEEK_CUR) < 0) - { - error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return false; - } + /* Setup sentinel required by is_nul(). */ + typedef uintptr_t word; + word isnul_tmp; + memcpy (&isnul_tmp, cbuf + csize, sizeof (word)); + memset (cbuf + csize, 1, sizeof (word)); + + make_hole = is_nul (cbuf, csize); + + memcpy (cbuf + csize, &isnul_tmp, sizeof (word)); } - } - if (!make_hole) - { - size_t n = n_read; - if (full_write (dest_fd, buf, n) != n) + bool transition = (make_hole != prev_hole) && psize; + bool last_chunk = (n_read == csize) || ! csize; + + if (transition || last_chunk) { - error (0, errno, _("error writing %s"), quote (dst_name)); - return false; + if (! transition) + psize += csize; + + if (! prev_hole) + { + if (full_write (dest_fd, pbuf, psize) != psize) + { + error (0, errno, _("error writing %s"), quote (dst_name)); + return false; + } + } + else + { + if (lseek (dest_fd, psize, SEEK_CUR) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return false; + } + } + + pbuf += psize; + psize = csize; + + if (transition && last_chunk) + csize = 0; + else if (! csize) + n_read = 0; } + else /* Coalesce writes/seeks. */ + psize += csize; - /* It is tempting to return early here upon a short read from a - regular file. That would save the final read syscall for each - file. Unfortunately that doesn't work for certain files in - /proc with linux kernels from at least 2.6.9 .. 2.6.29. */ + n_read -= csize; + cbuf += csize; } *last_write_made_hole = make_hole; + + /* It's tempting to break early here upon a short read from + a regular file. That would save the final read syscall + for each file. Unfortunately that doesn't work for + certain files in /proc or /sys with linux kernels. */ } return true; @@ -290,7 +318,8 @@ write_zeros (int fd, off_t n_bytes) return false. */ static bool extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, - off_t src_total_size, enum Sparse_type sparse_mode, + size_t hole_size, off_t src_total_size, + enum Sparse_type sparse_mode, char const *src_name, char const *dst_name, bool *require_normal_copy) { @@ -331,7 +360,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, { off_t ext_start; off_t ext_len; - off_t hole_size; + off_t ext_hole_size; if (i < scan.ei_count) { @@ -345,11 +374,11 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, ext_len = 0; } - hole_size = ext_start - last_ext_start - last_ext_len; + ext_hole_size = ext_start - last_ext_start - last_ext_len; wrote_hole_at_eof = false; - if (hole_size) + if (ext_hole_size) { if (lseek (src_fd, ext_start, SEEK_SET) < 0) { @@ -374,9 +403,9 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, /* When not inducing holes and when there is a hole between the end of the previous extent and the beginning of the current one, write zeros to the destination file. */ - off_t nzeros = hole_size; + off_t nzeros = ext_hole_size; if (empty_extent) - nzeros = MIN (src_total_size - dest_pos, hole_size); + nzeros = MIN (src_total_size - dest_pos, ext_hole_size); if (! write_zeros (dest_fd, nzeros)) { @@ -409,7 +438,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, empty_extent = false; last_ext_len = ext_len; - if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, + if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, hole_size, sparse_mode == SPARSE_ALWAYS, src_name, dst_name, ext_len, &n_read, &wrote_hole_at_eof)) @@ -1105,6 +1134,7 @@ copy_reg (char const *src_name, char const *dst_name, size_t buf_alignment = lcm (getpagesize (), sizeof (word)); size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1; size_t buf_size = io_blksize (sb); + size_t hole_size = ST_BLKSIZE (sb); fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL); @@ -1164,7 +1194,7 @@ copy_reg (char const *src_name, char const *dst_name, standard copy only if the initial extent scan fails. If the '--sparse=never' option is specified, write all data but use any extents to read more efficiently. */ - if (extent_copy (source_desc, dest_desc, buf, buf_size, + if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size, src_open_sb.st_size, S_ISREG (sb.st_mode) ? x->sparse_mode : SPARSE_NEVER, src_name, dst_name, &normal_copy_required)) @@ -1179,7 +1209,7 @@ copy_reg (char const *src_name, char const *dst_name, off_t n_read; bool wrote_hole_at_eof; - if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, + if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, hole_size, make_holes, src_name, dst_name, UINTMAX_MAX, &n_read, &wrote_hole_at_eof) diff --git a/tests/cp/sparse.sh b/tests/cp/sparse.sh index d6cc4c4..1414d35 100755 --- a/tests/cp/sparse.sh +++ b/tests/cp/sparse.sh @@ -37,4 +37,32 @@ test $(stat --printf %b copy) -le $(stat --printf %b sparse) || fail=1 cp --sparse=always --reflink sparse copy && fail=1 cp --sparse=never --reflink sparse copy && fail=1 + +# Ensure we handle sparse/non-sparse transitions correctly +maxn=128 # how many $hole_size chunks per file +hole_size=$(stat -c %o copy) +dd if=/dev/zero bs=$hole_size count=$maxn of=zeros +tr '\0' '\1' < zeros > ones + +for n in 1 2 3 4 32 $maxn; do + parts=$(expr $maxn / $n) + + rm -f sparse.in + + # Generate sparse file for copying with alternating + # hole/data patterns of size n * $hole_size + for i in $(yes zeros | sed 1~2s/zeros/ones/ | head -n$parts); do + dd iflag=fullblock if=$i of=sparse.in conv=notrunc oflag=append \ + bs=$hole_size count=$n status=none || framework_failure_ + done + + cp --sparse=always sparse.in sparse.out || fail=1 # non sparse input + cp --sparse=always sparse.out sparse.out2 || fail=1 # sparse input + + cmp sparse.in sparse.out || fail=1 + cmp sparse.in sparse.out2 || fail=1 + + ls -lsh sparse.* +done + Exit $fail -- 1.7.7.6 >From b3a933cff8e61e57f9e09931daecb1440849d80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Mon, 6 Oct 2014 10:19:58 +0100 Subject: [PATCH 2/3] cp: avoid speculative preallocation with --sparse=always With --sparse=always use fallocate(...PUNCH_HOLE...) to avoid any permanent allocation due to speculative preallocation employed by file systems such as XFS. * m4/jm-macros.m4: Check for