qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH for-4.2 1/4] Revert "qcow2: skip writing zero buffers to empt


From: Vladimir Sementsov-Ogievskiy
Subject: Re: [PATCH for-4.2 1/4] Revert "qcow2: skip writing zero buffers to empty COW areas"
Date: Fri, 1 Nov 2019 10:22:37 +0000

01.11.2019 13:00, Max Reitz wrote:
> This reverts commit c8bb23cbdbe32f5c326365e0a82e1b0e68cdcd8a.
> 
> This commit causes fundamental performance problems on XFS (because
> fallocate() stalls the AIO pipeline), and as such it is not clear that
> we should unconditionally enable this behavior.
> 
> We expect subclusters to alleviate the performance penalty of small
> writes to newly allocated clusters, so when we get them, the originally
> intended performance gain may actually no longer be significant.
> 
> If we want to reintroduce something similar to c8bb23cbdbe, it will
> require extensive benchmarking on various systems with subclusters
> enabled.
> 
> Cc: address@hidden
> Signed-off-by: Max Reitz <address@hidden>

It's sad, but OK:
Reviewed-by: Vladimir Sementsov-Ogievskiy <address@hidden>

> ---
>   qapi/block-core.json       |  4 +-
>   block/qcow2.h              |  6 ---
>   block/qcow2-cluster.c      |  2 +-
>   block/qcow2.c              | 86 --------------------------------------
>   block/trace-events         |  1 -
>   tests/qemu-iotests/060     |  7 +---
>   tests/qemu-iotests/060.out |  5 +--
>   7 files changed, 4 insertions(+), 107 deletions(-)
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index aa97ee2641..f053f15431 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3304,8 +3304,6 @@
>   #
>   # @cor_write: a write due to copy-on-read (since 2.11)
>   #
> -# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
> -#
>   # @none: triggers once at creation of the blkdebug node (since 4.1)
>   #
>   # Since: 2.9
> @@ -3326,7 +3324,7 @@
>               'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
>               'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
>               'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
> -            'cor_write', 'cluster_alloc_space', 'none'] }
> +            'cor_write', 'none'] }
>   
>   ##
>   # @BlkdebugIOType:
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 601c2e4c82..8166f6e311 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -418,12 +418,6 @@ typedef struct QCowL2Meta
>        */
>       Qcow2COWRegion cow_end;
>   
> -    /*
> -     * Indicates that COW regions are already handled and do not require
> -     * any more processing.
> -     */
> -    bool skip_cow;
> -
>       /**
>        * The I/O vector with the data from the actual guest write request.
>        * If non-NULL, this is meant to be merged together with the data
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 8982b7b762..fbfea8c817 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -809,7 +809,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta 
> *m)
>       assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
>       assert(start->offset + start->nb_bytes <= end->offset);
>   
> -    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
> +    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
>           return 0;
>       }
>   
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 7c18721741..17555cb0a1 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -2274,11 +2274,6 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
>               continue;
>           }
>   
> -        /* If COW regions are handled already, skip this too */
> -        if (m->skip_cow) {
> -            continue;
> -        }
> -
>           /* The data (middle) region must be immediately after the
>            * start region */
>           if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
> @@ -2305,81 +2300,6 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
>       return false;
>   }
>   
> -static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t 
> bytes)
> -{
> -    int64_t nr;
> -    return !bytes ||
> -        (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
> -         nr == bytes);
> -}
> -
> -static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
> -{
> -    /*
> -     * This check is designed for optimization shortcut so it must be
> -     * efficient.
> -     * Instead of is_zero(), use is_unallocated() as it is faster (but not
> -     * as accurate and can result in false negatives).
> -     */
> -    return is_unallocated(bs, m->offset + m->cow_start.offset,
> -                          m->cow_start.nb_bytes) &&
> -           is_unallocated(bs, m->offset + m->cow_end.offset,
> -                          m->cow_end.nb_bytes);
> -}
> -
> -static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
> -{
> -    BDRVQcow2State *s = bs->opaque;
> -    QCowL2Meta *m;
> -
> -    if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
> -        return 0;
> -    }
> -
> -    if (bs->encrypted) {
> -        return 0;
> -    }
> -
> -    for (m = l2meta; m != NULL; m = m->next) {
> -        int ret;
> -
> -        if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
> -            continue;
> -        }
> -
> -        if (!is_zero_cow(bs, m)) {
> -            continue;
> -        }
> -
> -        /*
> -         * instead of writing zero COW buffers,
> -         * efficiently zero out the whole clusters
> -         */
> -
> -        ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
> -                                            m->nb_clusters * s->cluster_size,
> -                                            true);
> -        if (ret < 0) {
> -            return ret;
> -        }
> -
> -        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
> -        ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
> -                                    m->nb_clusters * s->cluster_size,
> -                                    BDRV_REQ_NO_FALLBACK);
> -        if (ret < 0) {
> -            if (ret != -ENOTSUP && ret != -EAGAIN) {
> -                return ret;
> -            }
> -            continue;
> -        }
> -
> -        trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, 
> m->nb_clusters);
> -        m->skip_cow = true;
> -    }
> -    return 0;
> -}
> -
>   /*
>    * qcow2_co_pwritev_task
>    * Called with s->lock unlocked
> @@ -2421,12 +2341,6 @@ static coroutine_fn int 
> qcow2_co_pwritev_task(BlockDriverState *bs,
>           qiov_offset = 0;
>       }
>   
> -    /* Try to efficiently initialize the physical space with zeroes */
> -    ret = handle_alloc_space(bs, l2meta);
> -    if (ret < 0) {
> -        goto out_unlocked;
> -    }
> -
>       /*
>        * If we need to do COW, check if it's possible to merge the
>        * writing of the guest data together with that of the COW regions.
> diff --git a/block/trace-events b/block/trace-events
> index 6ba86decca..c615b26d71 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -72,7 +72,6 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p 
> cur_bytes %d"
>   qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
>   qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p 
> offset 0x%" PRIx64 " count %d"
>   qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" 
> PRIx64 " count %d"
> -qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 
> 0x%" PRIx64 " nb_clusters %d"
>   
>   # qcow2-cluster.c
>   qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p 
> offset 0x%" PRIx64 " bytes %d"
> diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
> index b91d8321bb..89e911400c 100755
> --- a/tests/qemu-iotests/060
> +++ b/tests/qemu-iotests/060
> @@ -150,15 +150,10 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | 
> _filter_qemu_io
>   echo
>   echo "=== Testing overlap while COW is in flight ==="
>   echo
> -BACKING_IMG=$TEST_IMG.base
> -TEST_IMG=$BACKING_IMG _make_test_img 1G
> -
> -$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
> -
>   # compat=0.10 is required in order to make the following discard actually
>   # unallocate the sector rather than make it a zero sector - we want COW, 
> after
>   # all.
> -IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
> +IMGOPTS='compat=0.10' _make_test_img 1G
>   # Write two clusters, the second one enforces creation of an L2 table after
>   # the first data cluster.
>   $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
> diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
> index 0f6b0658a1..e42bf8c5a9 100644
> --- a/tests/qemu-iotests/060.out
> +++ b/tests/qemu-iotests/060.out
> @@ -97,10 +97,7 @@ read 512/512 bytes at offset 0
>   
>   === Testing overlap while COW is in flight ===
>   
> -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
> -wrote 65536/65536 bytes at offset 0
> -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
> backing_file=TEST_DIR/t.IMGFMT.base
> +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
>   wrote 65536/65536 bytes at offset 0
>   64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>   wrote 65536/65536 bytes at offset 536870912
> 


-- 
Best regards,
Vladimir



reply via email to

[Prev in Thread] Current Thread [Next in Thread]