[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 1/2] qcow2: add keep-dirty open option
From: |
Kirill Tkhai |
Subject: |
Re: [PATCH v2 1/2] qcow2: add keep-dirty open option |
Date: |
Fri, 28 Jan 2022 11:27:09 +0300 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.5.1 |
On 27.01.2022 20:00, Vladimir Sementsov-Ogievskiy wrote:
> Consider the case:
>
> Thirdparty component works with qcow2 image, and dirty bit is set.
>
> Thirdparty component want to start qemu-img to do some manipulation.
> Ofcourse, third party component flushes refcounts and other metadata
> before starting QEMU.
>
> But the component don't want to clear dirty bit, as this breaks
> transactionability of the operation: we'll have to set it again but it
> may fail. Clearing the dirty bit is unrecoverable action and can't be
> transactional. That's a problem.
>
> The solution is a new qcow2 open option: keep-dirty. When set:
> 1. On qcow2 open, ignore dirty bit and don't do check: caller is
> responsible for refcounts being valid.
> 2. Never clear dirty bit during QEMU execution, including close.
>
> Details:
>
> 1. For simplicity let's just not allow keep-dirty without lazy
> refcounts.
>
> 2. Don't allow to open with keep-dirty when dirty bit is unset. This
> may mean some error in user logic.
>
> 3. For implementation do the following: dirty flag
> in s->incompatible_features behaves same way as without keep-dirty
> option: it actually designate status of refcounts dirtiness. But we
> never clear the flag in the image, and we remember that it is always
> set.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
> qapi/block-core.json | 8 ++++++
> block/qcow2.h | 2 ++
> block/qcow2.c | 61 +++++++++++++++++++++++++++++++++++---------
> 3 files changed, 59 insertions(+), 12 deletions(-)
>
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 9a5a3641d0..4b5c6d7935 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3228,6 +3228,13 @@
> # @lazy-refcounts: whether to enable the lazy refcounts
> # feature (default is taken from the image file)
> #
> +# @keep-dirty: whether to not touch dirty bit. When set, QEMU doesn't
> +# check refcounts on qcow2 open (ignoring dirty bit) and doesn't
> +# clear dirty bit on qcow2 close. When true dirty bit must
> +# be already set in the image on open, otherwise open fails.
> +# When true user guarantees that refcounts are consistent on
> +# open, so the check is omitted. (since 7.0)
> +#
> # @pass-discard-request: whether discard requests to the qcow2
> # device should be forwarded to the data source
> #
> @@ -3276,6 +3283,7 @@
> { 'struct': 'BlockdevOptionsQcow2',
> 'base': 'BlockdevOptionsGenericCOWFormat',
> 'data': { '*lazy-refcounts': 'bool',
> + '*keep-dirty': 'bool',
> '*pass-discard-request': 'bool',
> '*pass-discard-snapshot': 'bool',
> '*pass-discard-other': 'bool',
> diff --git a/block/qcow2.h b/block/qcow2.h
> index fd48a89d45..696e13377a 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -130,6 +130,7 @@
>
> #define QCOW2_OPT_DATA_FILE "data-file"
> #define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
> +#define QCOW2_OPT_KEEP_DIRTY "keep-dirty"
> #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
> #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
> #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
> @@ -376,6 +377,7 @@ typedef struct BDRVQcow2State {
> int flags;
> int qcow_version;
> bool use_lazy_refcounts;
> + bool keep_dirty;
> int refcount_order;
> int refcount_bits;
> uint64_t refcount_max;
> diff --git a/block/qcow2.c b/block/qcow2.c
> index d509016756..ee82ef2a8f 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -514,15 +514,17 @@ int qcow2_mark_dirty(BlockDriverState *bs)
> return 0; /* already dirty */
> }
>
> - val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
> - ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
> - &val, sizeof(val));
> - if (ret < 0) {
> - return ret;
> - }
> - ret = bdrv_flush(bs->file->bs);
> - if (ret < 0) {
> - return ret;
> + if (!s->keep_dirty) {
> + val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
> + ret = bdrv_pwrite(bs->file, offsetof(QCowHeader,
> incompatible_features),
> + &val, sizeof(val));
> + if (ret < 0) {
> + return ret;
> + }
> + ret = bdrv_flush(bs->file->bs);
> + if (ret < 0) {
> + return ret;
> + }
> }
>
> /* Only treat image as dirty if the header was updated successfully */
> @@ -549,7 +551,13 @@ static int qcow2_mark_clean(BlockDriverState *bs)
> return ret;
> }
>
> - return qcow2_update_header(bs);
> + if (!s->keep_dirty) {
> + /*
> + * No reason to update the header if we don't want to clear dirty
> + * bit.
> + */
> + return qcow2_update_header(bs);
> + }
> }
> return 0;
> }
> @@ -709,6 +717,11 @@ static QemuOptsList qcow2_runtime_opts = {
> .type = QEMU_OPT_BOOL,
> .help = "Postpone refcount updates",
> },
> + {
> + .name = QCOW2_OPT_KEEP_DIRTY,
> + .type = QEMU_OPT_BOOL,
> + .help = "Keep dirty bit set",
> + },
> {
> .name = QCOW2_OPT_DISCARD_REQUEST,
> .type = QEMU_OPT_BOOL,
> @@ -966,6 +979,7 @@ typedef struct Qcow2ReopenState {
> Qcow2Cache *refcount_block_cache;
> int l2_slice_size; /* Number of entries in a slice of the L2 table */
> bool use_lazy_refcounts;
> + bool keep_dirty;
> int overlap_check;
> bool discard_passthrough[QCOW2_DISCARD_MAX];
> uint64_t cache_clean_interval;
> @@ -1088,6 +1102,8 @@ static int
> qcow2_update_options_prepare(BlockDriverState *bs,
> }
> }
>
> + r->keep_dirty = qemu_opt_get_bool(opts, QCOW2_OPT_KEEP_DIRTY, false);
> +
> /* Overlap check options */
> opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
> opt_overlap_check_template = qemu_opt_get(opts,
> QCOW2_OPT_OVERLAP_TEMPLATE);
> @@ -1214,6 +1230,7 @@ static void
> qcow2_update_options_commit(BlockDriverState *bs,
>
> s->overlap_check = r->overlap_check;
> s->use_lazy_refcounts = r->use_lazy_refcounts;
> + s->keep_dirty = r->keep_dirty;
>
> for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
> s->discard_passthrough[i] = r->discard_passthrough[i];
> @@ -1810,7 +1827,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState
> *bs, QDict *options,
> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
>
> /* Repair image if dirty */
> - if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
> + if (!s->keep_dirty && !(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
> (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
> BdrvCheckResult result = {0};
>
> @@ -1825,6 +1842,20 @@ static int coroutine_fn qcow2_do_open(BlockDriverState
> *bs, QDict *options,
> }
> }
>
> + if (s->keep_dirty) {
> + if (!(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
> + error_setg(errp, "keep-dirty behaviour is requested but image "
> + "is not dirty");
> + ret = -EINVAL;
> + goto fail;
> + }
> + /*
> + * User guarantees that refcounts are valid. So, consider them valid,
> + * keeping dirty bit set in the header.
> + */
> + s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
> + }
> +
> #ifdef DEBUG_ALLOC
> {
> BdrvCheckResult result = {0};
> @@ -2826,6 +2857,7 @@ int qcow2_update_header(BlockDriverState *bs)
> uint32_t refcount_table_clusters;
> size_t header_length;
> Qcow2UnknownHeaderExtension *uext;
> + uint64_t incompatible_features;
>
> buf = qemu_blockalign(bs, buflen);
>
> @@ -2846,6 +2878,11 @@ int qcow2_update_header(BlockDriverState *bs)
> goto fail;
> }
>
> + incompatible_features = s->incompatible_features;
> + if (s->keep_dirty) {
> + incompatible_features |= QCOW2_INCOMPAT_DIRTY;
> + }
> +
> *header = (QCowHeader) {
> /* Version 2 fields */
> .magic = cpu_to_be32(QCOW_MAGIC),
> @@ -2863,7 +2900,7 @@ int qcow2_update_header(BlockDriverState *bs)
> .snapshots_offset = cpu_to_be64(s->snapshots_offset),
>
> /* Version 3 fields */
> - .incompatible_features = cpu_to_be64(s->incompatible_features),
> + .incompatible_features = cpu_to_be64(incompatible_features),
> .compatible_features = cpu_to_be64(s->compatible_features),
> .autoclear_features = cpu_to_be64(s->autoclear_features),
> .refcount_order = cpu_to_be32(s->refcount_order),