[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 2/3] qcow2: Implement bdrv_amend_options
From: Fam Zheng
Subject: Re: [Qemu-devel] [PATCH v3 2/3] qcow2: Implement bdrv_amend_options
Date: Mon, 2 Sep 2013 11:43:07 +0800
User-agent: Mutt/1.5.21 (2010-09-15)
On Fri, 08/30 12:27, Max Reitz wrote:
> Implement bdrv_amend_options for compat, size, backing_file, backing_fmt
> and lazy_refcounts.
>
> Downgrading images from compat=1.1 to compat=0.10 is achieved through
> handling all incompatible flags accordingly, clearing all compatible and
> autoclear flags and expanding all zero clusters.
>
> Signed-off-by: Max Reitz <address@hidden>
> ---
> block/qcow2-cluster.c | 165 ++++++++++++++++++++++++++++++++++++++++++
> block/qcow2.c | 194
> +++++++++++++++++++++++++++++++++++++++++++++++++-
> block/qcow2.h | 3 +
> 3 files changed, 361 insertions(+), 1 deletion(-)
>
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index cca76d4..e0ca104 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1476,3 +1476,168 @@ fail:
>
> return ret;
> }
> +
> +/*
> + * Expands all zero clusters in a specific L1 table (or deallocates them, for
> + * non-backed non-pre-allocated zero clusters).
> + */
> +static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t
> *l1_table,
> + int l1_size)
> +{
> + BDRVQcowState *s = bs->opaque;
> + bool is_active_l1 = (l1_table == s->l1_table);
> + uint64_t *l2_table;
> + int ret;
> + int i, j;
> +
> + if (!is_active_l1) {
> + /* inactive L2 tables require a buffer to be stored in when loading
> + * them from disk */
> + l2_table = qemu_blockalign(bs, s->cluster_size);
> + }
> +
> + for (i = 0; i < l1_size; i++) {
> + uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
> + bool l2_dirty = false;
> +
> + if (!l2_offset) {
> + /* unallocated */
> + continue;
> + }
> +
> + if (is_active_l1) {
> + /* get active L2 tables from cache */
> + ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
> + (void **)&l2_table);
> + } else {
> + /* load inactive L2 tables from disk */
> + ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
> + (void *)l2_table, s->cluster_sectors);
> + }
> + if (ret < 0) {
> + goto fail;
> + }
> +
> + for (j = 0; j < s->l2_size; j++) {
> + uint64_t l2_entry = be64_to_cpu(l2_table[j]);
> + int64_t offset;
> +
> + if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
> + continue;
> + }
> +
> + offset = l2_entry & L2E_OFFSET_MASK;
> + if (!offset) {
> + /* not preallocated */
> + if (!bs->backing_hd) {
> + /* not backed; therefore we can simply deallocate the
> + * cluster */
> + l2_table[j] = 0;
> + l2_dirty = true;
> + continue;
> + }
> +
> + offset = qcow2_alloc_clusters(bs, s->cluster_size);
> + if (offset < 0) {
> + ret = offset;
> + goto fail;
> + }
> + }
> +
> + ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
> + s->cluster_sectors);
> + if (ret < 0) {
> + qcow2_free_clusters(bs, offset, s->cluster_size,
> + QCOW2_DISCARD_ALWAYS);
> + goto fail;
> + }
> +
> + l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
> + l2_dirty = true;
> + }
> +
> + if (is_active_l1) {
> + if (l2_dirty) {
> + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
> + qcow2_cache_depends_on_flush(s->l2_table_cache);
> + }
> + ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
> + if (ret < 0) {
> + l2_table = NULL;
> + goto fail;
> + }
> + } else {
> + if (l2_dirty) {
> + ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
> + (void *)l2_table, s->cluster_sectors);
> + if (ret < 0) {
> + goto fail;
> + }
> + }
> + }
> + }
> +
> + ret = 0;
> +
> +fail:
> + if (l2_table) {
> + if (!is_active_l1) {
> + qemu_vfree(l2_table);
> + } else {
> + if (ret < 0) {
> + qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
> + } else {
> + ret = qcow2_cache_put(bs, s->l2_table_cache,
> + (void **)&l2_table);
> + }
> + }
> + }
> + return ret;
> +}
> +
> +/*
> + * For backed images, expands all zero clusters on the image. For non-backed
> + * images, deallocates all non-pre-allocated zero clusters (and claims the
> + * allocation for pre-allocated ones). This is important for downgrading to a
> + * qcow2 version which doesn't yet support metadata zero clusters.
> + */
> +int qcow2_expand_zero_clusters(BlockDriverState *bs)
> +{
> + BDRVQcowState *s = bs->opaque;
> + uint64_t *l1_table = NULL;
> + int ret;
> + int i, j;
> +
> + ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size);
> + if (ret < 0) {
> + goto fail;
> + }
> +
> + for (i = 0; i < s->nb_snapshots; i++) {
> + int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
> + BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
> +
> + l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
> +
> + ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
> + BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
> + if (ret < 0) {
> + goto fail;
> + }
> +
> + for (j = 0; j < s->snapshots[i].l1_size; j++) {
> + be64_to_cpus(&l1_table[j]);
> + }
> +
> + ret = expand_zero_clusters_in_l1(bs, l1_table,
> s->snapshots[i].l1_size);
> + if (ret < 0) {
> + goto fail;
> + }
> + }
> +
> + ret = 0;
> +
> +fail:
> + g_free(l1_table);
> + return ret;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 78097e5..a8eaf45 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -409,6 +409,7 @@ static int qcow2_open(BlockDriverState *bs, QDict
> *options, int flags)
> ret = -ENOTSUP;
> goto fail;
> }
> + s->refcount_order = header.refcount_order;
>
> if (header.cluster_bits < MIN_CLUSTER_BITS ||
> header.cluster_bits > MAX_CLUSTER_BITS) {
> @@ -1076,7 +1077,7 @@ int qcow2_update_header(BlockDriverState *bs)
> .incompatible_features = cpu_to_be64(s->incompatible_features),
> .compatible_features = cpu_to_be64(s->compatible_features),
> .autoclear_features = cpu_to_be64(s->autoclear_features),
> - .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
> + .refcount_order = cpu_to_be32(s->refcount_order),
> .header_length = cpu_to_be32(header_length),
> };
>
> @@ -1735,6 +1736,196 @@ static int qcow2_load_vmstate(BlockDriverState *bs,
> uint8_t *buf,
> return ret;
> }
>
> +/*
> + * Downgrades an image's version. To achieve this, any incompatible features
> + * have to be removed.
> + */
> +static int qcow2_downgrade(BlockDriverState *bs, int target_version)
> +{
> + BDRVQcowState *s = bs->opaque;
> + int current_version = s->qcow_version;
> + int ret;
> +
> + if (target_version == current_version) {
> + return 0;
> + } else if (target_version > current_version) {
> + return -EINVAL;
> + } else if (target_version != 2) {
> + return -EINVAL;
> + }
> +
> + if (s->refcount_order != 4) {
> + /* we would have to convert the image to a refcount_order == 4 image
> + * here; however, since qemu (at the time of writing this) does not
> + * support anything different than 4 anyway, there is no point in
> doing
> + * so right now; however, we should error out (if qemu supports this
> in
> + * the future and this code has not been adapted) */
> + error_report("qcow2_downgrade: Image refcount orders other than 4
> are"
> + "currently not supported.");
> + return -ENOTSUP;
> + }
> +
> + /* clear incompatible features */
> + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
> + ret = qcow2_mark_clean(bs);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + if (s->incompatible_features) {
> + return -ENOTSUP;
> + }
> +
> + /* since we can ignore compatible features, we can set them to 0 as well
> */
> + s->compatible_features = 0;
> + /* if lazy refcounts have been used, they have already been fixed through
> + * clearing the dirty flag */
> +
> + /* clearing autoclear features is trivial */
> + s->autoclear_features = 0;
> +
> + ret = qcow2_expand_zero_clusters(bs);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + s->qcow_version = target_version;
> + ret = qcow2_update_header(bs);
> + if (ret < 0) {
> + s->qcow_version = current_version;
> + return ret;
> + }
> + return 0;
> +}
> +
> +static int qcow2_amend_options(BlockDriverState *bs,
> + QEMUOptionParameter *options)
> +{
> + BDRVQcowState *s = bs->opaque;
> + int old_version = s->qcow_version, new_version = old_version;
> + uint64_t new_size = 0;
> + const char *backing_file = NULL, *backing_format = NULL;
> + bool lazy_refcounts = s->use_lazy_refcounts;
> + int ret;
> + int i;
> +
> + for (i = 0; options[i].name; i++)
> + {
> + if (!strcmp(options[i].name, "compat")) {
> + if (!options[i].value.s) {
> + /* preserve default */
> + } else if (!strcmp(options[i].value.s, "0.10")) {
> + new_version = 2;
> + } else if (!strcmp(options[i].value.s, "1.1")) {
> + new_version = 3;
> + } else {
> + fprintf(stderr, "Unknown compatibility level %s.\n",
> + options[i].value.s);
> + return -EINVAL;
> + }
> + } else if (!strcmp(options[i].name, "preallocation")) {
> + if (options[i].assigned) {
For the encryption flag and cluster_size, you check the original value and
only error out on an actual change. Shouldn't the original preallocation value
be checked here as well?
> + fprintf(stderr, "Cannot change preallocation mode.\n");
> + return -ENOTSUP;
> + }
> + } else if (!strcmp(options[i].name, "size")) {
> + new_size = options[i].value.n;
> + } else if (!strcmp(options[i].name, "backing_file")) {
> + backing_file = options[i].value.s;
> + } else if (!strcmp(options[i].name, "backing_fmt")) {
> + backing_format = options[i].value.s;
> + } else if (!strcmp(options[i].name, "encryption")) {
> + if (options[i].assigned &&
> + (options[i].value.n != !!s->crypt_method)) {
> + fprintf(stderr, "Changing the encryption flag is not "
> + "supported.\n");
> + return -ENOTSUP;
> + }
> + } else if (!strcmp(options[i].name, "cluster_size")) {
> + if (options[i].assigned && (options[i].value.n !=
> s->cluster_size))
> + {
> + fprintf(stderr, "Changing the cluster size is not "
> + "supported.\n");
> + return -ENOTSUP;
> + }
> + } else if (!strcmp(options[i].name, "lazy_refcounts")) {
> + if (options[i].assigned) {
> + lazy_refcounts = options[i].value.n;
> + }
> + } else {
> + /* if this assertion fails, this probably means a new option was
> + * added without having it covered here */
> + assert(false);
An unknown option reported as -ENOTSUP with a proper message is good enough;
it's not critical enough to warrant an assert.
> + }
> + }
> +
> + if (new_version != old_version) {
> + if (new_version > old_version) {
> + /* Upgrade */
> + s->qcow_version = new_version;
> + ret = qcow2_update_header(bs);
> + if (ret < 0) {
> + s->qcow_version = old_version;
> + return ret;
> + }
> + } else {
> + ret = qcow2_downgrade(bs, new_version);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> + }
> +
> + if (new_size) {
> + ret = qcow2_truncate(bs, new_size);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + if (backing_file || backing_format) {
> + ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
> + backing_format ?:
> bs->backing_format);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + if (s->use_lazy_refcounts != lazy_refcounts) {
> + if (lazy_refcounts) {
> + if (s->qcow_version < 3) {
> + fprintf(stderr, "Lazy refcounts only supported with
> compatibility "
> + "level 1.1 and above (use compat=1.1 or greater)\n");
> + return -EINVAL;
> + }
> + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
> + ret = qcow2_update_header(bs);
> + if (ret < 0) {
> + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
> + return ret;
> + }
> + s->use_lazy_refcounts = true;
> + } else {
> + /* make image clean first */
> + ret = qcow2_mark_clean(bs);
> + if (ret < 0) {
> + return ret;
> + }
> + /* now disallow lazy refcounts */
> + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
> + ret = qcow2_update_header(bs);
> + if (ret < 0) {
> + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
> + return ret;
> + }
> + s->use_lazy_refcounts = false;
> + }
> + }
> +
> + return 0;
> +}
> +
> static QEMUOptionParameter qcow2_create_options[] = {
> {
> .name = BLOCK_OPT_SIZE,
> @@ -1818,6 +2009,7 @@ static BlockDriver bdrv_qcow2 = {
>
> .create_options = qcow2_create_options,
> .bdrv_check = qcow2_check,
> + .bdrv_amend_options = qcow2_amend_options,
> };
>
> static void bdrv_qcow2_init(void)
> diff --git a/block/qcow2.h b/block/qcow2.h
> index dba9771..ad3fd21 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -196,6 +196,7 @@ typedef struct BDRVQcowState {
> int flags;
> int qcow_version;
> bool use_lazy_refcounts;
> + int refcount_order;
>
> bool discard_passthrough[QCOW2_DISCARD_MAX];
>
> @@ -408,6 +409,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t
> offset,
> int nb_sectors);
> int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int
> nb_sectors);
>
> +int qcow2_expand_zero_clusters(BlockDriverState *bs);
> +
> /* qcow2-snapshot.c functions */
> int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
> int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
> --
> 1.8.3.1
>
>
- Re: [Qemu-devel] [PATCH v3 2/3] qcow2: Implement bdrv_amend_options,
Fam Zheng <=