[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 1/2] block: allow live commit of active image
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH 1/2] block: allow live commit of active image
Date: Mon, 22 Jul 2013 08:34:18 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130625 Thunderbird/17.0.7
Il 22/07/2013 05:46, Fam Zheng ha scritto:
> This patch eliminates the limitation on committing the active device.
>
> bdrv_drop_intermediate is reimplemented to take pointers to
> (BlockDriverState *), so that it can update the caller's local pointers
> and preserve their semantics. The active BDS is updated in place by
> calling bdrv_swap on active and base: we need the data in 'base', as it
> is the only image remaining after the commit, but we can't delete
> 'active', as it is referenced everywhere in the program.
>
> Guest writes to the active device during the commit are tracked by the
> dirty bitmap and committed in the same way as block-mirror does.
I have only skimmed the patch, but I think this is incomplete.
Management needs to know the moment when 'active' is not valid anymore,
thus this job needs to be completed manually with "block-job-complete".
In fact, I wonder if block/commit.c could reuse most of the code from
block/mirror.c (basically everything except that bdrv_swap should be
replaced by bdrv_drop_intermediate).
Paolo
> Signed-off-by: Fam Zheng <address@hidden>
> ---
> block.c | 102 ++++++++++----------------------
> block/commit.c | 160
> ++++++++++++++++++++++++++------------------------
> include/block/block.h | 5 +-
> 3 files changed, 115 insertions(+), 152 deletions(-)
>
> diff --git a/block.c b/block.c
> index b560241..367e064 100644
> --- a/block.c
> +++ b/block.c
> @@ -2018,18 +2018,11 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState
> *active,
> return overlay;
> }
>
> -typedef struct BlkIntermediateStates {
> - BlockDriverState *bs;
> - QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> -} BlkIntermediateStates;
> -
> -
> /*
> - * Drops images above 'base' up to and including 'top', and sets the image
> - * above 'top' to have base as its backing file.
> - *
> - * Requires that the overlay to 'top' is opened r/w, so that the backing file
> - * information in 'bs' can be properly updated.
> + * Drops images above '*base' up to and including '*top', and sets new
> '*base'
> + * as backing_hd of top_overlay (the image originally has 'top' as backing
> + * file). top_overlay may be NULL if '*top' is active, no such update needed.
> + * Requires that the top_overlay to 'top' is opened r/w.
> *
> * E.g., this will convert the following chain:
> * bottom <- base <- intermediate <- top <- active
> @@ -2046,82 +2039,47 @@ typedef struct BlkIntermediateStates {
> *
> * base <- active
> *
> - * Error conditions:
> - * if active == top, that is considered an error
> + * It also allows active==top, in which case it converts:
> + *
> + * base <- intermediate <- active (also top)
> + *
> + * to
> + *
> + * base == active == top, i.e. only base remains: *top == *base when return.
> *
> */
> -int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
> - BlockDriverState *base)
> +int bdrv_drop_intermediate(BlockDriverState *top_overlay,
> + BlockDriverState **top,
> + BlockDriverState **base)
> {
> - BlockDriverState *intermediate;
> + BlockDriverState *pbs;
> BlockDriverState *base_bs = NULL;
> - BlockDriverState *new_top_bs = NULL;
> - BlkIntermediateStates *intermediate_state, *next;
> int ret = -EIO;
>
> - QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
> - QSIMPLEQ_INIT(&states_to_delete);
> -
> - if (!top->drv || !base->drv) {
> + if (!(*top)->drv || !(*base)->drv) {
> goto exit;
> }
>
> - new_top_bs = bdrv_find_overlay(active, top);
> -
> - if (new_top_bs == NULL) {
> - /* we could not find the image above 'top', this is an error */
> - goto exit;
> + for (pbs = (*top)->backing_hd; pbs != *base; pbs = base_bs) {
> + assert(pbs);
> + base_bs = pbs->backing_hd;
> + pbs->backing_hd = NULL;
> + bdrv_delete(pbs);
> }
>
> - /* special case of new_top_bs->backing_hd already pointing to base -
> nothing
> - * to do, no intermediate images */
> - if (new_top_bs->backing_hd == base) {
> - ret = 0;
> - goto exit;
> - }
> + bdrv_swap(*base, *top);
>
> - intermediate = top;
> + (*base)->backing_hd = NULL;
> + bdrv_delete(*base);
> + *base = *top;
>
> - /* now we will go down through the list, and add each BDS we find
> - * into our deletion queue, until we hit the 'base'
> - */
> - while (intermediate) {
> - intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
> - intermediate_state->bs = intermediate;
> - QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
> -
> - if (intermediate->backing_hd == base) {
> - base_bs = intermediate->backing_hd;
> - break;
> - }
> - intermediate = intermediate->backing_hd;
> - }
> - if (base_bs == NULL) {
> - /* something went wrong, we did not end at the base. safely
> - * unravel everything, and exit with error */
> - goto exit;
> - }
> -
> - /* success - we can delete the intermediate states, and link top->base */
> - ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
> - base_bs->drv ? base_bs->drv->format_name
> : "");
> - if (ret) {
> - goto exit;
> - }
> - new_top_bs->backing_hd = base_bs;
> -
> -
> - QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry,
> next) {
> - /* so that bdrv_close() does not recursively close the chain */
> - intermediate_state->bs->backing_hd = NULL;
> - bdrv_delete(intermediate_state->bs);
> + /* overlay exists when active != top, need to change backing file for it
> */
> + if (top_overlay) {
> + ret = bdrv_change_backing_file(top_overlay, (*base)->filename,
> + (*base)->drv ?
> + (*base)->drv->format_name : "");
> }
> - ret = 0;
> -
> exit:
> - QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry,
> next) {
> - g_free(intermediate_state);
> - }
> return ret;
> }
>
> diff --git a/block/commit.c b/block/commit.c
> index 2227fc2..c85b188 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -17,14 +17,13 @@
> #include "block/blockjob.h"
> #include "qemu/ratelimit.h"
>
> -enum {
> - /*
> - * Size of data buffer for populating the image file. This should be
> large
> - * enough to process multiple clusters in a single call, so that
> populating
> - * contiguous regions of the image is efficient.
> - */
> - COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
> -};
> +/*
> + * Size of data buffer for populating the image file. This should be large
> + * enough to process multiple clusters in a single call, so that populating
> + * contiguous regions of the image is efficient.
> + */
> +#define COMMIT_BUFFER_SECTORS 128
> +#define COMMIT_BUFFER_BYTES (COMMIT_BUFFER_SECTORS * BDRV_SECTOR_SIZE)
>
> #define SLICE_TIME 100000000ULL /* ns */
>
> @@ -34,6 +33,7 @@ typedef struct CommitBlockJob {
> BlockDriverState *active;
> BlockDriverState *top;
> BlockDriverState *base;
> + BlockDriverState *overlay;
> BlockdevOnError on_error;
> int base_flags;
> int orig_overlay_flags;
> @@ -65,100 +65,109 @@ static void coroutine_fn commit_run(void *opaque)
> BlockDriverState *active = s->active;
> BlockDriverState *top = s->top;
> BlockDriverState *base = s->base;
> - BlockDriverState *overlay_bs;
> int64_t sector_num, end;
> int ret = 0;
> int n = 0;
> void *buf;
> - int bytes_written = 0;
> int64_t base_len;
> + int64_t next_dirty;
> + HBitmapIter hbi;
>
> + buf = qemu_blockalign(top, COMMIT_BUFFER_BYTES);
> ret = s->common.len = bdrv_getlength(top);
>
> -
> if (s->common.len < 0) {
> - goto exit_restore_reopen;
> + goto exit;
> }
>
> ret = base_len = bdrv_getlength(base);
> if (base_len < 0) {
> - goto exit_restore_reopen;
> + goto exit;
> }
>
> if (base_len < s->common.len) {
> ret = bdrv_truncate(base, s->common.len);
> if (ret) {
> - goto exit_restore_reopen;
> + goto exit;
> }
> }
>
> end = s->common.len >> BDRV_SECTOR_BITS;
> - buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
>
> for (sector_num = 0; sector_num < end; sector_num += n) {
> - uint64_t delay_ns = 0;
> - bool copy;
>
> -wait:
> - /* Note that even when no rate limit is applied we need to yield
> - * with no pending I/O here so that bdrv_drain_all() returns.
> - */
> - block_job_sleep_ns(&s->common, rt_clock, delay_ns);
> - if (block_job_is_cancelled(&s->common)) {
> - break;
> - }
> /* Copy if allocated above the base */
> ret = bdrv_co_is_allocated_above(top, base, sector_num,
> - COMMIT_BUFFER_SIZE /
> BDRV_SECTOR_SIZE,
> + COMMIT_BUFFER_SECTORS,
> &n);
> - copy = (ret == 1);
> - trace_commit_one_iteration(s, sector_num, n, ret);
> - if (copy) {
> - if (s->common.speed) {
> - delay_ns = ratelimit_calculate_delay(&s->limit, n);
> - if (delay_ns > 0) {
> - goto wait;
> - }
> - }
> - ret = commit_populate(top, base, sector_num, n, buf);
> - bytes_written += n * BDRV_SECTOR_SIZE;
> + if (ret) {
> + bdrv_set_dirty(top, sector_num, n);
> + }
> + }
> +
> + while (bdrv_get_dirty_count(s->top)) {
> + uint64_t delay_ns = 0;
> + if (block_job_is_cancelled(&s->common)) {
> + goto exit;
> }
> - if (ret < 0) {
> - if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
> - s->on_error == BLOCKDEV_ON_ERROR_REPORT||
> - (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC))
> {
> - goto exit_free_buf;
> - } else {
> - n = 0;
> - continue;
> +
> + bdrv_dirty_iter_init(s->top, &hbi);
> + for (next_dirty = hbitmap_iter_next(&hbi);
> + next_dirty >= 0;
> + next_dirty = hbitmap_iter_next(&hbi)) {
> + sector_num = next_dirty;
> + if (block_job_is_cancelled(&s->common)) {
> + goto exit;
> }
> + delay_ns = ratelimit_calculate_delay(&s->limit,
> + COMMIT_BUFFER_SECTORS);
> + /* Note that even when no rate limit is applied we need to yield
> + * with no pending I/O here so that bdrv_drain_all() returns.
> + */
> + block_job_sleep_ns(&s->common, rt_clock, delay_ns);
> + trace_commit_one_iteration(s, sector_num,
> + COMMIT_BUFFER_SECTORS, ret);
> + ret = commit_populate(top, base, sector_num,
> + COMMIT_BUFFER_SECTORS, buf);
> + if (ret < 0) {
> + if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
> + s->on_error == BLOCKDEV_ON_ERROR_REPORT ||
> + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC &&
> + ret == -ENOSPC)) {
> + goto exit;
> + } else {
> + continue;
> + }
> + }
> + /* Publish progress */
> + s->common.offset += COMMIT_BUFFER_BYTES;
> + bdrv_reset_dirty(top, sector_num, COMMIT_BUFFER_SECTORS);
> }
> - /* Publish progress */
> - s->common.offset += n * BDRV_SECTOR_SIZE;
> }
>
> - ret = 0;
> -
> - if (!block_job_is_cancelled(&s->common) && sector_num == end) {
> - /* success */
> - ret = bdrv_drop_intermediate(active, top, base);
> + if (!block_job_is_cancelled(&s->common)) {
> + /* Drop intermediate: [top, base) */
> + ret = bdrv_drop_intermediate(s->overlay, &top, &base);
> + s->common.offset = s->common.len;
> }
>
> -exit_free_buf:
> - qemu_vfree(buf);
> + ret = 0;
> +
> +exit:
> + bdrv_set_dirty_tracking(active, 0);
>
> -exit_restore_reopen:
> /* restore base open flags here if appropriate (e.g., change the base
> back
> * to r/o). These reopens do not need to be atomic, since we won't abort
> * even on failure here */
> - if (s->base_flags != bdrv_get_flags(base)) {
> + if (s->overlay && s->base_flags != bdrv_get_flags(base)) {
> bdrv_reopen(base, s->base_flags, NULL);
> }
> - overlay_bs = bdrv_find_overlay(active, top);
> - if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
> - bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
> +
> + if (s->overlay && s->orig_overlay_flags != bdrv_get_flags(s->overlay)) {
> + bdrv_reopen(s->overlay, s->orig_overlay_flags, NULL);
> }
>
> + qemu_vfree(buf);
> block_job_completed(&s->common, ret);
> }
>
> @@ -198,13 +207,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState
> *base,
> return;
> }
>
> - /* Once we support top == active layer, remove this check */
> - if (top == bs) {
> - error_setg(errp,
> - "Top image as the active layer is currently unsupported");
> - return;
> - }
> -
> if (top == base) {
> error_setg(errp, "Invalid files for merge: top and base are the
> same");
> return;
> @@ -212,23 +214,20 @@ void commit_start(BlockDriverState *bs,
> BlockDriverState *base,
>
> overlay_bs = bdrv_find_overlay(bs, top);
>
> - if (overlay_bs == NULL) {
> - error_setg(errp, "Could not find overlay image for %s:",
> top->filename);
> - return;
> - }
> -
> orig_base_flags = bdrv_get_flags(base);
> - orig_overlay_flags = bdrv_get_flags(overlay_bs);
> + if (overlay_bs) {
> + orig_overlay_flags = bdrv_get_flags(overlay_bs);
> + if (!(orig_overlay_flags & BDRV_O_RDWR)) {
> + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
> + orig_overlay_flags | BDRV_O_RDWR);
> + }
> + }
>
> /* convert base & overlay_bs to r/w, if necessary */
> if (!(orig_base_flags & BDRV_O_RDWR)) {
> reopen_queue = bdrv_reopen_queue(reopen_queue, base,
> orig_base_flags | BDRV_O_RDWR);
> }
> - if (!(orig_overlay_flags & BDRV_O_RDWR)) {
> - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
> - orig_overlay_flags | BDRV_O_RDWR);
> - }
> if (reopen_queue) {
> bdrv_reopen_multiple(reopen_queue, &local_err);
> if (local_err != NULL) {
> @@ -237,7 +236,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState
> *base,
> }
> }
>
> -
> s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
> if (!s) {
> return;
> @@ -246,13 +244,19 @@ void commit_start(BlockDriverState *bs,
> BlockDriverState *base,
> s->base = base;
> s->top = top;
> s->active = bs;
> + s->overlay = overlay_bs;
>
> s->base_flags = orig_base_flags;
> - s->orig_overlay_flags = orig_overlay_flags;
> + if (overlay_bs) {
> + s->orig_overlay_flags = orig_overlay_flags;
> + }
>
> s->on_error = on_error;
> s->common.co = qemu_coroutine_create(commit_run);
>
> trace_commit_start(bs, base, top, s, s->common.co, opaque);
> +
> + bdrv_set_dirty_tracking(top, COMMIT_BUFFER_BYTES);
> +
> qemu_coroutine_enter(s->common.co, s);
> }
> diff --git a/include/block/block.h b/include/block/block.h
> index b6b9014..caf2c22 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -197,8 +197,9 @@ int bdrv_commit_all(void);
> int bdrv_change_backing_file(BlockDriverState *bs,
> const char *backing_file, const char *backing_fmt);
> void bdrv_register(BlockDriver *bdrv);
> -int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
> - BlockDriverState *base);
> +int bdrv_drop_intermediate(BlockDriverState *top_overlay,
> + BlockDriverState **top,
> + BlockDriverState **base);
> BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
> BlockDriverState *bs);
> BlockDriverState *bdrv_find_base(BlockDriverState *bs);
>
[Qemu-devel] [PATCH 2/2] qemu-iotests: update test cases for commit active, Fam Zheng, 2013/07/21
Re: [Qemu-devel] [PATCH 0/2] block: allow commit active as top, Wenchao Xia, 2013/07/22