[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine
From: |
Kevin Wolf |
Subject: |
[Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine |
Date: |
Wed, 13 Feb 2013 14:22:07 +0100 |
This creates a separate coroutine for processing the COW and the L2
table update of allocating requests. The request itself can then
complete while the second part is still being processed.
We need a qemu_aio_flush() hook in order to ensure that these
coroutines for the second part aren't still running after bdrv_drain_all
(e.g. when the VM is stopped).
Signed-off-by: Kevin Wolf <address@hidden>
---
block.c | 5 ++
block/qcow2.c | 131 ++++++++++++++++++++++++++++++++++++++-------
block/qcow2.h | 8 +++
include/block/block_int.h | 3 +
4 files changed, 127 insertions(+), 20 deletions(-)
diff --git a/block.c b/block.c
index 50dab8e..5ae80a0 100644
--- a/block.c
+++ b/block.c
@@ -1225,7 +1225,12 @@ void bdrv_drain_all(void)
qemu_co_queue_restart_all(&bs->throttled_reqs);
busy = true;
}
+
+ if (bs->drv && bs->drv->bdrv_drain) {
+ busy |= bs->drv->bdrv_drain(bs);
+ }
}
+
} while (busy);
/* If requests are still pending there is a bug somewhere */
diff --git a/block/qcow2.c b/block/qcow2.c
index 07f7493..3f169b8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -483,6 +483,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
/* Initialise locks */
qemu_co_mutex_init(&s->lock);
+ qemu_co_rwlock_init(&s->l2meta_flush);
/* Repair image if dirty */
if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
@@ -745,6 +746,70 @@ fail:
return ret;
}
+typedef struct ProcessL2Meta {
+ BlockDriverState *bs;
+ QCowL2Meta *m;
+} ProcessL2Meta;
+
+/**
+ * Processes the second part of a request that wrote to newly allocated
+ * clusters (most importantly, doing COW and updating the L2).
+ *
+ * Make sure that s->l2meta_flush is held as a reader when entering the
+ * coroutine.
+ */
+static void coroutine_fn process_l2meta(void *opaque)
+{
+ ProcessL2Meta *p = opaque;
+ QCowL2Meta *m = p->m;
+ BlockDriverState *bs = p->bs;
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ assert(s->l2meta_flush.reader > 0);
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = qcow2_alloc_cluster_link_l2(bs, m);
+ if (ret < 0) {
+ /* FIXME */
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+
+ /* Take the request off the list of running requests */
+ if (m->nb_clusters != 0) {
+ QLIST_REMOVE(m, next_in_flight);
+ }
+
+ /* Meanwhile some new dependencies could have accumulated */
+ qemu_co_queue_restart_all(&m->dependent_requests);
+
+ g_free(m);
+
+ qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
+static bool qcow2_drain(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ return !QLIST_EMPTY(&s->cluster_allocs);
+}
+
+static inline coroutine_fn void stop_l2meta(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ qemu_co_rwlock_wrlock(&s->l2meta_flush);
+}
+
+static inline coroutine_fn void resume_l2meta(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
@@ -824,26 +889,37 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
goto fail;
}
- while (l2meta != NULL) {
- QCowL2Meta *next;
+ if (l2meta != NULL) {
+ qemu_co_mutex_unlock(&s->lock);
- l2meta->is_written = true;
+ while (l2meta != NULL) {
+ Coroutine *co;
+ QCowL2Meta *next;
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
+ ProcessL2Meta p = {
+ .bs = bs,
+ .m = l2meta,
+ };
- /* Take the request off the list of running requests */
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
+ /*
+ * Must take l2meta_flush already here instead of in the
+ * coroutine; otherwise it would be possible that a concurrent
+ * flush would claim that this request is written to the disk
+ * when the metadata isn't written yet in fact.
+ */
+ qemu_co_rwlock_rdlock(&s->l2meta_flush);
+ l2meta->is_written = true;
+
+ /* l2meta might already be freed after the coroutine has run */
+ next = l2meta->next;
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
+ co = qemu_coroutine_create(process_l2meta);
+ qemu_coroutine_enter(co, &p);
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
+ l2meta = next;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
}
remaining_sectors -= cur_nr_sectors;
@@ -879,6 +955,11 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
+
+ while (qcow2_drain(bs)) {
+ qemu_aio_wait();
+ }
+
g_free(s->l1_table);
qcow2_cache_flush(bs, s->l2_table_cache);
@@ -1417,10 +1498,12 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
}
/* Whatever is left can use real zero clusters */
+ stop_l2meta(bs);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(bs);
return ret;
}
@@ -1431,10 +1514,13 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int ret;
BDRVQcowState *s = bs->opaque;
+ stop_l2meta(bs);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(bs);
+
return ret;
}
@@ -1560,23 +1646,27 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque;
int ret;
+ stop_l2meta(bs);
qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
+ goto fail;
}
if (qcow2_need_accurate_refcounts(s)) {
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
+ goto fail;
}
}
+
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(bs);
- return 0;
+ return ret;
}
static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
@@ -1703,6 +1793,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+ .bdrv_drain = qcow2_drain,
.bdrv_co_write_zeroes = qcow2_co_write_zeroes,
.bdrv_co_discard = qcow2_co_discard,
diff --git a/block/qcow2.h b/block/qcow2.h
index 4c139d0..46ed112 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -162,6 +162,14 @@ typedef struct BDRVQcowState {
CoMutex lock;
+ /*
+ * Only to be acquired while s->lock is not held.
+ *
+ * Readers: All l2meta coroutines that are in flight
+ * Writers: Anyone who requires l2meta to be flushed
+ */
+ CoRwlock l2meta_flush;
+
uint32_t crypt_method; /* current crypt method, 0 if no key yet */
uint32_t crypt_method_header;
AES_KEY aes_encrypt_key;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index eaad53e..6ee7536 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -141,6 +141,9 @@ struct BlockDriver {
*/
int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+ /** Returns true if the block device is still busy */
+ bool (*bdrv_drain)(BlockDriverState *bs);
+
const char *protocol_name;
int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
int64_t (*bdrv_getlength)(BlockDriverState *bs);
--
1.7.6.5
- [Qemu-devel] [RFC PATCH v2 16/23] qcow2: Reading from areas not in L2 tables yet, (continued)
- [Qemu-devel] [RFC PATCH v2 14/23] qcow2: Use byte granularity in qcow2_alloc_cluster_offset(), Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine,
Kevin Wolf <=
- [Qemu-devel] [RFC PATCH v2 22/23] qcow2: Move cluster gathering to a non-looping loop, Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 21/23] qemu-iotests: Another concurrent multicluster allocation case, Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Kevin Wolf, 2013/02/13
[Qemu-devel] [RFC PATCH v2 23/23] qcow2: Gather clusters in a looping loop, Kevin Wolf, 2013/02/13
Re: [Qemu-devel] [RFC PATCH v2 00/23] qcow2: Delayed COW, Stefan Hajnoczi, 2013/02/14