[Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine

From:	Kevin Wolf
Subject:	[Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine
Date:	Wed, 13 Feb 2013 14:22:07 +0100

This creates a separate coroutine for processing the COW and the L2
table update of allocating requests. The request itself can then
complete while the second part is still being processed.

We need a bdrv_drain() driver hook, called from bdrv_drain_all(), in order
to ensure that these coroutines for the second part aren't still running
after bdrv_drain_all() returns (e.g. when the VM is stopped).

Signed-off-by: Kevin Wolf <address@hidden>
---
 block.c                   |    5 ++
 block/qcow2.c             |  131 ++++++++++++++++++++++++++++++++++++++-------
 block/qcow2.h             |    8 +++
 include/block/block_int.h |    3 +
 4 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/block.c b/block.c
index 50dab8e..5ae80a0 100644
--- a/block.c
+++ b/block.c
@@ -1225,7 +1225,12 @@ void bdrv_drain_all(void)
                 qemu_co_queue_restart_all(&bs->throttled_reqs);
                 busy = true;
             }
+
+            if (bs->drv && bs->drv->bdrv_drain) {
+                busy |= bs->drv->bdrv_drain(bs);
+            }
         }
+
     } while (busy);
 
     /* If requests are still pending there is a bug somewhere */
diff --git a/block/qcow2.c b/block/qcow2.c
index 07f7493..3f169b8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -483,6 +483,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
 
     /* Initialise locks */
     qemu_co_mutex_init(&s->lock);
+    qemu_co_rwlock_init(&s->l2meta_flush);
 
     /* Repair image if dirty */
     if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
@@ -745,6 +746,70 @@ fail:
     return ret;
 }
 
+typedef struct ProcessL2Meta {
+    BlockDriverState *bs;
+    QCowL2Meta *m;
+} ProcessL2Meta;
+
+/**
+ * Processes the second part of a request that wrote to newly allocated
+ * clusters (most importantly, doing COW and updating the L2).
+ *
+ * Make sure that s->l2meta_flush is held as a reader when entering the
+ * coroutine.
+ */
+static void coroutine_fn process_l2meta(void *opaque)
+{
+    ProcessL2Meta *p = opaque;
+    QCowL2Meta *m = p->m;
+    BlockDriverState *bs = p->bs;
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    assert(s->l2meta_flush.reader > 0);
+    qemu_co_mutex_lock(&s->lock);
+
+    ret = qcow2_alloc_cluster_link_l2(bs, m);
+    if (ret < 0) {
+        /* FIXME */
+    }
+
+    qemu_co_mutex_unlock(&s->lock);
+
+    /* Take the request off the list of running requests */
+    if (m->nb_clusters != 0) {
+        QLIST_REMOVE(m, next_in_flight);
+    }
+
+    /* Meanwhile some new dependencies could have accumulated */
+    qemu_co_queue_restart_all(&m->dependent_requests);
+
+    g_free(m);
+
+    qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
+static bool qcow2_drain(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    return !QLIST_EMPTY(&s->cluster_allocs);
+}
+
+static inline coroutine_fn void stop_l2meta(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    qemu_co_rwlock_wrlock(&s->l2meta_flush);
+}
+
+static inline coroutine_fn void resume_l2meta(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
 static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                            int64_t sector_num,
                            int remaining_sectors,
@@ -824,26 +889,37 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
-        while (l2meta != NULL) {
-            QCowL2Meta *next;
+        if (l2meta != NULL) {
+            qemu_co_mutex_unlock(&s->lock);
 
-            l2meta->is_written = true;
+            while (l2meta != NULL) {
+                Coroutine *co;
+                QCowL2Meta *next;
 
-            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
-            if (ret < 0) {
-                goto fail;
-            }
+                ProcessL2Meta p = {
+                    .bs = bs,
+                    .m  = l2meta,
+                };
 
-            /* Take the request off the list of running requests */
-            if (l2meta->nb_clusters != 0) {
-                QLIST_REMOVE(l2meta, next_in_flight);
-            }
+                /*
+                 * Must take l2meta_flush already here instead of in the
+                 * coroutine; otherwise it would be possible that a concurrent
+                 * flush would claim that this request is written to the disk
+                 * when the metadata isn't written yet in fact.
+                 */
+                qemu_co_rwlock_rdlock(&s->l2meta_flush);
+                l2meta->is_written = true;
+
+                /* l2meta might already be freed after the coroutine has run */
+                next = l2meta->next;
 
-            qemu_co_queue_restart_all(&l2meta->dependent_requests);
+                co = qemu_coroutine_create(process_l2meta);
+                qemu_coroutine_enter(co, &p);
 
-            next = l2meta->next;
-            g_free(l2meta);
-            l2meta = next;
+                l2meta = next;
+            }
+
+            qemu_co_mutex_lock(&s->lock);
         }
 
         remaining_sectors -= cur_nr_sectors;
@@ -879,6 +955,11 @@ fail:
 static void qcow2_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
+
+    while (qcow2_drain(bs)) {
+        qemu_aio_wait();
+    }
+
     g_free(s->l1_table);
 
     qcow2_cache_flush(bs, s->l2_table_cache);
@@ -1417,10 +1498,12 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
     }
 
     /* Whatever is left can use real zero clusters */
+    stop_l2meta(bs);
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
         nb_sectors);
     qemu_co_mutex_unlock(&s->lock);
+    resume_l2meta(bs);
 
     return ret;
 }
@@ -1431,10 +1514,13 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
     int ret;
     BDRVQcowState *s = bs->opaque;
 
+    stop_l2meta(bs);
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
         nb_sectors);
     qemu_co_mutex_unlock(&s->lock);
+    resume_l2meta(bs);
+
     return ret;
 }
 
@@ -1560,23 +1646,27 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
     BDRVQcowState *s = bs->opaque;
     int ret;
 
+    stop_l2meta(bs);
     qemu_co_mutex_lock(&s->lock);
+
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
-        qemu_co_mutex_unlock(&s->lock);
-        return ret;
+        goto fail;
     }
 
     if (qcow2_need_accurate_refcounts(s)) {
         ret = qcow2_cache_flush(bs, s->refcount_block_cache);
         if (ret < 0) {
-            qemu_co_mutex_unlock(&s->lock);
-            return ret;
+            goto fail;
         }
     }
+
+    ret = 0;
+fail:
     qemu_co_mutex_unlock(&s->lock);
+    resume_l2meta(bs);
 
-    return 0;
+    return ret;
 }
 
 static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
@@ -1703,6 +1793,7 @@ static BlockDriver bdrv_qcow2 = {
     .bdrv_co_readv          = qcow2_co_readv,
     .bdrv_co_writev         = qcow2_co_writev,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
+    .bdrv_drain             = qcow2_drain,
 
     .bdrv_co_write_zeroes   = qcow2_co_write_zeroes,
     .bdrv_co_discard        = qcow2_co_discard,
diff --git a/block/qcow2.h b/block/qcow2.h
index 4c139d0..46ed112 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -162,6 +162,14 @@ typedef struct BDRVQcowState {
 
     CoMutex lock;
 
+    /*
+     * Only to be acquired while s->lock is not held.
+     *
+     * Readers: All l2meta coroutines that are in flight
+     * Writers: Anyone who requires l2meta to be flushed
+     */
+    CoRwlock l2meta_flush;
+
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index eaad53e..6ee7536 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -141,6 +141,9 @@ struct BlockDriver {
      */
     int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
 
+    /** Returns true if the block device is still busy */
+    bool (*bdrv_drain)(BlockDriverState *bs);
+
     const char *protocol_name;
     int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
     int64_t (*bdrv_getlength)(BlockDriverState *bs);
-- 
1.7.6.5

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [RFC PATCH v2 16/23] qcow2: Reading from areas not in L2 tables yet, (continued)
- [Qemu-devel] [RFC PATCH v2 16/23] qcow2: Reading from areas not in L2 tables yet, Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 18/23] qcow2: Delay the COW, Kevin Wolf, 2013/02/13
  - Re: [Qemu-devel] [RFC PATCH v2 18/23] qcow2: Delay the COW, Stefan Hajnoczi, 2013/02/15
    - Re: [Qemu-devel] [RFC PATCH v2 18/23] qcow2: Delay the COW, Kevin Wolf, 2013/02/15
  - Re: [Qemu-devel] [RFC PATCH v2 18/23] qcow2: Delay the COW, Stefan Hajnoczi, 2013/02/18
- [Qemu-devel] [RFC PATCH v2 19/23] qcow2: Add error handling to the l2meta coroutine, Kevin Wolf, 2013/02/13
  - Re: [Qemu-devel] [RFC PATCH v2 19/23] qcow2: Add error handling to the l2meta coroutine, Stefan Hajnoczi, 2013/02/18
    - Re: [Qemu-devel] [RFC PATCH v2 19/23] qcow2: Add error handling to the l2meta coroutine, Kevin Wolf, 2013/02/21
    - Re: [Qemu-devel] [RFC PATCH v2 19/23] qcow2: Add error handling to the l2meta coroutine, Kevin Wolf, 2013/02/21
- [Qemu-devel] [RFC PATCH v2 14/23] qcow2: Use byte granularity in qcow2_alloc_cluster_offset(), Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine, Kevin Wolf <=
  - Re: [Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine, Stefan Hajnoczi, 2013/02/15
- [Qemu-devel] [RFC PATCH v2 22/23] qcow2: Move cluster gathering to a non-looping loop, Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 21/23] qemu-iotests: Another concurrent multicluster allocation case, Kevin Wolf, 2013/02/13
- [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Kevin Wolf, 2013/02/13
  - Re: [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Stefan Hajnoczi, 2013/02/18
  - Re: [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Stefan Hajnoczi, 2013/02/18
    - Re: [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Kevin Wolf, 2013/02/21
    - Re: [Qemu-devel] [RFC PATCH v2 20/23] qcow2: Cancel COW when overwritten, Stefan Hajnoczi, 2013/02/21
- [Qemu-devel] [RFC PATCH v2 23/23] qcow2: Gather clusters in a looping loop, Kevin Wolf, 2013/02/13
- Re: [Qemu-devel] [RFC PATCH v2 00/23] qcow2: Delayed COW, Stefan Hajnoczi, 2013/02/14

Prev by Date: [Qemu-devel] [RFC PATCH v2 14/23] qcow2: Use byte granularity in qcow2_alloc_cluster_offset()
Next by Date: [Qemu-devel] [RFC PATCH v2 22/23] qcow2: Move cluster gathering to a non-looping loop
Previous by thread: [Qemu-devel] [RFC PATCH v2 14/23] qcow2: Use byte granularity in qcow2_alloc_cluster_offset()
Next by thread: Re: [Qemu-devel] [RFC PATCH v2 17/23] qcow2: Move COW and L2 update into own coroutine
Index(es):
- Date
- Thread