Re: [PATCH 2/2] qcow2: improve savevm performance


From: Vladimir Sementsov-Ogievskiy
Subject: Re: [PATCH 2/2] qcow2: improve savevm performance
Date: Thu, 11 Jun 2020 11:04:24 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.8.1

10.06.2020 22:00, Denis V. Lunev wrote:
This patch does two standard things:
- it creates an intermediate buffer for all writes from the QEMU migration
   code to the QCOW2 image,
- this buffer is sent to disk asynchronously, allowing several writes to
   run in parallel.

In general, the migration code is fantastically inefficient (by observation):
buffers are not aligned and are sent in arbitrary pieces, often less than
100 bytes per chunk, which results in read-modify-write cycles for non-cached
operations. It should also be noted that all writes go into unallocated image
blocks, which suffer additionally from partial writes to such new clusters.

Snapshot creation time (2 GB Fedora-31 VM running on NVMe storage):
                 original     fixed
cached:          1.79s        1.27s
non-cached:      3.29s        0.81s

The difference over HDD would be more significant :)

Signed-off-by: Denis V. Lunev <den@openvz.org>

If I follow correctly, you make qcow2_save_vmstate implicitly asynchronous:
it may return immediately after creating a task, and the task keeps
executing in parallel.

I think the block layer is unprepared for such behavior: it relies on the
fact that .bdrv_save_vmstate is synchronous.

For example, look at bdrv_co_rw_vmstate(). It calls drv->bdrv_save_vmstate
inside a bdrv_inc_in_flight()/bdrv_dec_in_flight() pair. This means that
with this patch we may break the drained section.
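
For reference, the generic call site looks roughly like this (paraphrased
and simplified from block/io.c, not the exact code):

static int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                   bool is_read)
{
    BlockDriver *drv = bs->drv;
    int ret = -ENOTSUP;

    /* The in-flight counter is what drained sections wait on */
    bdrv_inc_in_flight(bs);

    if (!drv) {
        ret = -ENOMEDIUM;
    } else if (drv->bdrv_load_vmstate) {
        ret = is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
                      : drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (bs->file) {
        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
    }

    /* If .bdrv_save_vmstate returns with a task still running, the
     * counter may drop to zero while vmstate I/O is still in flight */
    bdrv_dec_in_flight(bs);

    return ret;
}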

Next, this is a kind of cache for the vmstate-write operation. It seems to
me that it is not directly related to qcow2, so we could implement it in
the generic block layer, where we can handle in_flight requests. Can we
keep the .bdrv_save_vmstate handlers of format drivers as they are, i.e.
synchronous, but instead change the generic interface to be (optionally?)
cached?
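
A minimal sketch of that idea (all names here, BdrvVMStateCache,
bdrv_vmstate_cache_write and cache_size, are invented for illustration and
are not existing API; the driver handler stays synchronous):

/* Hypothetical block-layer bounce buffer in front of the synchronous
 * .bdrv_save_vmstate handler */
typedef struct BdrvVMStateCache {
    void *buf;         /* bounce buffer of cache_size bytes */
    size_t cache_size; /* e.g. MAX(cluster size, 1 MiB) */
    size_t bytes;      /* bytes accumulated so far */
    int64_t pos;       /* vmstate offset corresponding to buf[0] */
} BdrvVMStateCache;

static int coroutine_fn
bdrv_vmstate_cache_write(BlockDriverState *bs, BdrvVMStateCache *c,
                         QEMUIOVector *qiov, int64_t pos)
{
    size_t off = 0;

    /* Only contiguous appends are buffered; anything else goes straight
     * to the driver, as before */
    if (pos != c->pos + c->bytes) {
        return bs->drv->bdrv_save_vmstate(bs, qiov, pos);
    }

    while (off < qiov->size) {
        size_t chunk = MIN(qiov->size - off, c->cache_size - c->bytes);

        qemu_iovec_to_buf(qiov, off, (uint8_t *)c->buf + c->bytes, chunk);
        c->bytes += chunk;
        off += chunk;

        if (c->bytes == c->cache_size) {
            /* Full buffer: hand it to the driver (or to an AioTaskPool,
             * with in_flight accounting done in the generic layer) */
            QEMUIOVector v;
            int ret;

            qemu_iovec_init_buf(&v, c->buf, c->bytes);
            ret = bs->drv->bdrv_save_vmstate(bs, &v, c->pos);
            if (ret < 0) {
                return ret;
            }
            c->pos += c->bytes;
            c->bytes = 0;
        }
    }

    return 0;
}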

---
  block/qcow2.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++-
  block/qcow2.h |   4 ++
  2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cd2e6757e..e6232f32e2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4797,11 +4797,43 @@ static int qcow2_make_empty(BlockDriverState *bs)
      return ret;
  }
+
+typedef struct Qcow2VMStateTask {
+    AioTask task;
+
+    BlockDriverState *bs;
+    int64_t offset;
+    void *buf;
+    size_t bytes;
+} Qcow2VMStateTask;
+
+typedef struct Qcow2SaveVMState {
+    AioTaskPool *pool;
+    Qcow2VMStateTask *t;
+} Qcow2SaveVMState;
+
  static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
  {
      BDRVQcow2State *s = bs->opaque;
+    Qcow2SaveVMState *state = s->savevm_state;
      int ret;

+    if (state != NULL) {
+        aio_task_pool_start_task(state->pool, &state->t->task);
+
+        aio_task_pool_wait_all(state->pool);
+        ret = aio_task_pool_status(state->pool);
+
+        aio_task_pool_free(state->pool);
+        g_free(state);
+
+        s->savevm_state = NULL;
+
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
      qemu_co_mutex_lock(&s->lock);
      ret = qcow2_write_caches(bs);
      qemu_co_mutex_unlock(&s->lock);
@@ -5098,14 +5130,89 @@ static int qcow2_has_zero_init(BlockDriverState *bs)
      }
  }
+
+static coroutine_fn int qcow2_co_vmstate_task_entry(AioTask *task)
+{
+    int err = 0;
+    Qcow2VMStateTask *t = container_of(task, Qcow2VMStateTask, task);
+
+    if (t->bytes != 0) {
+        QEMUIOVector local_qiov;
+        qemu_iovec_init_buf(&local_qiov, t->buf, t->bytes);
+        err = t->bs->drv->bdrv_co_pwritev_part(t->bs, t->offset, t->bytes,
+                                               &local_qiov, 0, 0);
+    }
+
+    qemu_vfree(t->buf);
+    return err;
+}
+
+static Qcow2VMStateTask *qcow2_vmstate_task_create(BlockDriverState *bs,
+                                                    int64_t pos, size_t size)
+{
+    BDRVQcow2State *s = bs->opaque;
+    Qcow2VMStateTask *t = g_new(Qcow2VMStateTask, 1);
+
+    *t = (Qcow2VMStateTask) {
+        .task.func = qcow2_co_vmstate_task_entry,
+        .buf = qemu_blockalign(bs, size),
+        .offset = qcow2_vm_state_offset(s) + pos,
+        .bs = bs,
+    };
+
+    return t;
+}
+
  static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                                int64_t pos)
  {
      BDRVQcow2State *s = bs->opaque;
+    Qcow2SaveVMState *state = s->savevm_state;
+    Qcow2VMStateTask *t;
+    size_t buf_size = MAX(s->cluster_size, 1 * MiB);
+    size_t to_copy;
+    size_t off;

      BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
-    return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
-                                         qiov->size, qiov, 0, 0);
+
+    if (state == NULL) {
+        state = g_new(Qcow2SaveVMState, 1);
+        *state = (Qcow2SaveVMState) {
+            .pool = aio_task_pool_new(QCOW2_MAX_WORKERS),
+            .t = qcow2_vmstate_task_create(bs, pos, buf_size),
+        };
+
+        s->savevm_state = state;
+    }
+
+    if (aio_task_pool_status(state->pool) != 0) {
+        return aio_task_pool_status(state->pool);
+    }
+
+    t = state->t;
+    if (t->offset + t->bytes != qcow2_vm_state_offset(s) + pos) {
+        /* Normally this branch is not reachable from migration */
+        return bs->drv->bdrv_co_pwritev_part(bs,
+                qcow2_vm_state_offset(s) + pos, qiov->size, qiov, 0, 0);
+    }
+
+    off = 0;
+    while (1) {
+        to_copy = MIN(qiov->size - off, buf_size - t->bytes);
+        qemu_iovec_to_buf(qiov, off, t->buf + t->bytes, to_copy);
+        t->bytes += to_copy;
+        if (t->bytes < buf_size) {
+            return 0;
+        }
+
+        aio_task_pool_start_task(state->pool, &t->task);
+
+        pos += to_copy;
+        off += to_copy;
+        state->t = t = qcow2_vmstate_task_create(bs, pos, buf_size);
+    }
+
+    return 0;
  }

  static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
diff --git a/block/qcow2.h b/block/qcow2.h
index 7ce2c23bdb..146cfed739 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -291,6 +291,8 @@ typedef struct Qcow2BitmapHeaderExt {
  #define QCOW2_MAX_THREADS 4

+typedef struct Qcow2SaveVMState Qcow2SaveVMState;
+
  typedef struct BDRVQcow2State {
      int cluster_bits;
      int cluster_size;
@@ -384,6 +386,8 @@ typedef struct BDRVQcow2State {
       * is to convert the image with the desired compression type set.
       */
      Qcow2CompressionType compression_type;
+
+    Qcow2SaveVMState *savevm_state;
  } BDRVQcow2State;

  typedef struct Qcow2COWRegion {



--
Best regards,
Vladimir


