qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v12 07/17] qmp: Add support of "dirty-bitmap" sy


From: John Snow
Subject: Re: [Qemu-devel] [PATCH v12 07/17] qmp: Add support of "dirty-bitmap" sync mode for drive-backup
Date: Wed, 11 Feb 2015 16:13:36 -0500
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.4.0



On 02/11/2015 12:47 PM, Max Reitz wrote:
On 2015-02-09 at 20:35, John Snow wrote:
For "dirty-bitmap" sync mode, the block job will iterate through the
given dirty bitmap to decide if a sector needs backup (backup all the
dirty clusters and skip clean ones), just as allocation conditions of
"top" sync mode.

Signed-off-by: Fam Zheng <address@hidden>
Signed-off-by: John Snow <address@hidden>
---
  block.c                   |   9 +++
  block/backup.c            | 149
+++++++++++++++++++++++++++++++++++++++-------
  block/mirror.c            |   4 ++
  blockdev.c                |  18 +++++-
  hmp.c                     |   3 +-
  include/block/block.h     |   1 +
  include/block/block_int.h |   2 +
  qapi/block-core.json      |  13 ++--
  qmp-commands.hx           |   7 ++-
  9 files changed, 172 insertions(+), 34 deletions(-)

diff --git a/block.c b/block.c
index 8d84ace..e93fceb 100644
--- a/block.c
+++ b/block.c
@@ -5648,6 +5648,15 @@ static void bdrv_reset_dirty(BlockDriverState
*bs, int64_t cur_sector,
      }
  }
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
  int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
*bitmap)
  {
      return hbitmap_count(bitmap->bitmap);
diff --git a/block/backup.c b/block/backup.c
index 1c535b1..bf8c0e7 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,6 +37,8 @@ typedef struct CowRequest {
  typedef struct BackupBlockJob {
      BlockJob common;
      BlockDriverState *target;
+    /* bitmap for sync=dirty-bitmap */
+    BdrvDirtyBitmap *sync_bitmap;
      MirrorSyncMode sync_mode;
      RateLimit limit;
      BlockdevOnError on_source_error;
@@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void
*opaque)
      g_free(data);
  }
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    /* we need to yield so that qemu_aio_flush() returns.
+     * (without, VM does not reboot)
+     */
+    if (job->common.speed) {
+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+
job->sectors_read);
+        job->sectors_read = 0;
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+    } else {
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+    }
+
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    return false;
+}
+
  static void coroutine_fn backup_run(void *opaque)
  {
      BackupBlockJob *job = opaque;
@@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
      };
      int64_t start, end;
      int ret = 0;
+    bool error_is_read;
      QLIST_INIT(&job->inflight_reqs);
      qemu_co_rwlock_init(&job->flush_rwlock);
      start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
      job->bitmap = hbitmap_alloc(end, 0);
@@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
              qemu_coroutine_yield();
              job->common.busy = true;
          }
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        /* Dirty Bitmap sync has a slightly different iteration
method */
+        HBitmapIter hbi;
+        int64_t sector;
+        int64_t cluster;
+        int64_t last_cluster = -1;
+        bool polyrhythmic;
+
+        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
+        /* Does the granularity happen to match our backup cluster
size? */
+        polyrhythmic =
(bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
+                        BACKUP_CLUSTER_SIZE);
+
+        /* Find the next dirty /sector/ and copy that /cluster/ */
+        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+            if (yield_and_check(job)) {
+                goto leave;
+            }
+            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+
+            /* Play some catchup with the progress meter */
+            if (cluster != last_cluster + 1) {
+                job->common.offset += ((cluster - last_cluster - 1) *
+                                       BACKUP_CLUSTER_SIZE);
+            }

I guess the "- 1" in the calculation comes from backup_do_cow()
modifying job->common.offset, too. How about simply overwriting it like
"job->common.offset = cluster * BACKUP_CLUSTER_SIZE"? And it may be a
good idea to move this update above yield_and_check() because that's the
progress we have actually done up to that point (and I imagine
yield_and_check() is the point where the caller might check the progress).


Yes: If the last cluster we wrote out was 2, and we're on 4 now, we only need to make up for #3, because #4 will be handled in the usual ways.

I don't know how wise this was, but I wanted to only "fake" progress updates for clusters I *know* I skipped in incremental backups, so I wanted to make sure the math worked out. Just overwriting the progress seemed like cheating a little, I guess ...

It was kind of just a "testing thing" to just do the additions and see if the math checked out in the end. You may notice that unlike Fam's iotests, I leave the progress checks enabled for my tests.

Putting it above the cancellation check does make sense, though. I didn't test cancellations as well as I ought have.

+
+            do {
+                ret = backup_do_cow(bs, cluster *
BACKUP_SECTORS_PER_CLUSTER,
+                                    BACKUP_SECTORS_PER_CLUSTER,
&error_is_read);
+                if ((ret < 0) &&
+                    backup_error_action(job, error_is_read, -ret) ==
+                    BLOCK_ERROR_ACTION_REPORT) {
+                    goto leave;
+                }
+            } while (ret < 0);
+
+            /* Advance (or rewind) our iterator if we need to. */
+            if (polyrhythmic) {
+                bdrv_set_dirty_iter(&hbi,
+                                    (cluster + 1) *
BACKUP_SECTORS_PER_CLUSTER);
+            }
+
+            last_cluster = cluster;
+        }
+
+        /* Play some final catchup with the progress meter */
+        if (last_cluster + 1 < end) {
+            job->common.offset += ((end - last_cluster - 1) *
+                                   BACKUP_CLUSTER_SIZE);
+        }
+

Superfluous empty line.

      } else {
          /* Both FULL and TOP SYNC_MODE's require copying.. */
          for (; start < end; start++) {
-            bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
+            if (yield_and_check(job)) {
                  break;
              }
@@ -351,12 +411,26 @@ static void coroutine_fn backup_run(void *opaque)
          }
      }
+leave:
      notifier_with_return_remove(&before_write);
      /* wait until pending backup_do_cow() calls have completed */
      qemu_co_rwlock_wrlock(&job->flush_rwlock);
      qemu_co_rwlock_unlock(&job->flush_rwlock);
+    if (job->sync_bitmap) {
+        BdrvDirtyBitmap *bm;
+        if (ret < 0) {
+            /* Merge the successor back into the parent, delete
nothing. */
+            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+            assert(bm);
+            bdrv_enable_dirty_bitmap(job->sync_bitmap);
+        } else {
+            /* Everything is fine, delete this bitmap and install the
backup. */
+            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        }
+    }
      hbitmap_free(job->bitmap);
      bdrv_iostatus_disable(target);
@@ -369,6 +443,7 @@ static void coroutine_fn backup_run(void *opaque)
  void backup_start(BlockDriverState *bs, BlockDriverState *target,
                    int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                    BlockdevOnError on_source_error,
                    BlockdevOnError on_target_error,
                    BlockCompletionFunc *cb, void *opaque,
@@ -412,17 +487,37 @@ void backup_start(BlockDriverState *bs,
BlockDriverState *target,
          return;
      }
+    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        if (!sync_bitmap) {
+            error_setg(errp, "must provide a valid bitmap name for "
+                             "\"dirty-bitmap\" sync mode");
+            return;
+        }
+
+        /* Create a new bitmap, and freeze/disable this one. */
+        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp)
< 0) {
+            return;
+        }
+

Another superfluous empty line.

+    } else if (sync_bitmap) {
+        error_setg(errp,
+                   "a sync_bitmap was provided to backup_run, "
+                   "but received an incompatible sync_mode (%s)",
+                   MirrorSyncMode_lookup[sync_mode]);
+        return;
+    }
+
      len = bdrv_getlength(bs);
      if (len < 0) {
          error_setg_errno(errp, -len, "unable to get length for '%s'",
                           bdrv_get_device_name(bs));
-        return;
+        goto error;
      }
      BackupBlockJob *job = block_job_create(&backup_job_driver, bs,
speed,
                                             cb, opaque, errp);
      if (!job) {
-        return;
+        goto error;
      }
      bdrv_op_block_all(target, job->common.blocker);
@@ -431,7 +526,15 @@ void backup_start(BlockDriverState *bs,
BlockDriverState *target,
      job->on_target_error = on_target_error;
      job->target = target;
      job->sync_mode = sync_mode;
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+                       sync_bitmap : NULL;
      job->common.len = len;
      job->common.co = qemu_coroutine_create(backup_run);
      qemu_coroutine_enter(job->common.co, job);
+    return;
+
+ error:
+    if (sync_bitmap) {
+        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+    }
  }
diff --git a/block/mirror.c b/block/mirror.c
index 77bd1ed..271dbf3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -718,6 +718,10 @@ void mirror_start(BlockDriverState *bs,
BlockDriverState *target,
      bool is_none_mode;
      BlockDriverState *base;
+    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+        return;
+    }
      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
      base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
      mirror_start_job(bs, target, replaces,
diff --git a/blockdev.c b/blockdev.c
index 826d0c1..3f41e82 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1569,6 +1569,7 @@ static void
drive_backup_prepare(BlkTransactionState *common, Error **errp)
                       backup->sync,
                       backup->has_mode, backup->mode,
                       backup->has_speed, backup->speed,
+                     backup->has_bitmap, backup->bitmap,
                       backup->has_on_source_error,
backup->on_source_error,
                       backup->has_on_target_error,
backup->on_target_error,
                       &local_err);
@@ -2429,6 +2430,7 @@ void qmp_drive_backup(const char *device, const
char *target,
                        enum MirrorSyncMode sync,
                        bool has_mode, enum NewImageMode mode,
                        bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
                        bool has_on_source_error, BlockdevOnError
on_source_error,
                        bool has_on_target_error, BlockdevOnError
on_target_error,
                        Error **errp)
@@ -2436,6 +2438,7 @@ void qmp_drive_backup(const char *device, const
char *target,
      BlockDriverState *bs;
      BlockDriverState *target_bs;
      BlockDriverState *source = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
      AioContext *aio_context;
      BlockDriver *drv = NULL;
      Error *local_err = NULL;
@@ -2534,7 +2537,16 @@ void qmp_drive_backup(const char *device, const
char *target,
      bdrv_set_aio_context(target_bs, aio_context);
-    backup_start(bs, target_bs, speed, sync, on_source_error,
on_target_error,
+    if (has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+            goto out;
+        }
+    }
+
+    backup_start(bs, target_bs, speed, sync, bmap,
+                 on_source_error, on_target_error,
                   block_job_cb, bs, &local_err);
      if (local_err != NULL) {
          bdrv_unref(target_bs);
@@ -2592,8 +2604,8 @@ void qmp_blockdev_backup(const char *device,
const char *target,
      bdrv_ref(target_bs);
      bdrv_set_aio_context(target_bs, aio_context);
-    backup_start(bs, target_bs, speed, sync, on_source_error,
on_target_error,
-                 block_job_cb, bs, &local_err);
+    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
+                 on_target_error, block_job_cb, bs, &local_err);
      if (local_err != NULL) {
          bdrv_unref(target_bs);
          error_propagate(errp, local_err);
diff --git a/hmp.c b/hmp.c
index b47f331..015499f 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1027,7 +1027,8 @@ void hmp_drive_backup(Monitor *mon, const QDict
*qdict)
      qmp_drive_backup(device, filename, !!format, format,
                       full ? MIRROR_SYNC_MODE_FULL :
MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &err);
+                     true, mode, false, 0, false, NULL,
+                     false, 0, false, 0, &err);
      hmp_handle_error(mon, &err);
  }
diff --git a/include/block/block.h b/include/block/block.h
index b2d84d6..8589e77 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -469,6 +469,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
                               int64_t cur_sector, int nr_sectors);
  void bdrv_dirty_iter_init(BlockDriverState *bs,
                            BdrvDirtyBitmap *bitmap, struct
HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
  int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
*bitmap);
  void bdrv_enable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7ad1950..2233790 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -588,6 +588,7 @@ void mirror_start(BlockDriverState *bs,
BlockDriverState *target,
   * @target: Block device to write to.
   * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
   * @sync_mode: What parts of the disk image should be copied to the
destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is
MIRROR_SYNC_MODE_DIRTY_BITMAP.
   * @on_source_error: The action to take upon error reading from the
source.
   * @on_target_error: The action to take upon error writing to the
target.
   * @cb: Completion function for the job.
@@ -598,6 +599,7 @@ void mirror_start(BlockDriverState *bs,
BlockDriverState *target,
   */
  void backup_start(BlockDriverState *bs, BlockDriverState *target,
                    int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                    BlockdevOnError on_source_error,
                    BlockdevOnError on_target_error,
                    BlockCompletionFunc *cb, void *opaque,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 940eff7..9c5a99c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -508,10 +508,12 @@
  #
  # @none: only copy data written from now on
  #
+# @dirty-bitmap: only copy data described by the dirty bitmap. Since:
2.3
+#
  # Since: 1.3
  ##
  { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
  ##
  # @BlockJobType:
@@ -686,14 +688,17 @@
  #          probe if @mode is 'existing', else the format of the source
  #
  # @sync: what parts of the disk image should be copied to the
destination
-#        (all the disk, only the sectors allocated in the topmost
image, or
-#        only new I/O).
+#        (all the disk, only the sectors allocated in the topmost
image, from a
+#        dirty bitmap, or only new I/O).
  #
  # @mode: #optional whether and how QEMU should create a new image,
default is
  #        'absolute-paths'.
  #
  # @speed: #optional the maximum speed, in bytes per second
  #
+# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
+#          (Since 2.3)
+#
  # @on-source-error: #optional the action to take on an error on the
source,
  #                   default 'report'.  'stop' and 'enospc' can only
be used
  #                   if the block device supports io-status (see
BlockInfo).
@@ -711,7 +716,7 @@
  { 'type': 'DriveBackup',
    'data': { 'device': 'str', 'target': 'str', '*format': 'str',
              'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
+            '*speed': 'int', '*bitmap': 'str',
              '*on-source-error': 'BlockdevOnError',
              '*on-target-error': 'BlockdevOnError' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ce7782f..5aa3845 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1048,7 +1048,7 @@ EQMP
      {
          .name       = "drive-backup",
          .args_type  =
"sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
          .mhandler.cmd_new = qmp_marshal_input_drive_backup,
      },
@@ -1075,8 +1075,9 @@ Arguments:
              (json-string, optional)
  - "sync": what parts of the disk image should be copied to the
destination;
    possibilities include "full" for all the disk, "top" for only the
sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
-  (MirrorSyncMode).
+  allocated in the topmost image, "dirty-bitmap" for only the dirty
sectors in
+  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
+- "bitmap": dirty bitmap name for sync==dirty-bitmap
  - "mode": whether and how QEMU should create a new image
            (NewImageMode, optional, default 'absolute-paths')
  - "speed": the maximum speed, in bytes per second (json-int, optional)

Looks good to me in general, now I need to find out what the successor
bitmap is used for; but I guess I'll find that out by reviewing the rest
of this series.

Max

--
—js



reply via email to

[Prev in Thread] Current Thread [Next in Thread]