[Qemu-block] [PATCH v4] mirror: Rewrite mirror_iteration
From: Fam Zheng
Subject: [Qemu-block] [PATCH v4] mirror: Rewrite mirror_iteration
Date: Thu, 12 Nov 2015 11:44:02 +0800
The "pnum < nb_sectors" condition in deciding whether to actually copy
data is unnecessarily strict, and the qiov initialization is
unnecessary for bdrv_aio_write_zeroes and bdrv_aio_discard.
Rewrite mirror_iteration to fix both flaws.
Signed-off-by: Fam Zheng <address@hidden>
---
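Not part of the commit message: a rough, self-contained sketch of the
per-extent decision that the rewritten mirror_iteration below boils down to:
copy extents that hold data, write zeroes for zeroed extents, discard
unallocated ones, and fall back to a plain copy whenever the block status
query fails or the extent is not cluster-aligned on the target.
MOCK_BLOCK_DATA, MOCK_BLOCK_ZERO and mirror_method_for() are made-up
stand-ins for BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO and the in-loop switch; they
are not QEMU APIs.

/*
 * Illustration only: a simplified, standalone model of the per-extent
 * dispatch introduced by this patch.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum {
    MOCK_BLOCK_DATA = 1 << 0,   /* stand-in for BDRV_BLOCK_DATA */
    MOCK_BLOCK_ZERO = 1 << 1,   /* stand-in for BDRV_BLOCK_ZERO */
};

typedef enum {
    MIRROR_METHOD_COPY,
    MIRROR_METHOD_ZERO,
    MIRROR_METHOD_DISCARD,
} MirrorMethod;

/* Pick how to mirror one extent: copy real data (or anything we failed to
 * classify), write zeroes for zeroed extents, discard unallocated ones.
 * 'aligned' models the bdrv_round_to_clusters() check on the target. */
static MirrorMethod mirror_method_for(int status, bool aligned)
{
    if (status < 0 || (status & MOCK_BLOCK_DATA) || !aligned) {
        return MIRROR_METHOD_COPY;
    }
    return (status & MOCK_BLOCK_ZERO) ? MIRROR_METHOD_ZERO
                                      : MIRROR_METHOD_DISCARD;
}

int main(void)
{
    assert(mirror_method_for(MOCK_BLOCK_DATA, true) == MIRROR_METHOD_COPY);
    assert(mirror_method_for(MOCK_BLOCK_ZERO, true) == MIRROR_METHOD_ZERO);
    assert(mirror_method_for(0, true) == MIRROR_METHOD_DISCARD);
    assert(mirror_method_for(-1, true) == MIRROR_METHOD_COPY);
    assert(mirror_method_for(MOCK_BLOCK_ZERO, false) == MIRROR_METHOD_COPY);
    printf("mirror method sketch: all cases behave as described\n");
    return 0;
}

Compiled with any C99 compiler this just runs its assertions; in the patch
itself the COPY case goes through mirror_do_read() while the ZERO and DISCARD
cases share mirror_do_zero_or_discard().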
block/mirror.c | 198 ++++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 134 insertions(+), 64 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index b1252a1..5f22c65 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -45,7 +45,6 @@ typedef struct MirrorBlockJob {
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
- int64_t sector_num;
int64_t granularity;
size_t buf_size;
int64_t bdev_length;
@@ -157,28 +156,16 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_write_complete, op);
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+ int max_sectors)
{
BlockDriverState *source = s->common.bs;
- int nb_sectors, sectors_per_chunk, nb_chunks;
- int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
- uint64_t delay_ns = 0;
+ int sectors_per_chunk, nb_chunks;
+ int64_t next_chunk, next_sector;
+ int nb_sectors;
MirrorOp *op;
- int pnum;
- int64_t ret;
- s->sector_num = hbitmap_iter_next(&s->hbi);
- if (s->sector_num < 0) {
- bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
- s->sector_num = hbitmap_iter_next(&s->hbi);
- trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
- assert(s->sector_num >= 0);
- }
-
- hbitmap_next_sector = s->sector_num;
- sector_num = s->sector_num;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- end = s->bdev_length / BDRV_SECTOR_SIZE;
/* Extend the QEMUIOVector to include all adjacent blocks that will
* be copied in this operation.
@@ -198,23 +185,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
next_sector = sector_num;
next_chunk = sector_num / sectors_per_chunk;
- /* Wait for I/O to this cluster (from a previous iteration) to be done. */
- while (test_bit(next_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
- }
-
do {
int added_sectors, added_chunks;
- if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
- test_bit(next_chunk, s->in_flight_bitmap)) {
- assert(nb_sectors > 0);
- break;
- }
-
added_sectors = sectors_per_chunk;
if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
bdrv_round_to_clusters(s->target,
@@ -226,12 +199,15 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
*/
if (next_sector < sector_num) {
assert(nb_sectors == 0);
+ /* We have to make sure [sector_num, sector_num + max_sectors)
+ * covers the original range. */
+ max_sectors += sector_num - next_sector;
sector_num = next_sector;
next_chunk = next_sector / sectors_per_chunk;
}
}
- added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
+ added_sectors = MIN(added_sectors, max_sectors);
added_chunks = (added_sectors + sectors_per_chunk - 1) /
sectors_per_chunk;
/* When doing COW, it may happen that there is not enough space for
@@ -252,18 +228,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
break;
}
- /* We have enough free space to copy these sectors. */
- bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
-
nb_sectors += added_sectors;
nb_chunks += added_chunks;
next_sector += added_sectors;
next_chunk += added_chunks;
- if (!s->synced && s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
- }
- } while (delay_ns == 0 && next_sector < end);
+ } while (next_sector < sector_num + max_sectors);
+ assert(nb_sectors);
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_new(MirrorOp, 1);
op->s = s;
@@ -274,7 +245,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
- next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
@@ -282,39 +252,139 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
- /* Advance the HBitmapIter in parallel, so that we do not examine
- * the same sector twice.
- */
- if (next_sector > hbitmap_next_sector
- && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
- hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
- }
-
- next_sector += sectors_per_chunk;
}
- bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
-
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- ret = bdrv_get_block_status_above(source, NULL, sector_num,
- nb_sectors, &pnum);
- if (ret < 0 || pnum < nb_sectors ||
- (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
- bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
- mirror_read_complete, op);
- } else if (ret & BDRV_BLOCK_ZERO) {
+ bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+ mirror_read_complete, op);
+ return nb_sectors;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+ int64_t sector_num,
+ int nb_sectors,
+ bool is_discard)
+{
+ MirrorOp *op;
+
+ /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+ * so the freeing in mirror_iteration_done is nop. */
+ op = g_new0(MirrorOp, 1);
+ op->s = s;
+ op->sector_num = sector_num;
+ op->nb_sectors = nb_sectors;
+
+ s->in_flight++;
+ s->sectors_in_flight += nb_sectors;
+ if (is_discard) {
+ bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+ mirror_write_complete, op);
+ } else {
bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
- } else {
- assert(!(ret & BDRV_BLOCK_DATA));
- bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
- mirror_write_complete, op);
+ }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+ BlockDriverState *source = s->common.bs;
+ int64_t sector_num;
+ uint64_t delay_ns = 0;
+ int nb_chunks = 0;
+ int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+ int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+ sector_num = hbitmap_iter_next(&s->hbi);
+ if (sector_num < 0) {
+ bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+ sector_num = hbitmap_iter_next(&s->hbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+ assert(sector_num >= 0);
+ }
+
+ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+ int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+ int64_t next_chunk = next_sector / sectors_per_chunk;
+ if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ break;
+ }
+ /* Wait for I/O to this cluster (from a previous iteration) to be
+ * done.*/
+ while (test_bit(next_chunk, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
+ s->waiting_for_io = true;
+ qemu_coroutine_yield();
+ s->waiting_for_io = false;
+ }
+
+ /* Advance the HBitmapIter in parallel, so that we do not examine
+ * the same sector twice.
+ */
+ hbitmap_iter_next(&s->hbi);
+ nb_chunks++;
+ }
+ assert(nb_chunks);
+
+ /* Clear dirty bits before querying the block status, because
+ * calling bdrv_get_block_status_above could yield - if some blocks are marked
+ * dirty in this window, we need to know.
+ */
+ bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+ while (nb_chunks > 0 && sector_num < end) {
+ int io_sectors;
+ enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD
+ } mirror_method = MIRROR_METHOD_COPY;
+ int ret = bdrv_get_block_status_above(source, NULL, sector_num,
+ nb_chunks * sectors_per_chunk,
+ &io_sectors);
+ if (ret < 0) {
+ io_sectors = nb_chunks * sectors_per_chunk;
+ }
+
+ io_sectors -= io_sectors % sectors_per_chunk;
+ if (io_sectors < sectors_per_chunk) {
+ io_sectors = sectors_per_chunk;
+ } else if (!(ret & BDRV_BLOCK_DATA)) {
+ int64_t target_sector_num;
+ int target_nb_sectors;
+ bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+ &target_sector_num, &target_nb_sectors);
+ if (target_sector_num == sector_num &&
+ target_nb_sectors == io_sectors) {
+ mirror_method = ret & BDRV_BLOCK_ZERO ?
+ MIRROR_METHOD_ZERO :
+ MIRROR_METHOD_DISCARD;
+ }
+ }
+
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ io_sectors = mirror_do_read(s, sector_num, io_sectors);
+ break;
+ case MIRROR_METHOD_ZERO:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
+ break;
+ case MIRROR_METHOD_DISCARD:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+ break;
+ default:
+ abort();
+ }
+ assert(io_sectors);
+ sector_num += io_sectors;
+ nb_chunks -= io_sectors / sectors_per_chunk;
+ delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
}
return delay_ns;
}
--
2.4.3