[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 5/9] block/io: expand in_flight inc/dec section: simple cases
From: |
Vladimir Sementsov-Ogievskiy |
Subject: |
[PATCH v2 5/9] block/io: expand in_flight inc/dec section: simple cases |
Date: |
Mon, 27 Apr 2020 17:39:03 +0300 |
It's safer to expand the in_flight section so that it starts before
entering the coroutine in synchronous wrappers, due to the following
(theoretical) problem:
Consider a write.
It's possible that qemu_coroutine_enter() only schedules execution;
assume that this is the case.
Then we may possibly have the following:
1. Somehow check that we are not in a drained section in the outer code.
2. Call bdrv_pwritev(), assuming that it will increase in_flight, which
will protect us from a drained section being started.
3. It calls bdrv_prwv_co() -> bdrv_coroutine_enter() (in_flight is not
yet increased).
4. Assume the coroutine is not yet actually entered, only scheduled, and
we go on to some code which starts a drained section (as in_flight is
zero).
5. The scheduled coroutine starts and blindly increases in_flight, and
we are in a drained section with an in-flight request.
Signed-off-by: Vladimir Sementsov-Ogievskiy <address@hidden>
---
block/io.c | 161 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 124 insertions(+), 37 deletions(-)
diff --git a/block/io.c b/block/io.c
index 061f3f2590..a91d8c1e21 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1511,7 +1511,8 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_preadv_part(BdrvChild *child,
int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
@@ -1540,8 +1541,6 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
return 0;
}
- bdrv_inc_in_flight(bs);
-
/* Don't do copy-on-read if we read data before write operation */
if (atomic_read(&bs->copy_on_read)) {
flags |= BDRV_REQ_COPY_ON_READ;
@@ -1554,13 +1553,26 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
- bdrv_dec_in_flight(bs);
bdrv_padding_destroy(&pad);
return ret;
}
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+ int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ bdrv_inc_in_flight(child->bs);
+ ret = bdrv_do_preadv_part(child, offset, bytes, qiov, qiov_offset, flags);
+ bdrv_dec_in_flight(child->bs);
+
+ return ret;
+}
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
@@ -1922,7 +1934,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_pwritev_part(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
@@ -1962,7 +1975,6 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
return 0;
}
- bdrv_inc_in_flight(bs);
/*
* Align write if necessary by performing a read-modify-write cycle.
* Pad qiov with the read parts and be sure to have a tracked request not
@@ -1987,7 +1999,19 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
out:
tracked_request_end(&req);
- bdrv_dec_in_flight(bs);
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ bdrv_inc_in_flight(child->bs);
+ ret = bdrv_do_pwritev_part(child, offset, bytes, qiov, qiov_offset, flags);
+ bdrv_dec_in_flight(child->bs);
return ret;
}
@@ -2014,17 +2038,18 @@ typedef struct RwCo {
BdrvRequestFlags flags;
} RwCo;
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
RwCo *rwco = opaque;
if (!rwco->is_write) {
- rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
+ rwco->ret = bdrv_do_preadv_part(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov, 0,
rwco->flags);
} else {
- rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
+ rwco->ret = bdrv_do_pwritev_part(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov, 0,
rwco->flags);
}
aio_wait_kick();
@@ -2047,6 +2072,8 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
.flags = flags,
};
+ bdrv_inc_in_flight(child->bs);
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
@@ -2055,6 +2082,9 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
bdrv_coroutine_enter(child->bs, co);
BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
}
+
+ bdrv_dec_in_flight(child->bs);
+
return rwco.ret;
}
@@ -2699,15 +2729,14 @@ typedef struct BdrvVmstateCo {
int ret;
} BdrvVmstateCo;
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+bdrv_do_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
{
BlockDriver *drv = bs->drv;
int ret = -ENOTSUP;
- bdrv_inc_in_flight(bs);
-
if (!drv) {
ret = -ENOMEDIUM;
} else if (drv->bdrv_load_vmstate) {
@@ -2717,17 +2746,19 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector
*qiov, int64_t pos,
ret = drv->bdrv_save_vmstate(bs, qiov, pos);
}
} else if (bs->file) {
- ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+ bdrv_inc_in_flight(bs->file->bs);
+ ret = bdrv_do_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+ bdrv_dec_in_flight(bs->file->bs);
}
- bdrv_dec_in_flight(bs);
return ret;
}
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
BdrvVmstateCo *co = opaque;
- co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
+ co->ret = bdrv_do_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
aio_wait_kick();
}
@@ -2735,8 +2766,12 @@ static inline int
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
{
+ int ret;
+
+ bdrv_inc_in_flight(bs);
+
if (qemu_in_coroutine()) {
- return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+ ret = bdrv_do_rw_vmstate(bs, qiov, pos, is_read);
} else {
BdrvVmstateCo data = {
.bs = bs,
@@ -2749,8 +2784,12 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector
*qiov, int64_t pos,
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
- return data.ret;
+ ret = data.ret;
}
+
+ bdrv_dec_in_flight(bs);
+
+ return ret;
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
@@ -2828,16 +2867,14 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* Coroutine block device emulation */
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_flush(BlockDriverState *bs)
{
int current_gen;
- int ret = 0;
-
- bdrv_inc_in_flight(bs);
+ int ret;
- if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
- bdrv_is_sg(bs)) {
- goto early_exit;
+ if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) {
+ return 0;
}
qemu_co_mutex_lock(&bs->reqs_lock);
@@ -2935,8 +2972,17 @@ out:
qemu_co_queue_next(&bs->flush_queue);
qemu_co_mutex_unlock(&bs->reqs_lock);
-early_exit:
+ return ret;
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+ int ret;
+
+ bdrv_inc_in_flight(bs);
+ ret = bdrv_do_flush(bs);
bdrv_dec_in_flight(bs);
+
return ret;
}
@@ -2945,11 +2991,12 @@ typedef struct FlushCo {
int ret;
} FlushCo;
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
FlushCo *rwco = opaque;
- rwco->ret = bdrv_co_flush(rwco->bs);
+ rwco->ret = bdrv_do_flush(rwco->bs);
aio_wait_kick();
}
@@ -2961,6 +3008,8 @@ int bdrv_flush(BlockDriverState *bs)
.ret = NOT_DONE,
};
+ bdrv_inc_in_flight(bs);
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_flush_co_entry(&flush_co);
@@ -2970,11 +3019,14 @@ int bdrv_flush(BlockDriverState *bs)
BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
}
+ bdrv_dec_in_flight(bs);
+
return flush_co.ret;
}
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
- int64_t bytes)
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes)
{
BdrvTrackedRequest req;
int max_pdiscard, ret;
@@ -3012,7 +3064,6 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child,
int64_t offset,
head = offset % align;
tail = (offset + bytes) % align;
- bdrv_inc_in_flight(bs);
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
@@ -3083,7 +3134,18 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child,
int64_t offset,
out:
bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
tracked_request_end(&req);
- bdrv_dec_in_flight(bs);
+ return ret;
+}
+
+int coroutine_fn bdrv_co_pdiscard(BdrvChild *child,
+ int64_t offset, int64_t bytes)
+{
+ int ret;
+
+ bdrv_inc_in_flight(child->bs);
+ ret = bdrv_do_pdiscard(child, offset, bytes);
+ bdrv_dec_in_flight(child->bs);
+
return ret;
}
@@ -3094,11 +3156,12 @@ typedef struct DiscardCo {
int ret;
} DiscardCo;
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
- rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
+ rwco->ret = bdrv_do_pdiscard(rwco->child, rwco->offset, rwco->bytes);
aio_wait_kick();
}
@@ -3112,6 +3175,8 @@ int bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes)
.ret = NOT_DONE,
};
+ bdrv_inc_in_flight(child->bs);
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_pdiscard_co_entry(&rwco);
@@ -3121,6 +3186,8 @@ int bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes)
BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
}
+ bdrv_dec_in_flight(child->bs);
+
return rwco.ret;
}
@@ -3411,9 +3478,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
* If 'exact' is true, the file must be resized to exactly the given
* 'offset'. Otherwise, it is sufficient for the node to be at least
* 'offset' bytes in length.
+ *
+ * To be called between exactly one pair of bdrv_inc/dec_in_flight()
*/
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, Error **errp)
+static int coroutine_fn bdrv_do_truncate(BdrvChild *child,
+ int64_t offset, bool exact,
+ PreallocMode prealloc, Error **errp)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
@@ -3444,7 +3514,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child,
int64_t offset, bool exact,
new_bytes = 0;
}
- bdrv_inc_in_flight(bs);
tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
BDRV_TRACKED_TRUNCATE);
@@ -3493,6 +3562,19 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child,
int64_t offset, bool exact,
out:
tracked_request_end(&req);
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child,
+ int64_t offset, bool exact,
+ PreallocMode prealloc, Error **errp)
+{
+ int ret;
+ BlockDriverState *bs = child->bs;
+
+ bdrv_inc_in_flight(bs);
+ ret = bdrv_do_truncate(child, offset, exact, prealloc, errp);
bdrv_dec_in_flight(bs);
return ret;
@@ -3507,10 +3589,11 @@ typedef struct TruncateCo {
int ret;
} TruncateCo;
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
{
TruncateCo *tco = opaque;
- tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
+ tco->ret = bdrv_do_truncate(tco->child, tco->offset, tco->exact,
tco->prealloc, tco->errp);
aio_wait_kick();
}
@@ -3528,6 +3611,8 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool
exact,
.ret = NOT_DONE,
};
+ bdrv_inc_in_flight(child->bs);
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_truncate_co_entry(&tco);
@@ -3537,5 +3622,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool
exact,
BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
}
+ bdrv_dec_in_flight(child->bs);
+
return tco.ret;
}
--
2.21.0
- [PATCH v2 0/9] block/io: safer inc/dec in_flight sections, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 1/9] block/io: refactor bdrv_is_allocated_above to run only one coroutine, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 8/9] block/io: move bdrv_make_zero under block-status, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 3/9] block/io: move flush and pdiscard stuff down, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 6/9] block/io: expand in_flight inc/dec section: block-status, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 4/9] block/io: move bdrv_rw_co_entry and friends down, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 5/9] block/io: expand in_flight inc/dec section: simple cases,
Vladimir Sementsov-Ogievskiy <=
- [PATCH v2 9/9] block/io: expand in_flight inc/dec section: bdrv_make_zero, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 2/9] block/io: refactor bdrv_co_ioctl: move aio stuff to corresponding block, Vladimir Sementsov-Ogievskiy, 2020/04/27
- [PATCH v2 7/9] block/io: add bdrv_do_pwrite_zeroes, Vladimir Sementsov-Ogievskiy, 2020/04/27