On Fri, 03/11 11:28, Paolo Bonzini wrote:
On 10/03/2016 10:40, Christian Borntraeger wrote:
On 03/10/2016 10:03 AM, Christian Borntraeger wrote:
On 03/10/2016 02:51 AM, Fam Zheng wrote:
[...]
The aio_poll() inside "blk_set_aio_context(s->conf->conf.blk, s->ctx)" looks
suspicious:
main thread iothread
----------------------------------------------------------------------------
virtio_blk_handle_output()
virtio_blk_data_plane_start()
vblk->dataplane_started = true;
blk_set_aio_context()
bdrv_set_aio_context()
bdrv_drain()
aio_poll()
<snip...>
virtio_blk_handle_output()
/* s->dataplane_started is true */
!!! -> virtio_blk_handle_request()
event_notifier_set(ioeventfd)
aio_poll()
virtio_blk_handle_request()
Christian, could you try the followed patch? The aio_poll above is replaced
with a "limited aio_poll" that doesn't disptach ioeventfd.
(Note: perhaps moving "vblk->dataplane_started = true;" after
blk_set_aio_context() also *works around* this.)
---
diff --git a/block.c b/block.c
index ba24b8e..e37e8f7 100644
--- a/block.c
+++ b/block.c
@@ -4093,7 +4093,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
- bdrv_drain(bs); /* ensure there are no in-flight requests */
+ /* ensure there are no in-flight requests */
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
bdrv_detach_aio_context(bs);
That seems to do the trick.
Or not. Crashed again :-(
I would put bdrv_drained_end just before aio_context_release.
This won't work. bdrv_drained_end must be called with the same ctx as
bdrv_drained_begin, which is only true before bdrv_detach_aio_context().
But secondarily, I'm thinking of making the logic simpler to understand
in two ways:
1) adding a mutex around virtio_blk_data_plane_start/stop.
2) moving
event_notifier_set(virtio_queue_get_host_notifier(s->vq));
virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);
to a bottom half (created with aio_bh_new in s->ctx). The bottom half
takes the mutex, checks again "if (vblk->dataplane_started)" and if it's
true starts the processing.
Like this? If it captures your idea, could Bo or Christian help test?
From b5b8886693828d498ee184fc7d4e13d8c06cdf39 Mon Sep 17 00:00:00 2001
From: Fam Zheng <address@hidden>
Date: Thu, 10 Mar 2016 10:26:36 +0800
Subject: [PATCH] virtio-blk dataplane start crash fix
Suggested-by: Paolo Bonzini <address@hidden>
Signed-off-by: Fam Zheng <address@hidden>
---
block.c | 4 +++-
hw/block/dataplane/virtio-blk.c | 39 ++++++++++++++++++++++++++++++++-------
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/block.c b/block.c
index ba24b8e..e37e8f7 100644
--- a/block.c
+++ b/block.c
@@ -4093,7 +4093,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
- bdrv_drain(bs); /* ensure there are no in-flight requests */
+ /* ensure there are no in-flight requests */
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
bdrv_detach_aio_context(bs);
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 36f3d2b..6db5c22 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -49,6 +49,8 @@ struct VirtIOBlockDataPlane {
/* Operation blocker on BDS */
Error *blocker;
+
+ QemuMutex start_lock;
};
/* Raise an interrupt to signal guest, if necessary */
@@ -150,6 +152,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev,
VirtIOBlkConf *conf,
s = g_new0(VirtIOBlockDataPlane, 1);
s->vdev = vdev;
s->conf = conf;
+ qemu_mutex_init(&s->start_lock);
if (conf->iothread) {
s->iothread = conf->iothread;
@@ -184,15 +187,38 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane
*s)
g_free(s);
}
+typedef struct {
+ VirtIOBlockDataPlane *s;
+ QEMUBH *bh;
+} VirtIOBlockStartData;
+
+static void virtio_blk_data_plane_start_bh_cb(void *opaque)
+{
+ VirtIOBlockStartData *data = opaque;
+ VirtIOBlockDataPlane *s = data->s;
+
+ /* Kick right away to begin processing requests already in vring */
+ event_notifier_set(virtio_queue_get_host_notifier(s->vq));
+
+ /* Get this show started by hooking up our callbacks */
+ virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);
+
+ qemu_bh_delete(data->bh);
+ g_free(data);
+}
+
/* Context: QEMU global mutex held */
void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
+ VirtIOBlockStartData *data;
int r;
+ qemu_mutex_lock(&s->start_lock);
if (vblk->dataplane_started || s->starting) {
+ qemu_mutex_unlock(&s->start_lock);
return;
}
@@ -221,13 +247,11 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
blk_set_aio_context(s->conf->conf.blk, s->ctx);
- /* Kick right away to begin processing requests already in vring */
- event_notifier_set(virtio_queue_get_host_notifier(s->vq));
-
- /* Get this show started by hooking up our callbacks */
- aio_context_acquire(s->ctx);
- virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);
- aio_context_release(s->ctx);
+ data = g_new(VirtIOBlockStartData, 1);
+ data->s = s;
+ data->bh = aio_bh_new(s->ctx, virtio_blk_data_plane_start_bh_cb, data);
+ qemu_bh_schedule(data->bh);
+ qemu_mutex_unlock(&s->start_lock);
return;
fail_host_notifier:
@@ -236,6 +260,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
s->disabled = true;
s->starting = false;
vblk->dataplane_started = true;
+ qemu_mutex_unlock(&s->start_lock);
}
/* Context: QEMU global mutex held */