Introduce a new API for thread-local blk_io_plug() that does not
traverse the block graph. The goal is to make blk_io_plug() multi-queue
friendly.
Instead of having block drivers track whether or not we're in a plugged
section, provide an API that allows them to defer a function call until
we're unplugged: blk_io_plug_call(fn, opaque). If blk_io_plug_call() is
called multiple times with the same fn/opaque pair, then fn() is only
called once at the end of the function - resulting in batching.
This patch introduces the API and changes blk_io_plug()/blk_io_unplug().
blk_io_plug()/blk_io_unplug() no longer require a BlockBackend argument
because the plug state is now thread-local.
Later patches convert block drivers to blk_io_plug_call() and then we
can finally remove .bdrv_co_io_plug() once all block drivers have been
converted.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
MAINTAINERS | 1 +
include/sysemu/block-backend-io.h | 13 +--
block/block-backend.c | 22 -----
block/plug.c | 159 ++++++++++++++++++++++++++++++
hw/block/dataplane/xen-block.c | 8 +-
hw/block/virtio-blk.c | 4 +-
hw/scsi/virtio-scsi.c | 6 +-
block/meson.build | 1 +
8 files changed, 173 insertions(+), 41 deletions(-)
create mode 100644 block/plug.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 50585117a0..574202295c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2644,6 +2644,7 @@ F: util/aio-*.c
F: util/aio-*.h
F: util/fdmon-*.c
F: block/io.c
+F: block/plug.c
F: migration/block*
F: include/block/aio.h
F: include/block/aio-wait.h
diff --git a/include/sysemu/block-backend-io.h
b/include/sysemu/block-backend-io.h
index d62a7ee773..be4dcef59d 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -100,16 +100,9 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);
-/*
- * blk_io_plug/unplug are thread-local operations. This means that multiple
- * IOThreads can simultaneously call plug/unplug, but the caller must ensure
- * that each unplug() is called in the same IOThread of the matching plug().
- */
-void coroutine_fn blk_co_io_plug(BlockBackend *blk);
-void co_wrapper blk_io_plug(BlockBackend *blk);
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk);
-void co_wrapper blk_io_unplug(BlockBackend *blk);
+void blk_io_plug(void);
+void blk_io_unplug(void);
+void blk_io_plug_call(void (*fn)(void *), void *opaque);
AioContext *blk_get_aio_context(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index ca537cd0ad..1f1d226ba6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2568,28 +2568,6 @@ void blk_add_insert_bs_notifier(BlockBackend *blk,
Notifier *notify)
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
-void coroutine_fn blk_co_io_plug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
- IO_CODE();
- GRAPH_RDLOCK_GUARD();
-
- if (bs) {
- bdrv_co_io_plug(bs);
- }
-}
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
- IO_CODE();
- GRAPH_RDLOCK_GUARD();
-
- if (bs) {
- bdrv_co_io_unplug(bs);
- }
-}
-
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
IO_CODE();
diff --git a/block/plug.c b/block/plug.c
new file mode 100644
index 0000000000..6738a568ba
--- /dev/null
+++ b/block/plug.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Block I/O plugging
+ *
+ * Copyright Red Hat.
+ *
+ * This API defers a function call within a blk_io_plug()/blk_io_unplug()
+ * section, allowing multiple calls to batch up. This is a performance
+ * optimization that is used in the block layer to submit several I/O requests
+ * at once instead of individually:
+ *
+ * blk_io_plug(); <-- start of plugged region
+ * ...
+ * blk_io_plug_call(my_func, my_obj); <-- deferred my_func(my_obj) call
+ * blk_io_plug_call(my_func, my_obj); <-- another
+ * blk_io_plug_call(my_func, my_obj); <-- another
+ * ...
+ * blk_io_unplug(); <-- end of plugged region, my_func(my_obj) is called once
+ *
+ * This code is actually generic and not tied to the block layer. If another
+ * subsystem needs this functionality, it could be renamed.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine-tls.h"
+#include "qemu/notify.h"
+#include "qemu/thread.h"
+#include "sysemu/block-backend.h"
+
+/* A function call that has been deferred until unplug() */
+typedef struct {
+ void (*fn)(void *);
+ void *opaque;
+} UnplugFn;
+
+/* Per-thread state */
+typedef struct {
+ unsigned count; /* how many times has plug() been called? */
+ GArray *unplug_fns; /* functions to call at unplug time */
+} Plug;
+
+/* Use get_ptr_plug() to fetch this thread-local value */
+QEMU_DEFINE_STATIC_CO_TLS(Plug, plug);
+
+/* Called at thread cleanup time */
+static void blk_io_plug_atexit(Notifier *n, void *value)
+{
+ Plug *plug = get_ptr_plug();
+ g_array_free(plug->unplug_fns, TRUE);
+}
+
+/* This won't involve coroutines, so use __thread */
+static __thread Notifier blk_io_plug_atexit_notifier;
+
+/**
+ * blk_io_plug_call:
+ * @fn: a function pointer to be invoked
+ * @opaque: a user-defined argument to @fn()
+ *
+ * Call @fn(@opaque) immediately if not within a blk_io_plug()/blk_io_unplug()
+ * section.
+ *
+ * Otherwise defer the call until the end of the outermost
+ * blk_io_plug()/blk_io_unplug() section in this thread. If the same
+ * @fn/@opaque pair has already been deferred, it will only be called once upon
+ * blk_io_unplug() so that accumulated calls are batched into a single call.
+ *
+ * The caller must ensure that @opaque is not be freed before @fn() is invoked.
+ */
+void blk_io_plug_call(void (*fn)(void *), void *opaque)
+{
+ Plug *plug = get_ptr_plug();
+
+ /* Call immediately if we're not plugged */
+ if (plug->count == 0) {
+ fn(opaque);
+ return;
+ }
+
+ GArray *array = plug->unplug_fns;
+ if (!array) {
+ array = g_array_new(FALSE, FALSE, sizeof(UnplugFn));
+ plug->unplug_fns = array;
+ blk_io_plug_atexit_notifier.notify = blk_io_plug_atexit;
+ qemu_thread_atexit_add(&blk_io_plug_atexit_notifier);
+ }
+
+ UnplugFn *fns = (UnplugFn *)array->data;
+ UnplugFn new_fn = {
+ .fn = fn,
+ .opaque = opaque,
+ };
+
+ /*
+ * There won't be many, so do a linear search. If this becomes a bottleneck
+ * then a binary search (glib 2.62+) or different data structure could be
+ * used.
+ */
+ for (guint i = 0; i < array->len; i++) {
+ if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) {
+ return; /* already exists */
+ }
+ }
+
+ g_array_append_val(array, new_fn);
+}
+
+/**
+ * blk_io_plug: Defer blk_io_plug_call() functions until blk_io_unplug()
+ *
+ * blk_io_plug/unplug are thread-local operations. This means that multiple
+ * threads can simultaneously call plug/unplug, but the caller must ensure that
+ * each unplug() is called in the same thread of the matching plug().
+ *
+ * Nesting is supported. blk_io_plug_call() functions are only called at the
+ * outermost blk_io_unplug().
+ */
+void blk_io_plug(void)
+{
+ Plug *plug = get_ptr_plug();
+
+ assert(plug->count < UINT32_MAX);
+
+ plug->count++;
+}
+
+/**
+ * blk_io_unplug: Run any pending blk_io_plug_call() functions
+ *
+ * There must have been a matching blk_io_plug() call in the same thread prior
+ * to this blk_io_unplug() call.
+ */
+void blk_io_unplug(void)
+{
+ Plug *plug = get_ptr_plug();
+
+ assert(plug->count > 0);
+
+ if (--plug->count > 0) {
+ return;
+ }
+
+ GArray *array = plug->unplug_fns;
+ if (!array) {
+ return;
+ }
+
+ UnplugFn *fns = (UnplugFn *)array->data;
+
+ for (guint i = 0; i < array->len; i++) {
+ fns[i].fn(fns[i].opaque);
+ }
+
+ /*
+ * This resets the array without freeing memory so that appending is cheap
+ * in the future.
+ */
+ g_array_set_size(array, 0);
+}
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index d8bc39d359..e49c24f63d 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -537,7 +537,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane
*dataplane)
* is below us.
*/
if (inflight_atstart > IO_PLUG_THRESHOLD) {
- blk_io_plug(dataplane->blk);
+ blk_io_plug();
}
while (rc != rp) {
/* pull request from ring */
@@ -577,12 +577,12 @@ static bool xen_block_handle_requests(XenBlockDataPlane
*dataplane)
if (inflight_atstart > IO_PLUG_THRESHOLD &&
batched >= inflight_atstart) {
- blk_io_unplug(dataplane->blk);
+ blk_io_unplug();
}
xen_block_do_aio(request);
if (inflight_atstart > IO_PLUG_THRESHOLD) {
if (batched >= inflight_atstart) {
- blk_io_plug(dataplane->blk);
+ blk_io_plug();
batched = 0;
} else {
batched++;
@@ -590,7 +590,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane
*dataplane)
}
}
if (inflight_atstart > IO_PLUG_THRESHOLD) {
- blk_io_unplug(dataplane->blk);
+ blk_io_unplug();
}
return done_something;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8f65ea4659..b4286424c1 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1134,7 +1134,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
bool suppress_notifications = virtio_queue_get_notification(vq);
aio_context_acquire(blk_get_aio_context(s->blk));
- blk_io_plug(s->blk);
+ blk_io_plug();
do {
if (suppress_notifications) {
@@ -1158,7 +1158,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
virtio_blk_submit_multireq(s, &mrb);
}
- blk_io_unplug(s->blk);
+ blk_io_unplug();
aio_context_release(blk_get_aio_context(s->blk));
}
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 612c525d9d..534a44ee07 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -799,7 +799,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI
*s, VirtIOSCSIReq *req)
return -ENOBUFS;
}
scsi_req_ref(req->sreq);
- blk_io_plug(d->conf.blk);
+ blk_io_plug();
object_unref(OBJECT(d));
return 0;
}
@@ -810,7 +810,7 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI
*s, VirtIOSCSIReq *req)
if (scsi_req_enqueue(sreq)) {
scsi_req_continue(sreq);
}
- blk_io_unplug(sreq->dev->conf.blk);
+ blk_io_unplug();
scsi_req_unref(sreq);
}
@@ -836,7 +836,7 @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s,
VirtQueue *vq)
while (!QTAILQ_EMPTY(&reqs)) {
req = QTAILQ_FIRST(&reqs);
QTAILQ_REMOVE(&reqs, req, next);
- blk_io_unplug(req->sreq->dev->conf.blk);
+ blk_io_unplug();
scsi_req_unref(req->sreq);
virtqueue_detach_element(req->vq, &req->elem, 0);
virtio_scsi_free_req(req);
diff --git a/block/meson.build b/block/meson.build
index 486dda8b85..fb4332bd66 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -23,6 +23,7 @@ block_ss.add(files(
'mirror.c',
'nbd.c',
'null.c',
+ 'plug.c',
'qapi.c',
'qcow2-bitmap.c',
'qcow2-cache.c',
--
2.40.1