[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH for 2.7 resend] linux-aio: share one LinuxAioState within an AioContext
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH for 2.7 resend] linux-aio: share one LinuxAioState within an AioContext
Date: Wed, 13 Jul 2016 15:25:07 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.1.1
Ping.
On 04/07/2016 18:33, Paolo Bonzini wrote:
> This has better performance because it executes fewer system calls
> and does not use a bottom half per disk.
>
> Originally proposed by Ming Lei.
>
> Acked-by: Stefan Hajnoczi <address@hidden>
> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
> async.c | 23 +++++++
> block/linux-aio.c | 10 ++--
> block/raw-posix.c | 119
> +++++--------------------------------
> block/raw-win32.c | 2 +-
> include/block/aio.h | 13 ++++
> {block => include/block}/raw-aio.h | 0
> 6 files changed, 57 insertions(+), 110 deletions(-)
> rename {block => include/block}/raw-aio.h (100%)
>
> diff --git a/async.c b/async.c
> index b4bf205..6caa98c 100644
> --- a/async.c
> +++ b/async.c
> @@ -29,6 +29,7 @@
> #include "block/thread-pool.h"
> #include "qemu/main-loop.h"
> #include "qemu/atomic.h"
> +#include "block/raw-aio.h"
>
> /***********************************************************/
> /* bottom halves (can be seen as timers which expire ASAP) */
> @@ -242,6 +243,14 @@ aio_ctx_finalize(GSource *source)
> qemu_bh_delete(ctx->notify_dummy_bh);
> thread_pool_free(ctx->thread_pool);
>
> +#ifdef CONFIG_LINUX_AIO
> + if (ctx->linux_aio) {
> + laio_detach_aio_context(ctx->linux_aio, ctx);
> + laio_cleanup(ctx->linux_aio);
> + ctx->linux_aio = NULL;
> + }
> +#endif
> +
> qemu_mutex_lock(&ctx->bh_lock);
> while (ctx->first_bh) {
> QEMUBH *next = ctx->first_bh->next;
> @@ -282,6 +291,17 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
> return ctx->thread_pool;
> }
>
> +#ifdef CONFIG_LINUX_AIO
> +LinuxAioState *aio_get_linux_aio(AioContext *ctx)
> +{
> + if (!ctx->linux_aio) {
> + ctx->linux_aio = laio_init();
> + laio_attach_aio_context(ctx->linux_aio, ctx);
> + }
> + return ctx->linux_aio;
> +}
> +#endif
> +
> void aio_notify(AioContext *ctx)
> {
> /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
> @@ -345,6 +365,9 @@ AioContext *aio_context_new(Error **errp)
> false,
> (EventNotifierHandler *)
> event_notifier_dummy_cb);
> +#ifdef CONFIG_LINUX_AIO
> + ctx->linux_aio = NULL;
> +#endif
> ctx->thread_pool = NULL;
> qemu_mutex_init(&ctx->bh_lock);
> rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
> diff --git a/block/linux-aio.c b/block/linux-aio.c
> index e468960..3eb0a0e 100644
> --- a/block/linux-aio.c
> +++ b/block/linux-aio.c
> @@ -50,6 +50,8 @@ typedef struct {
> } LaioQueue;
>
> struct LinuxAioState {
> + AioContext *aio_context;
> +
> io_context_t ctx;
> EventNotifier e;
>
> @@ -227,15 +229,14 @@ static void ioq_submit(LinuxAioState *s)
>
> void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
> {
> - assert(!s->io_q.plugged);
> - s->io_q.plugged = 1;
> + s->io_q.plugged++;
> }
>
> void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
> {
> assert(s->io_q.plugged);
> - s->io_q.plugged = 0;
> - if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
> + if (--s->io_q.plugged == 0 &&
> + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
> ioq_submit(s);
> }
> }
> @@ -325,6 +326,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext
> *old_context)
>
> void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
> {
> + s->aio_context = new_context;
> s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
> aio_set_event_notifier(new_context, &s->e, false,
> qemu_laio_completion_cb);
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index bef7a67..aedf575 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -32,7 +32,7 @@
> #include "trace.h"
> #include "block/thread-pool.h"
> #include "qemu/iov.h"
> -#include "raw-aio.h"
> +#include "block/raw-aio.h"
> #include "qapi/util.h"
> #include "qapi/qmp/qstring.h"
>
> @@ -137,10 +137,6 @@ typedef struct BDRVRawState {
> int open_flags;
> size_t buf_align;
>
> -#ifdef CONFIG_LINUX_AIO
> - int use_aio;
> - LinuxAioState *aio_ctx;
> -#endif
> #ifdef CONFIG_XFS
> bool is_xfs:1;
> #endif
> @@ -154,9 +150,6 @@ typedef struct BDRVRawState {
> typedef struct BDRVRawReopenState {
> int fd;
> int open_flags;
> -#ifdef CONFIG_LINUX_AIO
> - int use_aio;
> -#endif
> } BDRVRawReopenState;
>
> static int fd_open(BlockDriverState *bs);
> @@ -374,58 +367,15 @@ static void raw_parse_flags(int bdrv_flags, int
> *open_flags)
> }
> }
>
> -static void raw_detach_aio_context(BlockDriverState *bs)
> -{
> #ifdef CONFIG_LINUX_AIO
> - BDRVRawState *s = bs->opaque;
> -
> - if (s->use_aio) {
> - laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
> - }
> -#endif
> -}
> -
> -static void raw_attach_aio_context(BlockDriverState *bs,
> - AioContext *new_context)
> +static bool raw_use_aio(int bdrv_flags)
> {
> -#ifdef CONFIG_LINUX_AIO
> - BDRVRawState *s = bs->opaque;
> -
> - if (s->use_aio) {
> - laio_attach_aio_context(s->aio_ctx, new_context);
> - }
> -#endif
> -}
> -
> -#ifdef CONFIG_LINUX_AIO
> -static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
> -{
> - int ret = -1;
> - assert(aio_ctx != NULL);
> - assert(use_aio != NULL);
> /*
> * Currently Linux do AIO only for files opened with O_DIRECT
> * specified so check NOCACHE flag too
> */
> - if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
> - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
> -
> - /* if non-NULL, laio_init() has already been run */
> - if (*aio_ctx == NULL) {
> - *aio_ctx = laio_init();
> - if (!*aio_ctx) {
> - goto error;
> - }
> - }
> - *use_aio = 1;
> - } else {
> - *use_aio = 0;
> - }
> -
> - ret = 0;
> -
> -error:
> - return ret;
> + return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
> + (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
> }
> #endif
>
> @@ -494,13 +444,7 @@ static int raw_open_common(BlockDriverState *bs, QDict
> *options,
> s->fd = fd;
>
> #ifdef CONFIG_LINUX_AIO
> - if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
> - qemu_close(fd);
> - ret = -errno;
> - error_setg_errno(errp, -ret, "Could not set AIO state");
> - goto fail;
> - }
> - if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
> + if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
> error_setg(errp, "aio=native was specified, but it requires "
> "cache.direct=on, which was not specified.");
> ret = -EINVAL;
> @@ -567,8 +511,6 @@ static int raw_open_common(BlockDriverState *bs, QDict
> *options,
> }
> #endif
>
> - raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
> -
> ret = 0;
> fail:
> if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
> @@ -603,18 +545,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
> state->opaque = g_new0(BDRVRawReopenState, 1);
> raw_s = state->opaque;
>
> -#ifdef CONFIG_LINUX_AIO
> - raw_s->use_aio = s->use_aio;
> -
> - /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
> - * valid in the 'false' condition even if aio_ctx is set, and
> raw_set_aio()
> - * won't override aio_ctx if aio_ctx is non-NULL */
> - if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
> - error_setg(errp, "Could not set AIO state");
> - return -1;
> - }
> -#endif
> -
> if (s->type == FTYPE_CD) {
> raw_s->open_flags |= O_NONBLOCK;
> }
> @@ -697,9 +627,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
>
> qemu_close(s->fd);
> s->fd = raw_s->fd;
> -#ifdef CONFIG_LINUX_AIO
> - s->use_aio = raw_s->use_aio;
> -#endif
>
> g_free(state->opaque);
> state->opaque = NULL;
> @@ -1337,9 +1264,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState
> *bs, uint64_t offset,
> if (!bdrv_qiov_is_aligned(bs, qiov)) {
> type |= QEMU_AIO_MISALIGNED;
> #ifdef CONFIG_LINUX_AIO
> - } else if (s->use_aio) {
> + } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> assert(qiov->size == bytes);
> - return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
> + return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
> #endif
> }
> }
> @@ -1365,9 +1293,9 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState
> *bs, uint64_t offset,
> static void raw_aio_plug(BlockDriverState *bs)
> {
> #ifdef CONFIG_LINUX_AIO
> - BDRVRawState *s = bs->opaque;
> - if (s->use_aio) {
> - laio_io_plug(bs, s->aio_ctx);
> + if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> + laio_io_plug(bs, aio);
> }
> #endif
> }
> @@ -1375,9 +1303,9 @@ static void raw_aio_plug(BlockDriverState *bs)
> static void raw_aio_unplug(BlockDriverState *bs)
> {
> #ifdef CONFIG_LINUX_AIO
> - BDRVRawState *s = bs->opaque;
> - if (s->use_aio) {
> - laio_io_unplug(bs, s->aio_ctx);
> + if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> + laio_io_unplug(bs, aio);
> }
> #endif
> }
> @@ -1397,13 +1325,6 @@ static void raw_close(BlockDriverState *bs)
> {
> BDRVRawState *s = bs->opaque;
>
> - raw_detach_aio_context(bs);
> -
> -#ifdef CONFIG_LINUX_AIO
> - if (s->use_aio) {
> - laio_cleanup(s->aio_ctx);
> - }
> -#endif
> if (s->fd >= 0) {
> qemu_close(s->fd);
> s->fd = -1;
> @@ -1962,9 +1883,6 @@ BlockDriver bdrv_file = {
> .bdrv_get_allocated_file_size
> = raw_get_allocated_file_size,
>
> - .bdrv_detach_aio_context = raw_detach_aio_context,
> - .bdrv_attach_aio_context = raw_attach_aio_context,
> -
> .create_opts = &raw_create_opts,
> };
>
> @@ -2410,9 +2328,6 @@ static BlockDriver bdrv_host_device = {
> .bdrv_probe_blocksizes = hdev_probe_blocksizes,
> .bdrv_probe_geometry = hdev_probe_geometry,
>
> - .bdrv_detach_aio_context = raw_detach_aio_context,
> - .bdrv_attach_aio_context = raw_attach_aio_context,
> -
> /* generic scsi device */
> #ifdef __linux__
> .bdrv_aio_ioctl = hdev_aio_ioctl,
> @@ -2532,9 +2447,6 @@ static BlockDriver bdrv_host_cdrom = {
> .bdrv_get_allocated_file_size
> = raw_get_allocated_file_size,
>
> - .bdrv_detach_aio_context = raw_detach_aio_context,
> - .bdrv_attach_aio_context = raw_attach_aio_context,
> -
> /* removable device support */
> .bdrv_is_inserted = cdrom_is_inserted,
> .bdrv_eject = cdrom_eject,
> @@ -2665,9 +2577,6 @@ static BlockDriver bdrv_host_cdrom = {
> .bdrv_get_allocated_file_size
> = raw_get_allocated_file_size,
>
> - .bdrv_detach_aio_context = raw_detach_aio_context,
> - .bdrv_attach_aio_context = raw_attach_aio_context,
> -
> /* removable device support */
> .bdrv_is_inserted = cdrom_is_inserted,
> .bdrv_eject = cdrom_eject,
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index fd23891..ce77432 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -27,7 +27,7 @@
> #include "qemu/timer.h"
> #include "block/block_int.h"
> #include "qemu/module.h"
> -#include "raw-aio.h"
> +#include "block/raw-aio.h"
> #include "trace.h"
> #include "block/thread-pool.h"
> #include "qemu/iov.h"
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 88a64ee..afd72a7 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -47,6 +47,9 @@ typedef struct AioHandler AioHandler;
> typedef void QEMUBHFunc(void *opaque);
> typedef void IOHandler(void *opaque);
>
> +struct ThreadPool;
> +struct LinuxAioState;
> +
> struct AioContext {
> GSource source;
>
> @@ -119,6 +122,13 @@ struct AioContext {
> /* Thread pool for performing work and receiving completion callbacks */
> struct ThreadPool *thread_pool;
>
> +#ifdef CONFIG_LINUX_AIO
> + /* State for native Linux AIO. Uses aio_context_acquire/release for
> + * locking.
> + */
> + struct LinuxAioState *linux_aio;
> +#endif
> +
> /* TimerLists for calling timers - one per clock type */
> QEMUTimerListGroup tlg;
>
> @@ -335,6 +345,9 @@ GSource *aio_get_g_source(AioContext *ctx);
> /* Return the ThreadPool bound to this AioContext */
> struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
>
> +/* Return the LinuxAioState bound to this AioContext */
> +struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
> +
> /**
> * aio_timer_new:
> * @ctx: the aio context
> diff --git a/block/raw-aio.h b/include/block/raw-aio.h
> similarity index 100%
> rename from block/raw-aio.h
> rename to include/block/raw-aio.h
>