[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization
From: |
Stefan Hajnoczi |
Subject: |
Re: [RFC 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization |
Date: |
Wed, 30 Mar 2022 11:45:12 +0100 |
On Tue, Mar 29, 2022 at 05:24:30PM +0200, Stefano Garzarella wrote:
> On Wed, Mar 23, 2022 at 11:17:26AM +0000, Stefan Hajnoczi wrote:
> > Avoid bounce buffers when QEMUIOVector elements are within previously
> > registered bdrv_register_buf() buffers.
> >
> > The idea is that emulated storage controllers will register guest RAM
> > using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
> > requests. Therefore no blkio_add_mem_region() calls are necessary in the
> > performance-critical I/O code path.
> >
> > This optimization doesn't apply if the I/O buffer is internally
> > allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
> > path because BDRV_REQ_REGISTERED_BUF is not set.
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > block/blkio.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 104 insertions(+), 4 deletions(-)
> >
> > diff --git a/block/blkio.c b/block/blkio.c
> > index dd2308b967..78f4ca5f49 100644
> > --- a/block/blkio.c
> > +++ b/block/blkio.c
> > @@ -1,7 +1,9 @@
> > #include "qemu/osdep.h"
> > #include <blkio.h>
> > #include "block/block_int.h"
> > +#include "exec/memory.h"
> > #include "qapi/error.h"
> > +#include "qemu/error-report.h"
> > #include "qapi/qmp/qdict.h"
> > #include "qemu/module.h"
> >
> > @@ -26,6 +28,9 @@ typedef struct {
> > /* Can we skip adding/deleting blkio_mem_regions? */
> > bool needs_mem_regions;
> >
> > + /* Are file descriptors necessary for blkio_mem_regions? */
> > + bool needs_mem_region_fd;
> > +
> > /*
> > * blkio_completion_fd_poll() stashes the next completion for
> > * blkio_completion_fd_poll_ready().
> > @@ -170,6 +175,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState
> > *bs, int64_t offset,
> > BlockCompletionFunc *cb, void *opaque)
> > {
> > BDRVBlkioState *s = bs->opaque;
> > + bool needs_mem_regions =
> > + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> > struct iovec *iov = qiov->iov;
> > int iovcnt = qiov->niov;
> > BlkioAIOCB *acb;
> > @@ -179,7 +186,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState
> > *bs, int64_t offset,
> >
> > acb = blkio_aiocb_get(bs, cb, opaque);
> >
> > - if (s->needs_mem_regions) {
> > + if (needs_mem_regions) {
> > if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> > qemu_aio_unref(&acb->common);
> > return NULL;
> > @@ -194,7 +201,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState
> > *bs, int64_t offset,
> >
> > ret = blkioq_readv(s->blkioq, offset, iov, iovcnt, acb, 0);
> > if (ret < 0) {
> > - if (s->needs_mem_regions) {
> > + if (needs_mem_regions) {
> > blkio_free_mem_region(s->blkio, &acb->mem_region);
> > qemu_iovec_destroy(&acb->qiov);
> > }
> > @@ -215,6 +222,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState
> > *bs, int64_t offset,
> > {
> > uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
> > BDRVBlkioState *s = bs->opaque;
> > + bool needs_mem_regions =
> > + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> > struct iovec *iov = qiov->iov;
> > int iovcnt = qiov->niov;
> > BlkioAIOCB *acb;
> > @@ -224,7 +233,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState
> > *bs, int64_t offset,
> >
> > acb = blkio_aiocb_get(bs, cb, opaque);
> >
> > - if (s->needs_mem_regions) {
> > + if (needs_mem_regions) {
> > if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> > qemu_aio_unref(&acb->common);
> > return NULL;
> > @@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState
> > *bs, int64_t offset,
> >
> > ret = blkioq_writev(s->blkioq, offset, iov, iovcnt, acb, blkio_flags);
> > if (ret < 0) {
> > - if (s->needs_mem_regions) {
> > + if (needs_mem_regions) {
> > blkio_free_mem_region(s->blkio, &acb->mem_region);
> > }
> > qemu_aio_unref(&acb->common);
> > @@ -286,6 +295,83 @@ static void blkio_io_unplug(BlockDriverState *bs)
> > }
> > }
> >
> > +static void blkio_register_buf(BlockDriverState *bs, void *host, size_t
> > size)
> > +{
> > + BDRVBlkioState *s = bs->opaque;
> > + char *errmsg;
> > + int ret;
> > + struct blkio_mem_region region = (struct blkio_mem_region){
> > + .addr = host,
> > + .len = size,
> > + .fd = -1,
> > + };
> > +
> > + if (((uintptr_t)host | size) % s->mem_region_alignment) {
> > + error_report_once("%s: skipping unaligned buf %p with size %zu",
> > + __func__, host, size);
> > + return; /* skip unaligned */
> > + }
> > +
> > + /* Attempt to find the fd for a MemoryRegion */
> > + if (s->needs_mem_region_fd) {
> > + int fd = -1;
> > + ram_addr_t offset;
> > + MemoryRegion *mr;
> > +
> > + /*
> > + * bdrv_register_buf() is called with the BQL held so mr lives at
> > least
> > + * until this function returns.
> > + */
> > + mr = memory_region_from_host(host, &offset);
> > + if (mr) {
> > + fd = memory_region_get_fd(mr);
> > + }
> > + if (fd == -1) {
> > + error_report_once("%s: skipping fd-less buf %p with size %zu",
> > + __func__, host, size);
> > + return; /* skip if there is no fd */
> > + }
> > +
> > + region.fd = fd;
> > + region.fd_offset = offset;
> > + }
> > +
> > + WITH_QEMU_LOCK_GUARD(&s->lock) {
> > + ret = blkio_add_mem_region(s->blkio, ®ion, &errmsg);
> > + }
> > +
> > + if (ret < 0) {
> > + error_report_once("Failed to add blkio mem region %p with size
> > %zu: %s",
> > + host, size, errmsg);
> > + free(errmsg);
> > + }
> > +}
> > +
> > +static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t
> > size)
> > +{
> > + BDRVBlkioState *s = bs->opaque;
> > + char *errmsg;
> > + int ret;
> > + struct blkio_mem_region region = (struct blkio_mem_region){
> > + .addr = host,
> > + .len = size,
> > + };
> > +
> > + if (((uintptr_t)host | size) % s->mem_region_alignment) {
> > + return; /* skip unaligned */
> > + }
> > +
> > + WITH_QEMU_LOCK_GUARD(&s->lock) {
> > + ret = blkio_del_mem_region(s->blkio, ®ion, &errmsg);
> > + }
> > +
> > + if (ret < 0) {
> > + error_report_once("Failed to delete blkio mem region %p with size
> > %zu: %s",
> > + host, size, errmsg);
> > + free(errmsg);
> > + }
> > +}
> > +
> > static void blkio_parse_filename_io_uring(const char *filename, QDict
> > *options,
> > Error **errp)
> > {
> > @@ -356,6 +442,18 @@ static int blkio_file_open(BlockDriverState *bs, QDict
> > *options, int flags,
> > return ret;
> > }
> >
> > + ret = blkio_get_bool(s->blkio,
> > + "needs-mem-region-fd",
> > + &s->needs_mem_region_fd,
> > + &errmsg);
> > + if (ret < 0) {
> > + error_setg_errno(errp, -ret,
> > + "failed to get needs-mem-region-fd: %s", errmsg);
> > + free(errmsg);
> > + blkio_destroy(&s->blkio);
> > + return ret;
> > + }
> > +
> > ret = blkio_get_uint64(s->blkio,
> > "mem-region-alignment",
> > &s->mem_region_alignment,
>
> I already mentioned on IRC while testing the series, but I'm writing it here
> so we don't forget ;-)
>
> To prevent bdrv_driver_pwritev() from removing the BDRV_REQ_REGISTERED_BUF
> flag from requests, we should add this:
>
> @@ -474,7 +474,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict
> *options, int flags,
> return ret;
> }
>
> - bs->supported_write_flags = BDRV_REQ_FUA;
> + bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
>
> qemu_mutex_init(&s->lock);
> s->blkioq = blkio_get_queue(s->blkio, 0);
Thanks, will fix.
Stefan
signature.asc
Description: PGP signature
- [RFC 1/8] blkio: add io_uring block driver using libblkio, (continued)
- [RFC 1/8] blkio: add io_uring block driver using libblkio, Stefan Hajnoczi, 2022/03/23
- [RFC 2/8] numa: call ->ram_block_removed() in ram_block_notifer_remove(), Stefan Hajnoczi, 2022/03/23
- [RFC 4/8] block: add BDRV_REQ_REGISTERED_BUF request flag, Stefan Hajnoczi, 2022/03/23
- [RFC 5/8] block: add BlockRAMRegistrar, Stefan Hajnoczi, 2022/03/23
- [RFC 6/8] stubs: add memory_region_from_host() and memory_region_get_fd(), Stefan Hajnoczi, 2022/03/23
- [RFC 8/8] virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint, Stefan Hajnoczi, 2022/03/23
- [RFC 3/8] block: pass size to bdrv_unregister_buf(), Stefan Hajnoczi, 2022/03/23
- [RFC 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization, Stefan Hajnoczi, 2022/03/23
- Re: [RFC 0/8] blkio: add libblkio BlockDriver, Stefano Garzarella, 2022/03/29