qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC v3 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization


From: Stefan Hajnoczi
Subject: Re: [RFC v3 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization
Date: Mon, 15 Aug 2022 16:52:15 -0400

On Tue, Jul 12, 2022 at 04:28:02PM +0200, Stefano Garzarella wrote:
> On Fri, Jul 08, 2022 at 05:17:36AM +0100, Stefan Hajnoczi wrote:
> > Avoid bounce buffers when QEMUIOVector elements are within previously
> > registered bdrv_register_buf() buffers.
> > 
> > The idea is that emulated storage controllers will register guest RAM
> > using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
> > requests. Therefore no blkio_map_mem_region() calls are necessary in the
> > performance-critical I/O code path.
> > 
> > This optimization doesn't apply if the I/O buffer is internally
> > allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
> > path because BDRV_REQ_REGISTERED_BUF is not set.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > block/blkio.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 101 insertions(+), 3 deletions(-)
> > 
> > diff --git a/block/blkio.c b/block/blkio.c
> > index 7fbdbd7fae..37d593a20c 100644
> > --- a/block/blkio.c
> > +++ b/block/blkio.c
> > @@ -1,7 +1,9 @@
> > #include "qemu/osdep.h"
> > #include <blkio.h>
> > #include "block/block_int.h"
> > +#include "exec/memory.h"
> > #include "qapi/error.h"
> > +#include "qemu/error-report.h"
> > #include "qapi/qmp/qdict.h"
> > #include "qemu/module.h"
> > 
> > @@ -28,6 +30,9 @@ typedef struct {
> > 
> >     /* Can we skip adding/deleting blkio_mem_regions? */
> >     bool needs_mem_regions;
> > +
> > +    /* Are file descriptors necessary for blkio_mem_regions? */
> > +    bool needs_mem_region_fd;
> > } BDRVBlkioState;
> > 
> > static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret)
> > @@ -198,6 +203,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState 
> > *bs, int64_t offset,
> >         BlockCompletionFunc *cb, void *opaque)
> > {
> >     BDRVBlkioState *s = bs->opaque;
> > +    bool needs_mem_regions =
> > +        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> >     struct iovec *iov = qiov->iov;
> >     int iovcnt = qiov->niov;
> >     BlkioAIOCB *acb;
> > @@ -206,7 +213,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState 
> > *bs, int64_t offset,
> > 
> >     acb = blkio_aiocb_get(bs, cb, opaque);
> > 
> > -    if (s->needs_mem_regions) {
> > +    if (needs_mem_regions) {
> >         if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> >             qemu_aio_unref(&acb->common);
> >             return NULL;
> > @@ -230,6 +237,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState 
> > *bs, int64_t offset,
> > {
> >     uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
> >     BDRVBlkioState *s = bs->opaque;
> > +    bool needs_mem_regions =
> > +        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> >     struct iovec *iov = qiov->iov;
> >     int iovcnt = qiov->niov;
> >     BlkioAIOCB *acb;
> > @@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState 
> > *bs, int64_t offset,
> > 
> >     acb = blkio_aiocb_get(bs, cb, opaque);
> > 
> > -    if (s->needs_mem_regions) {
> > +    if (needs_mem_regions) {
> >         if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> >             qemu_aio_unref(&acb->common);
> >             return NULL;
> > @@ -324,6 +333,80 @@ static void blkio_io_unplug(BlockDriverState *bs)
> >     }
> > }
> > 
> > +static void blkio_register_buf(BlockDriverState *bs, void *host, size_t 
> > size)
> > +{
> > +    BDRVBlkioState *s = bs->opaque;
> > +    int ret;
> > +    struct blkio_mem_region region = (struct blkio_mem_region){
> > +        .addr = host,
> > +        .len = size,
> > +        .fd = -1,
> > +    };
> > +
> > +    if (((uintptr_t)host | size) % s->mem_region_alignment) {
> > +        error_report_once("%s: skipping unaligned buf %p with size %zu",
> > +                          __func__, host, size);
> > +        return; /* skip unaligned */
> > +    }
> > +
> > +    /* Attempt to find the fd for a MemoryRegion */
> > +    if (s->needs_mem_region_fd) {
> > +        int fd = -1;
> > +        ram_addr_t offset;
> > +        MemoryRegion *mr;
> > +
> > +        /*
> > +         * bdrv_register_buf() is called with the BQL held so mr lives at 
> > least
> > +         * until this function returns.
> > +         */
> > +        mr = memory_region_from_host(host, &offset);
> > +        if (mr) {
> > +            fd = memory_region_get_fd(mr);
> 
> If s->needs_mem_region_fd is true, memory_region_get_fd() crashes I think
> because mr->ram_block is not yet set, indeed from the stack trace
> blkio_register_buf() is called inside qemu_ram_alloc_resizeable(), and its
> result is used to set mr->ram_block in memory_region_init_resizeable_ram():
> 
> Program terminated with signal SIGSEGV, Segmentation fault.
> #0  0x000056235bf1f7a3 in memory_region_get_fd (mr=<optimized out>) at 
> ../softmmu/memory.c:2309
> #1  0x000056235c07e54d in blkio_register_buf (bs=<optimized out>, 
> host=0x7f824e200000, size=2097152)
>     at ../block/blkio.c:364
> #2  0x000056235c0246c6 in bdrv_register_buf (bs=0x56235d606b40, 
> host=0x7f824e200000, size=2097152)
>     at ../block/io.c:3362
> #3  0x000056235bea44e6 in ram_block_notify_add (host=0x7f824e200000, 
> size=131072, max_size=2097152)
>     at ../hw/core/numa.c:863
> #4  0x000056235bf22c00 in ram_block_add (new_block=<optimized out>, 
> errp=<optimized out>)
>     at ../softmmu/physmem.c:2057
> #5  0x000056235bf232e4 in qemu_ram_alloc_internal (size=size@entry=131072,
> max_size=max_size@entry=2097152, resized=resized@entry=0x56235bc0f920
> <fw_cfg_resized>,     host=host@entry=0x0, ram_flags=ram_flags@entry=4,
> mr=mr@entry=0x56235dc3fe00,     errp=0x7ffcb21f1be0) at
> ../softmmu/physmem.c:2180
> #6  0x000056235bf26426 in qemu_ram_alloc_resizeable (size=size@entry=131072,
> maxsz=maxsz@entry=2097152, resized=resized@entry=0x56235bc0f920
> <fw_cfg_resized>,     mr=mr@entry=0x56235dc3fe00,
> errp=errp@entry=0x7ffcb21f1be0) at ../softmmu/physmem.c:2209
> #7  0x000056235bf1cc99 in memory_region_init_resizeable_ram
> (mr=0x56235dc3fe00,     owner=owner@entry=0x56235d93ffc0,
> name=name@entry=0x7ffcb21f1ca0 "/rom@etc/acpi/tables",     size=131072,
> max_size=2097152, resized=resized@entry=0x56235bc0f920 <fw_cfg_resized>,
> errp=0x56235c996490 <error_fatal>) at ../softmmu/memory.c:1586
> #8  0x000056235bc0f99c in rom_set_mr (rom=rom@entry=0x56235ddd0200,
> owner=0x56235d93ffc0,     name=name@entry=0x7ffcb21f1ca0
> "/rom@etc/acpi/tables", ro=ro@entry=true)
>     at ../hw/core/loader.c:961
> #9  0x000056235bc12a65 in rom_add_blob (name=name@entry=0x56235c1a2a09
> "etc/acpi/tables",     blob=0x56235df4f4b0, len=<optimized out>,
> max_len=max_len@entry=2097152,     addr=addr@entry=18446744073709551615,
> fw_file_name=fw_file_name@entry=0x56235c1a2a09 "etc/acpi/tables",
> fw_callback=0x56235be47f90 <acpi_build_update>,
> callback_opaque=0x56235d817830, as=0x0,     read_only=true) at
> ../hw/core/loader.c:1102
> #10 0x000056235bbe0990 in acpi_add_rom_blob (
>     update=update@entry=0x56235be47f90 <acpi_build_update>,
> opaque=opaque@entry=0x56235d817830,     blob=0x56235d3ab750,
> name=name@entry=0x56235c1a2a09 "etc/acpi/tables") at ../hw/acpi/utils.c:46
> #11 0x000056235be481e6 in acpi_setup () at ../hw/i386/acpi-build.c:2805
> #12 0x000056235be3e209 in pc_machine_done (notifier=0x56235d5efce8, 
> data=<optimized out>)
>     at ../hw/i386/pc.c:758
> #13 0x000056235c12e4a7 in notifier_list_notify (
>     list=list@entry=0x56235c963790 <machine_init_done_notifiers>, 
> data=data@entry=0x0)
>     at ../util/notify.c:39

Hi Stefano,
I have fixed this by using RAMBlock instead of MemoryRegion. The next
revision will call qemu_ram_block_from_host() to fetch the RAMBlock's
fd.

Stefan

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]