qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 07/13] vfio/common: Record DMA mapped IOVA ranges


From: Joao Martins
Subject: Re: [PATCH v3 07/13] vfio/common: Record DMA mapped IOVA ranges
Date: Mon, 6 Mar 2023 14:37:04 +0000

On 06/03/2023 13:41, Cédric Le Goater wrote:
> On 3/4/23 02:43, Joao Martins wrote:
>> According to the device DMA logging uAPI, IOVA ranges to be logged by
>> the device must be provided all at once upon DMA logging start.
>>
>> As preparation for the following patches which will add device dirty
>> page tracking, keep a record of all DMA mapped IOVA ranges so later they
>> can be used for DMA logging start.
>>
>> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
>> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
>>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>   hw/vfio/common.c              | 84 +++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events          |  1 +
>>   include/hw/vfio/vfio-common.h | 11 +++++
>>   3 files changed, 96 insertions(+)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index ed908e303dbb..d84e5fd86bb4 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -44,6 +44,7 @@
>>   #include "migration/blocker.h"
>>   #include "migration/qemu-file.h"
>>   #include "sysemu/tpm.h"
>> +#include "qemu/iova-tree.h"
>>     VFIOGroupList vfio_group_list =
>>       QLIST_HEAD_INITIALIZER(vfio_group_list);
>> @@ -1313,11 +1314,94 @@ static int vfio_set_dirty_page_tracking(VFIOContainer
>> *container, bool start)
>>       return ret;
>>   }
>>   +/*
>> + * Called for the dirty tracking memory listener to calculate the iova/end
>> + * for a given memory region section. The checks here, replicate the logic
>> + * in vfio_listener_region_{add,del}() used for the same purpose. And thus
>> + * both listener should be kept in sync.
>> + */
>> +static bool vfio_get_section_iova_range(VFIOContainer *container,
>> +                                        MemoryRegionSection *section,
>> +                                        hwaddr *out_iova, hwaddr *out_end)
>> +{
>> +    Int128 llend;
>> +    hwaddr iova;
>> +
>> +    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
>> +    llend = int128_make64(section->offset_within_address_space);
>> +    llend = int128_add(llend, section->size);
>> +    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask()));
>> +
>> +    if (int128_ge(int128_make64(iova), llend)) {
>> +        return false;
>> +    }
>> +
>> +    *out_iova = iova;
>> +    *out_end = int128_get64(llend) - 1;
>> +    return true;
>> +}
>> +
>> +static void vfio_dirty_tracking_update(MemoryListener *listener,
>> +                                       MemoryRegionSection *section)
>> +{
>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>> +                                            tracking_listener);
>> +    VFIODirtyTrackingRange *range = &container->tracking_range;
>> +    hwaddr max32 = (1ULL << 32) - 1ULL;
>> +    hwaddr iova, end;
>> +
>> +    if (!vfio_listener_valid_section(section) ||
>> +        !vfio_get_section_iova_range(container, section, &iova, &end)) {
>> +        return;
>> +    }
>> +
>> +    WITH_QEMU_LOCK_GUARD(&container->tracking_mutex) {
>> +        if (iova < max32 && end <= max32) {
>> +                if (range->min32 > iova) {
>> +                    range->min32 = iova;
>> +                }
>> +                if (range->max32 < end) {
>> +                    range->max32 = end;
>> +                }
>> +                trace_vfio_device_dirty_tracking_update(iova, end,
>> +                                            range->min32, range->max32);
>> +        } else {
>> +                if (!range->min64 || range->min64 > iova) {
>> +                    range->min64 = iova;
>> +                }
>> +                if (range->max64 < end) {
>> +                    range->max64 = end;
>> +                }
>> +                trace_vfio_device_dirty_tracking_update(iova, end,
>> +                                            range->min64, range->max64);
>> +        }
>> +    }
>> +    return;
>> +}
>> +
>> +static const MemoryListener vfio_dirty_tracking_listener = {
>> +    .name = "vfio-tracking",
>> +    .region_add = vfio_dirty_tracking_update,
>> +};
>> +
>> +static void vfio_dirty_tracking_init(VFIOContainer *container)
>> +{
>> +    memset(&container->tracking_range, 0, 
>> sizeof(container->tracking_range));
>> +    qemu_mutex_init(&container->tracking_mutex);
>> +    container->tracking_listener = vfio_dirty_tracking_listener;
>> +    memory_listener_register(&container->tracking_listener,
>> +                             container->space->as);
> 
> The following unregister+destroy calls seem to belong to a _fini routine.
> Am I missing something ?
> 
The thinking is that once we register the memory listener, it will iterate
over all the sections and once that is finished the memory_listener_register
returns. So the state we initialize here isn't needed anyelse other than to
create the range and hence we destroy it right away. It was at container_init()
but it was unnecessary overhead to keep around if it's *only* needed when we
start/stop dirty tracking.

So the reason I don't add a _fini method is because there's no need to teardown
the state anywhere else other than this function.

I would argue that maybe I don't need the mutex at all as this is all 
serialized...

> Thanks,
> 
> C.
> 
>> +    memory_listener_unregister(&container->tracking_listener);
>> +    qemu_mutex_destroy(&container->tracking_mutex);
>> +}
>> +
>>   static void vfio_listener_log_global_start(MemoryListener *listener)
>>   {
>>       VFIOContainer *container = container_of(listener, VFIOContainer, 
>> listener);
>>       int ret;
>>   +    vfio_dirty_tracking_init(container);
>> +
>>       ret = vfio_set_dirty_page_tracking(container, true);
>>       if (ret) {
>>           vfio_set_migration_error(ret);
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 669d9fe07cd9..d97a6de17921 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -104,6 +104,7 @@ vfio_known_safe_misalignment(const char *name, uint64_t
>> iova, uint64_t offset_wi
>>   vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova,
>> uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" 
>> size=0x%"PRIx64"
>> is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
>>   vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING
>> region_del 0x%"PRIx64" - 0x%"PRIx64
>>   vfio_listener_region_del(uint64_t start, uint64_t end) "region_del
>> 0x%"PRIx64" - 0x%"PRIx64
>> +vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t 
>> min,
>> uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" -
>> 0x%"PRIx64"]"
>>   vfio_disconnect_container(int fd) "close container->fd=%d"
>>   vfio_put_group(int fd) "close group->fd=%d"
>>   vfio_get_device(const char * name, unsigned int flags, unsigned int
>> num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: 
>> %u"
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 87524c64a443..96791add2719 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -23,6 +23,7 @@
>>     #include "exec/memory.h"
>>   #include "qemu/queue.h"
>> +#include "qemu/iova-tree.h"
>>   #include "qemu/notify.h"
>>   #include "ui/console.h"
>>   #include "hw/display/ramfb.h"
>> @@ -68,6 +69,13 @@ typedef struct VFIOMigration {
>>       size_t data_buffer_size;
>>   } VFIOMigration;
>>   +typedef struct VFIODirtyTrackingRange {
>> +    hwaddr min32;
>> +    hwaddr max32;
>> +    hwaddr min64;
>> +    hwaddr max64;
>> +} VFIODirtyTrackingRange;
>> +
>>   typedef struct VFIOAddressSpace {
>>       AddressSpace *as;
>>       QLIST_HEAD(, VFIOContainer) containers;
>> @@ -89,6 +97,9 @@ typedef struct VFIOContainer {
>>       uint64_t max_dirty_bitmap_size;
>>       unsigned long pgsizes;
>>       unsigned int dma_max_mappings;
>> +    VFIODirtyTrackingRange tracking_range;
>> +    QemuMutex tracking_mutex;
>> +    MemoryListener tracking_listener;
>>       QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>>       QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>>       QLIST_HEAD(, VFIOGroup) group_list;
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]