[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 12/15] vfio/common: Support device dirty page tracking with vIOMMU
From: |
Joao Martins |
Subject: |
Re: [PATCH v4 12/15] vfio/common: Support device dirty page tracking with vIOMMU |
Date: |
Mon, 10 Jul 2023 14:49:29 +0100 |
On 09/07/2023 16:24, Avihai Horon wrote:
> On 23/06/2023 0:48, Joao Martins wrote:
>> Currently, device dirty page tracking with vIOMMU is not supported,
>> and a blocker is added and the migration is prevented.
>>
>> When vIOMMU is used, IOVA ranges are DMA mapped/unmapped on the fly as
>> requested by the vIOMMU. These IOVA ranges can potentially be mapped
>> anywhere in the vIOMMU IOVA space as advertised by the VMM.
>>
>> To support device dirty tracking when vIOMMU is enabled, instead create the
>> dirty ranges based on the vIOMMU-provided limits, which leads to the
>> tracking of the whole IOVA space regardless of what devices use.
>>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> include/hw/vfio/vfio-common.h | 1 +
>> hw/vfio/common.c | 58 +++++++++++++++++++++++++++++------
>> hw/vfio/pci.c | 7 +++++
>> 3 files changed, 56 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index f41860988d6b..c4bafad084b4 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -71,6 +71,7 @@ typedef struct VFIOMigration {
>> typedef struct VFIOAddressSpace {
>> AddressSpace *as;
>> bool no_dma_translation;
>> + hwaddr max_iova;
>> QLIST_HEAD(, VFIOContainer) containers;
>> QLIST_ENTRY(VFIOAddressSpace) list;
>> } VFIOAddressSpace;
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index ecfb9afb3fb6..85fddef24026 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -428,6 +428,25 @@ static bool vfio_viommu_preset(void)
>> return false;
>> }
>>
>> +static int vfio_viommu_get_max_iova(hwaddr *max_iova)
>> +{
>> + VFIOAddressSpace *space;
>> +
>> + *max_iova = 0;
>> +
>> + QLIST_FOREACH(space, &vfio_address_spaces, list) {
>> + if (space->as == &address_space_memory) {
>> + continue;
>> + }
>> +
>> + if (*max_iova < space->max_iova) {
>> + *max_iova = space->max_iova;
>> + }
>> + }
>
> Looks like max_iova is a per VFIOAddressSpace property, so why do we need to
> iterate over all address spaces?
>
This was more about future-proofing for when QEMU supports multiple vIOMMUs. In
theory this tracks the device address space, and if two different devices sit
behind different vIOMMUs, then this loop would compute the highest IOVA that
would be tracked by the host device dirty tracker.
But I realize this might introduce unnecessary complexity, and we should 'obey'
the advertised vIOMMU max_iova for the device. With Zhenzhong's blocker cleanup I
can make this just fetch the max_iova of the space and be done with it.
Joao
> Thanks.
>
>> +
>> + return *max_iova == 0;
>> +}
>> +
>> int vfio_block_giommu_migration(Error **errp)
>> {
>> int ret;
>> @@ -1464,10 +1483,11 @@ static const MemoryListener
>> vfio_dirty_tracking_listener = {
>> .region_add = vfio_listener_dirty_tracking_update,
>> };
>>
>> -static void vfio_dirty_tracking_init(VFIOContainer *container,
>> +static int vfio_dirty_tracking_init(VFIOContainer *container,
>> VFIODirtyRanges *ranges)
>> {
>> VFIODirtyRangesListener dirty;
>> + int ret;
>>
>> memset(&dirty, 0, sizeof(dirty));
>> dirty.ranges.min32 = UINT32_MAX;
>> @@ -1475,17 +1495,29 @@ static void vfio_dirty_tracking_init(VFIOContainer
>> *container,
>> dirty.listener = vfio_dirty_tracking_listener;
>> dirty.container = container;
>>
>> - memory_listener_register(&dirty.listener,
>> - container->space->as);
>> + if (vfio_viommu_preset()) {
>> + hwaddr iommu_max_iova;
>> +
>> + ret = vfio_viommu_get_max_iova(&iommu_max_iova);
>> + if (ret) {
>> + return -EINVAL;
>> + }
>> +
>> + vfio_dirty_tracking_update(0, iommu_max_iova, &dirty.ranges);
>> + } else {
>> + memory_listener_register(&dirty.listener,
>> + container->space->as);
>> + /*
>> + * The memory listener is synchronous, and used to calculate the
>> range
>> + * to dirty tracking. Unregister it after we are done as we are not
>> + * interested in any follow-up updates.
>> + */
>> + memory_listener_unregister(&dirty.listener);
>> + }
>>
>> *ranges = dirty.ranges;
>>
>> - /*
>> - * The memory listener is synchronous, and used to calculate the range
>> - * to dirty tracking. Unregister it after we are done as we are not
>> - * interested in any follow-up updates.
>> - */
>> - memory_listener_unregister(&dirty.listener);
>> + return 0;
>> }
>>
>> static void vfio_devices_dma_logging_stop(VFIOContainer *container)
>> @@ -1590,7 +1622,13 @@ static int
>> vfio_devices_dma_logging_start(VFIOContainer
>> *container)
>> VFIOGroup *group;
>> int ret = 0;
>>
>> - vfio_dirty_tracking_init(container, &ranges);
>> + ret = vfio_dirty_tracking_init(container, &ranges);
>> + if (ret) {
>> + error_report("Failed to init DMA logging ranges, err %d",
>> + ret);
>> + return -EOPNOTSUPP;
>> + }
>> +
>> feature = vfio_device_feature_dma_logging_start_create(container,
>> &ranges);
>> if (!feature) {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 8a98e6ffc480..3bda5618c5b5 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2974,6 +2974,13 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>> &dma_translation);
>> space->no_dma_translation = !dma_translation;
>>
>> + /*
>> + * Support for advertised IOMMU address space boundaries is optional.
>> + * By default, it is not advertised i.e. space::max_iova is 0.
>> + */
>> + pci_device_iommu_get_attr(pdev, IOMMU_ATTR_MAX_IOVA,
>> + &space->max_iova);
>> +
>> QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> error_setg(errp, "device is already attached");
>> --
>> 2.17.2
>>