[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 1/3] virtio-iommu: Add bypass mode support to assigned device
From: |
Eric Auger |
Subject: |
Re: [PATCH 1/3] virtio-iommu: Add bypass mode support to assigned device |
Date: |
Thu, 23 Jun 2022 18:52:24 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.9.0 |
Hi,
On 6/13/22 08:10, Zhenzhong Duan wrote:
> Currently assigned devices can not work in virtio-iommu bypass mode.
> Guest driver fails to probe the device due to DMA failure. And the
> reason is because of lacking GPA -> HPA mappings when VM is created.
>
> Add a root container memory region to hold both bypass memory region
> and iommu memory region, so the switch between them is supported
> just like the implementation in virtual VT-d.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/virtio/trace-events | 1 +
> hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++-
> include/hw/virtio/virtio-iommu.h | 2 +
> 3 files changed, 116 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index ab8e095b73fa..20af2e7ebd78 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -124,6 +124,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start,
> uint64_t virt_end, uin
> virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t
> new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64
> virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
> virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
> +virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn,
> bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
>
> # virtio-mem.c
> virtio_mem_send_response(uint16_t type) "type=%" PRIu16
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index 2597e166f939..ff718107ee02 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -69,6 +69,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice
> *dev)
> return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
> }
>
> +static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
> +{
> + uint32_t sid;
> + bool bypassed;
> + VirtIOIOMMU *s = sdev->viommu;
> + VirtIOIOMMUEndpoint *ep;
> +
> + sid = virtio_iommu_get_bdf(sdev);
> +
> + qemu_mutex_lock(&s->mutex);
> + /* need to check bypass before system reset */
> + if (!s->endpoints) {
> + bypassed = s->config.bypass;
> + goto unlock;
> + }
> +
> + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
> + if (!ep || !ep->domain) {
> + bypassed = s->config.bypass;
> + } else {
> + bypassed = ep->domain->bypass;
> + }
> +
> +unlock:
> + qemu_mutex_unlock(&s->mutex);
> + return bypassed;
> +}
> +
> +/* Return whether the device is using IOMMU translation. */
> +static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
> +{
> + bool use_remapping;
> +
> + assert(sdev);
> +
> + use_remapping = !virtio_iommu_device_bypassed(sdev);
> +
> + trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
> + PCI_SLOT(sdev->devfn),
> + PCI_FUNC(sdev->devfn),
> + use_remapping);
> +
> + /* Turn off first then on the other */
> + if (use_remapping) {
> + memory_region_set_enabled(&sdev->bypass_mr, false);
> + memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
> + } else {
> + memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
> + memory_region_set_enabled(&sdev->bypass_mr, true);
> + }
> +
> + return use_remapping;
> +}
> +
> +static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
> +{
> + GHashTableIter iter;
> + IOMMUPciBus *iommu_pci_bus;
> + int i;
> +
> + g_hash_table_iter_init(&iter, s->as_by_busptr);
> + while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
> + for (i = 0; i < PCI_DEVFN_MAX; i++) {
> + if (!iommu_pci_bus->pbdev[i]) {
> + continue;
> + }
> + virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
> + }
> + }
> +}
> +
> /**
> * The bus number is used for lookup when SID based operations occur.
> * In that case we lazily populate the IOMMUPciBus array from the bus hash
> @@ -213,6 +284,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key,
> gpointer value,
> static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> {
> VirtIOIOMMUDomain *domain = ep->domain;
> + IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
>
> if (!ep->domain) {
> return;
> @@ -221,6 +293,7 @@ static void
> virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> ep->iommu_mr);
> QLIST_REMOVE(ep, next);
> ep->domain = NULL;
> + virtio_iommu_switch_address_space(sdev);
> }
>
> static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> @@ -323,12 +396,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus
> *bus, void *opaque,
>
> trace_virtio_iommu_init_iommu_mr(name);
>
> + memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
> + address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
> +
> + /*
> + * Build the IOMMU disabled container with aliases to the
> + * shared MRs. Note that aliasing to a shared memory region
What do you call the 'disabled container' and the shared MRs?
> + * could help the memory API to detect same FlatViews so we
> + * can have devices to share the same FlatView when in bypass
> + * mode. (either by not configuring virtio-iommu driver or with
> + * "iommu=pt"). It will greatly reduce the total number of
> + * FlatViews of the system hence VM runs faster.
Do you mean that we could have used a shared bypass MR instead of
creating one per device?
> + */
> + memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
> + "system", get_system_memory(), 0,
> + memory_region_size(get_system_memory()));
> +
> memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
> TYPE_VIRTIO_IOMMU_MEMORY_REGION,
> OBJECT(s), name,
> UINT64_MAX);
> - address_space_init(&sdev->as,
> - MEMORY_REGION(&sdev->iommu_mr),
> TYPE_VIRTIO_IOMMU);
> +
> + /*
> + * Hook both the containers under the root container, we
Did you mean "hook both sub-regions to the root container"?
> + * switch between iommu & bypass MRs by enable/disable
> + * corresponding sub-containers
> + */
> + memory_region_add_subregion_overlap(&sdev->root, 0,
> + MEMORY_REGION(&sdev->iommu_mr),
> + 0);
> + memory_region_add_subregion_overlap(&sdev->root, 0,
> + &sdev->bypass_mr, 0);
> +
> + virtio_iommu_switch_address_space(sdev);
> g_free(name);
> }
> return &sdev->as;
> @@ -342,6 +442,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> uint32_t flags = le32_to_cpu(req->flags);
> VirtIOIOMMUDomain *domain;
> VirtIOIOMMUEndpoint *ep;
> + IOMMUDevice *sdev;
>
> trace_virtio_iommu_attach(domain_id, ep_id);
>
> @@ -375,6 +476,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
>
> ep->domain = domain;
> + sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
> + virtio_iommu_switch_address_space(sdev);
>
> /* Replay domain mappings on the associated memory region */
> g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
> @@ -887,6 +990,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev,
> return;
> }
> dev_config->bypass = in_config->bypass;
> + virtio_iommu_switch_address_space_all(dev);
> }
>
> trace_virtio_iommu_set_config(in_config->bypass);
> @@ -1026,6 +1130,8 @@ static void virtio_iommu_system_reset(void *opaque)
> * system reset
> */
> s->config.bypass = s->boot_bypass;
> + virtio_iommu_switch_address_space_all(s);
> +
> }
>
> static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
> @@ -1041,6 +1147,11 @@ static void virtio_iommu_device_realize(DeviceState
> *dev, Error **errp)
> virtio_iommu_handle_command);
> s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
>
> + /*
> + * config.bypass is needed to get initial address space early, such as
> + * in vfio realize
> + */
I don't understand this comment, please can you elaborate?
> + s->config.bypass = s->boot_bypass;
> s->config.page_size_mask = TARGET_PAGE_MASK;
> s->config.input_range.end = UINT64_MAX;
> s->config.domain_range.end = UINT32_MAX;
> diff --git a/include/hw/virtio/virtio-iommu.h
> b/include/hw/virtio/virtio-iommu.h
> index 84391f844826..102eeefa731d 100644
> --- a/include/hw/virtio/virtio-iommu.h
> +++ b/include/hw/virtio/virtio-iommu.h
> @@ -37,6 +37,8 @@ typedef struct IOMMUDevice {
> int devfn;
> IOMMUMemoryRegion iommu_mr;
> AddressSpace as;
> + MemoryRegion root; /* The root container of the device */
> + MemoryRegion bypass_mr; /* The alias of shared memory MR */
> } IOMMUDevice;
>
> typedef struct IOMMUPciBus {
Did you test migration? I wonder if we shouldn't call
virtio_iommu_switch_address_space_all(s)
after restoring the endpoints in iommu_post_load(), as is done in intel-iommu.
Thanks
Eric