qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 03/12] vfio: Add guest side IOMMU support


From: Alex Williamson
Subject: Re: [Qemu-devel] [PATCH v4 03/12] vfio: Add guest side IOMMU support
Date: Thu, 05 Sep 2013 12:49:45 -0600

On Fri, 2013-08-30 at 20:15 +1000, Alexey Kardashevskiy wrote:
> From: David Gibson <address@hidden>
> 
> This patch uses the new IOMMU notifiers to allow VFIO pass through devices
> to work with guest side IOMMUs, as long as the host-side VFIO iommu has
> sufficient capability and granularity to match the guest side. This works
> by tracking all map and unmap operations on the guest IOMMU using the
> notifiers, and mirroring them into VFIO.
> 
> There are a number of FIXMEs, and the scheme involves rather more notifier
> structures than I'd like, but it should make for a reasonable proof of
> concept.
> 
> Signed-off-by: David Gibson <address@hidden>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> 
> ---
> Changes:
> v4:
> * fixed list objects naming
> * vfio_listener_region_add() reworked to call memory_region_ref() from one
> place only, it is also easier to review the changes
> * fixes boundary check not to fail on sections == 2^64 bytes,
> the "vfio: Fix debug output for int128 values" patch is required;
> this obsoletes the "[PATCH v3 0/3] vfio: fixes for better support
> for 128 bit memory section sizes" patch proposal
> ---
>  hw/misc/vfio.c | 137 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 128 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index c16f41b..53791fb 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -150,10 +150,18 @@ typedef struct VFIOContainer {
>          };
>          void (*release)(struct VFIOContainer *);
>      } iommu_data;
> +    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
>      QLIST_ENTRY(VFIOContainer) next;
>  } VFIOContainer;
>  
> +typedef struct VFIOGuestIOMMU {
> +    VFIOContainer *container;
> +    MemoryRegion *iommu;
> +    Notifier n;
> +    QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
> +} VFIOGuestIOMMU;
> +
>  /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map 
> */
>  typedef struct VFIOMSIXInfo {
>      uint8_t table_bar;
> @@ -1917,7 +1925,63 @@ static int vfio_dma_map(VFIOContainer *container, 
> hwaddr iova,
>  
>  static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>  {
> -    return !memory_region_is_ram(section->mr);
> +    return !memory_region_is_ram(section->mr) &&
> +        !memory_region_is_iommu(section->mr);
> +}
> +
> +static void vfio_iommu_map_notify(Notifier *n, void *data)
> +{
> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> +    VFIOContainer *container = giommu->container;
> +    IOMMUTLBEntry *iotlb = data;
> +    MemoryRegion *mr;
> +    hwaddr xlat;
> +    hwaddr len = iotlb->addr_mask + 1;
> +    void *vaddr;
> +    int ret;
> +
> +    DPRINTF("iommu map @ %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
> +            iotlb->iova, iotlb->iova + iotlb->addr_mask);
> +
> +    /*
> +     * The IOMMU TLB entry we have just covers translation through
> +     * this IOMMU to its immediate target.  We need to translate
> +     * it the rest of the way through to memory.
> +     */
> +    mr = address_space_translate(&address_space_memory,
> +                                 iotlb->translated_addr,
> +                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> +    if (!memory_region_is_ram(mr)) {
> +        DPRINTF("iommu map to non memory area %"HWADDR_PRIx"\n",
> +                xlat);
> +        return;
> +    }
> +    if (len & iotlb->addr_mask) {
> +        DPRINTF("iommu has granularity incompatible with target AS\n");
> +        return;
> +    }
> +
> +    vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +
> +    if (iotlb->perm != IOMMU_NONE) {
> +        ret = vfio_dma_map(container, iotlb->iova,
> +                           iotlb->addr_mask + 1, vaddr,
> +                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> +        if (ret) {
> +            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, vaddr, ret);
> +        }
> +    } else {
> +        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> +        if (ret) {
> +            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx") = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, ret);
> +        }
> +    }
>  }
>  
>  static void vfio_listener_region_add(MemoryListener *listener,
> @@ -1926,11 +1990,10 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>      VFIOContainer *container = container_of(listener, VFIOContainer,
>                                              iommu_data.listener);
>      hwaddr iova, end;
> +    Int128 llend;
>      void *vaddr;
>      int ret;
>  
> -    assert(!memory_region_is_iommu(section->mr));
> -
>      if (vfio_listener_skipped_section(section)) {
>          DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
>                  section->offset_within_address_space,
> @@ -1946,21 +2009,57 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>      }
>  
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

So you're still looking for me to pickup patches 1/3 & 2/3 from the
previous int128 series for this?

> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    memory_region_ref(section->mr);
> +
> +    if (memory_region_is_iommu(section->mr)) {
> +        VFIOGuestIOMMU *giommu;
> +
> +        DPRINTF("region_add [iommu] %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
> +                iova, int128_get64(int128_sub(llend, int128_one())));
> +        /*
> +         * FIXME: We should do some checking to see if the
> +         * capabilities of the host VFIO IOMMU are adequate to model
> +         * the guest IOMMU
> +         *
> +         * FIXME: This assumes that the guest IOMMU is empty of
> +         * mappings at this point - we should either enforce this, or
> +         * loop through existing mappings to map them into VFIO.

I would hope that when you do the below
memory_region_register_iommu_notifier() the callback gets a replay of
the current state of the iommu like we do for a memory region when we
register the listener that gets us here.  Is that not the case?

> +         *
> +         * FIXME: For VFIO iommu types which have KVM acceleration to
> +         * avoid bouncing all map/unmaps through qemu this way, this
> +         * would be the right place to wire that up (tell the KVM
> +         * device emulation the VFIO iommu handles to use).
> +         */
> +        giommu = g_malloc0(sizeof(*giommu));
> +        giommu->iommu = section->mr;
> +        giommu->container = container;
> +        giommu->n.notify = vfio_iommu_map_notify;
> +        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> +        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +
> +        return;
> +    }
> +
> +    /* Here we assume that memory_region_is_ram(section->mr)==true */
> +
>      end = (section->offset_within_address_space + 
> int128_get64(section->size)) &
>            TARGET_PAGE_MASK;

Why isn't this now calculated from llend?  I thought that was part of
why the int128 stuff was rolled in here.

>  
> -    if (iova >= end) {
> -        return;
> -    }
> -
>      vaddr = memory_region_get_ram_ptr(section->mr) +
>              section->offset_within_region +
>              (iova - section->offset_within_address_space);
>  
> -    DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
> +    DPRINTF("region_add [ram] %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
>              iova, end - 1, vaddr);
>  
> -    memory_region_ref(section->mr);
>      ret = vfio_dma_map(container, iova, end - iova, vaddr, 
> section->readonly);
>      if (ret) {
>          error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> @@ -1991,6 +2090,26 @@ static void vfio_listener_region_del(MemoryListener 
> *listener,
>          return;
>      }
>  
> +    if (memory_region_is_iommu(section->mr)) {
> +        VFIOGuestIOMMU *giommu;
> +
> +        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> +            if (giommu->iommu == section->mr) {
> +                memory_region_unregister_iommu_notifier(&giommu->n);
> +                QLIST_REMOVE(giommu, giommu_next);
> +                g_free(giommu);
> +                break;
> +            }
> +        }
> +
> +        /*
> +         * FIXME: We assume the one big unmap below is adequate to
> +         * remove any individual page mappings in the IOMMU which
> +         * might have been copied into VFIO.  That may not be true for
> +         * all IOMMU types
> +         */
> +    }
> +
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>      end = (section->offset_within_address_space + 
> int128_get64(section->size)) &
>            TARGET_PAGE_MASK;






reply via email to

[Prev in Thread] Current Thread [Next in Thread]