qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vI


From: Aviv B.D.
Subject: Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present
Date: Thu, 17 Mar 2016 13:58:13 +0200



On Tue, Mar 15, 2016 at 12:53 PM, Michael S. Tsirkin <address@hidden> wrote:
On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> From: "Aviv B.D." <address@hidden>
>
>  * Fix bug that prevent qemu from starting up when vIOMMU and VFIO
>     device are present.
>  * Advertise Cache Mode capability in iommu cap register.
>  * Register every VFIO device with IOMMU state.
>  * On page cache invalidation in vIOMMU, check if the domain belong to
>    VFIO device and mirror the guest requests to host.
>
> Not working (Yet!):
>  * Tested only with network interface card (ixgbevf) and
>     intel_iommu=strict in guest's kernel command line.
>  * Lock up under high load.
>  * Errors on guest poweroff.
>  * High relative latency compare to VFIO without IOMMU.
>
> Signed-off-by: Aviv B.D. <address@hidden>

Thanks, this is very interesting.
So this needs some cleanup, and some issues that will have to be addressed.
See below.
Thanks!

Thanks! 

> ---
>  hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++++++----
>  hw/i386/intel_iommu_internal.h |  1 +
>  hw/vfio/common.c               | 12 +++++--
>  include/hw/i386/intel_iommu.h  |  4 +++
>  include/hw/vfio/vfio-common.h  |  1 +
>  5 files changed, 85 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 347718f..046688f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT
> (CSR);
>  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>  #endif
>  
> +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> +                                    uint8_t devfn, VTDContextEntry *ce);
> +
>  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                              uint64_t wmask, uint64_t w1cmask)
>  {
> @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState
> *s, hwaddr addr,
>      return new_val;
>  }
>  
> +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t
> devfn)
> +{
> +    VTDContextEntry ce;
> +    int ret_fr;
> +
> +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> +    if (ret_fr){
> +        return -1;
> +    }
> +
> +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> +}
> +
>  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
>                                          uint64_t clear, uint64_t mask)
>  {
> @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s,
> uint8_t bus_num,
>      }
>  
>      if (!vtd_context_entry_present(ce)) {
> -        VTD_DPRINTF(GENERAL,
> +        /*VTD_DPRINTF(GENERAL,
>                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> -                    "is not present", devfn, bus_num);
> +                    "is not present", devfn, bus_num);*/
>          return -VTD_FR_CONTEXT_ENTRY_P;
>      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
>                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState
> *s, uint16_t domain_id,
>                                        hwaddr addr, uint8_t am)
>  {
>      VTDIOTLBPageInvInfo info;
> +    VFIOGuestIOMMU * giommu;
> +    bool flag = false;
>  
>      assert(am <= VTD_MAMV);
>      info.domain_id = domain_id;
>      info.addr = addr;
>      info.mask = ~((1 << am) - 1);
> +
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace,
> iommu);
> +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus),
> vtd_as->devfn);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus),
> vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 && 
> +                domain_id == vfio_domain_id){
> +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id,
> addr);
> +            if (iotlb_entry != NULL){
> +                IOMMUTLBEntry entry; 
> +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr,
> am);
> +                entry.iova = addr & VTD_PAGE_MASK_4K;
> +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte)
> & VTD_PAGE_MASK_4K;
> +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> +                entry.perm = IOMMU_NONE;
> +                memory_region_notify_iommu(giommu->iommu, entry);
> +                flag = true;
> +
> +            }
> +        }
> +
> +    }
> +
>      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> -}
>  
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace,
> iommu);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus),
> vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 && 
> +                domain_id == vfio_domain_id && !flag){
> +            /* do vfio map */
> +            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
> +            /* call to vtd_iommu_translate */
> +            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr,
> 0); 
> +            entry.perm = IOMMU_RW;
> +            memory_region_notify_iommu(giommu->iommu, entry);

So this just triggers the iommu notifiers in the regular way.
But what if two devices have conflicting entries?
We need to setup host IOMMU differently for them.
But see below

> +            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
> +        }
> +    }
> +}
>  /* Flush IOTLB
>   * Returns the IOTLB Actual Invalidation Granularity.
>   * @val: the content of the IOTLB_REG
> @@ -1895,6 +1951,13 @@ static Property vtd_properties[] = {
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> +void vtd_register_giommu(VFIOGuestIOMMU * giommu)
> +{
> +    VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace,
> iommu);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    
> +    QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
> +}
>  
>  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>  {
> @@ -1949,7 +2012,8 @@ static void vtd_init(IntelIOMMUState *s)
>      s->iq_last_desc_type = VTD_INV_DESC_NONE;
>      s->next_frcd_reg = 0;
>      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
> -             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
> +             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> +             VTD_CAP_CM;
>      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>  
>      vtd_reset_context_cache(s);
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index e5f514c..ae40f73 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -190,6 +190,7 @@
>  #define VTD_CAP_MAMV                (VTD_MAMV << 48)
>  #define VTD_CAP_PSI                 (1ULL << 39)
>  #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
> +#define VTD_CAP_CM                  (1ULL << 7)
>  
>  /* Supported Adjusted Guest Address Widths */
>  #define VTD_CAP_SAGAW_SHIFT         8
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 607ec70..98c8d67 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -32,6 +32,9 @@
>  #include "sysemu/kvm.h"
>  #include "trace.h"
>  
> +#include "hw/sysbus.h"
> +#include "hw/i386/intel_iommu.h"
> +
>  struct vfio_group_head vfio_group_list =
>      QLIST_HEAD_INITIALIZER(vfio_group_list);
>  struct vfio_as_head vfio_address_spaces =
> @@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n, void
> *data)
>  out:
>      rcu_read_unlock();
>  }
> -
> +#if 0
>  static hwaddr vfio_container_granularity(VFIOContainer *container)
>  {
>      return (hwaddr)1 << ctz64(container->iova_pgsizes);
>  }
> -
> +#endif
>  static void vfio_listener_region_add(MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> @@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener
> *listener,
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>      llend = int128_make64(section->offset_within_address_space);
>      llend = int128_add(llend, section->size);
> +    llend = int128_add(llend, int128_exts64(-1));
>      llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>  
>      if (int128_ge(int128_make64(iova), llend)) {
> @@ -381,11 +385,13 @@ static void vfio_listener_region_add(MemoryListener
> *listener,
>          giommu->n.notify = vfio_iommu_map_notify;
>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
>  
> +        vtd_register_giommu(giommu);
>          memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +#if 0
>          memory_region_iommu_replay(giommu->iommu, &giommu->n,
>                                     vfio_container_granularity(container),
>                                     false);
> -
> +#endif
>          return;
>      }
>

Looks like there is still a single global domain on the host.
Works up to a point as long as we have a single VFIO device.

You are correct, I should call (from page_invalidation) only to the correct vfio notifier that belong to this domain. This way (as long as no conflicts mapping is present) several VFIO devices can works at the same time. I'll add this feature to the next version of the patch.
currently I am not sure how the code should handle conflicting mapping between two different domains. 
 
 
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index b024ffa..22f3f83 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -23,6 +23,7 @@
>  #define INTEL_IOMMU_H
>  #include "hw/qdev.h"
>  #include "sysemu/dma.h"
> +#include "hw/vfio/vfio-common.h"
>  
>  #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
>  #define INTEL_IOMMU_DEVICE(obj) \
> @@ -123,6 +124,8 @@ struct IntelIOMMUState {
>      MemoryRegionIOMMUOps iommu_ops;
>      GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus*
> reference */
>      VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by
> bus number */
> +
> +    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>  };
>  
>  /* Find the VTD Address space associated with the given bus pointer,
> @@ -130,4 +133,5 @@ struct IntelIOMMUState {
>   */
>  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
>  
> +void vtd_register_giommu(VFIOGuestIOMMU * giommu);
>  #endif
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index f037f3c..9225ba3 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
>      MemoryRegion *iommu;
>      Notifier n;
>      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
> +    QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
>  } VFIOGuestIOMMU;
>  
>  typedef struct VFIODeviceOps VFIODeviceOps;
> -- 
> 1.9.1
>  


reply via email to

[Prev in Thread] Current Thread [Next in Thread]