From: Marcel Apfelbaum
Subject: Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present
Date: Mon, 14 Mar 2016 20:52:33 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.5.0

On 03/12/2016 06:13 PM, Aviv B.D. wrote:
From: "Aviv B.D." <address@hidden <mailto:address@hidden>>

  * Fix a bug that prevents QEMU from starting up when a vIOMMU and a
    VFIO device are both present.
  * Advertise the Cache Mode (CM) capability in the IOMMU capability
    register.
  * Register every VFIO device with the IOMMU state.
  * On page cache invalidation in the vIOMMU, check whether the domain
    belongs to a VFIO device and mirror the guest's requests to the host
    (see the sketch below).

Not working (Yet!):
  * Tested only with a network interface card (ixgbevf) and
    intel_iommu=strict on the guest's kernel command line.
  * Locks up under high load.
  * Errors on guest poweroff.
  * High latency relative to VFIO without an IOMMU.
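
For readers skimming the patch below, the mirroring path described in the
summary above boils down to the following sketch. It is not the patch code
itself: domain_is_vfio_backed() is a hypothetical placeholder for the
bus/devfn -> context-entry -> domain-id lookup that the patch performs with
vtd_get_did_dev(), and error handling as well as the re-map case are omitted.

static void vtd_mirror_page_inv_to_vfio(IntelIOMMUState *s,
                                        uint16_t domain_id, hwaddr addr)
{
    VFIOGuestIOMMU *giommu;

    /* Walk the VFIO-backed IOMMU regions that the patch registers on
     * s->giommu_list via vtd_register_giommu(). */
    QLIST_FOREACH(giommu, &s->giommu_list, iommu_next) {
        /* Hypothetical helper: does this device's context entry resolve
         * to the domain being invalidated? */
        if (!domain_is_vfio_backed(s, giommu, domain_id)) {
            continue;
        }

        IOMMUTLBEntry entry = {
            .iova      = addr & VTD_PAGE_MASK_4K,
            .addr_mask = ~VTD_PAGE_MASK_4K,
            .perm      = IOMMU_NONE, /* unmap; a map would also carry
                                      * IOMMU_RW and a translated_addr */
        };
        /* VFIO's vfio_iommu_map_notify() receives this and applies the
         * corresponding map/unmap on the host IOMMU. */
        memory_region_notify_iommu(giommu->iommu, entry);
    }
}

With the Cache Mode (CM) bit advertised, the guest must flush the IOTLB even
when it installs new mappings, which is what makes this mirroring point see
maps as well as unmaps.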

Adding (possibly) interested developers to the thread.

Thanks,
Marcel


Signed-off-by: Aviv B.D. <address@hidden>
---
  hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++++++----
  hw/i386/intel_iommu_internal.h |  1 +
  hw/vfio/common.c               | 12 +++++--
  include/hw/i386/intel_iommu.h  |  4 +++
  include/hw/vfio/vfio-common.h  |  1 +
  5 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 347718f..046688f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
  #endif
+static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+                                    uint8_t devfn, VTDContextEntry *ce);
+
  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                              uint64_t wmask, uint64_t w1cmask)
  {
@@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
      return new_val;
  }
+static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
+{
+    VTDContextEntry ce;
+    int ret_fr;
+
+    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+    if (ret_fr){
+        return -1;
+    }
+
+    return VTD_CONTEXT_ENTRY_DID(ce.hi);
+}
+
  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
                                          uint64_t clear, uint64_t mask)
  {
@@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
      }
      if (!vtd_context_entry_present(ce)) {
-        VTD_DPRINTF(GENERAL,
+        /*VTD_DPRINTF(GENERAL,
                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
-                    "is not present", devfn, bus_num);
+                    "is not present", devfn, bus_num);*/
          return -VTD_FR_CONTEXT_ENTRY_P;
      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
@@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                        hwaddr addr, uint8_t am)
  {
      VTDIOTLBPageInvInfo info;
+    VFIOGuestIOMMU * giommu;
+    bool flag = false;
      assert(am <= VTD_MAMV);
      info.domain_id = domain_id;
      info.addr = addr;
      info.mask = ~((1 << am) - 1);
+
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id){
+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
+            if (iotlb_entry != NULL){
+                IOMMUTLBEntry entry;
+                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
+                entry.iova = addr & VTD_PAGE_MASK_4K;
+                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
+                entry.addr_mask = ~VTD_PAGE_MASK_4K;
+                entry.perm = IOMMU_NONE;
+                memory_region_notify_iommu(giommu->iommu, entry);
+                flag = true;
+
+            }
+        }
+
+    }
+
      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
-}
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id && !flag){
+            /* do vfio map */
+            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
+            /* call to vtd_iommu_translate */
+            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
+            entry.perm = IOMMU_RW;
+            memory_region_notify_iommu(giommu->iommu, entry);
+            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
+        }
+    }
+}
  /* Flush IOTLB
   * Returns the IOTLB Actual Invalidation Granularity.
   * @val: the content of the IOTLB_REG
@@ -1895,6 +1951,13 @@ static Property vtd_properties[] = {
      DEFINE_PROP_END_OF_LIST(),
  };
+void vtd_register_giommu(VFIOGuestIOMMU * giommu)
+{
+    VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+
+    QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
+}
  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
  {
@@ -1949,7 +2012,8 @@ static void vtd_init(IntelIOMMUState *s)
      s->iq_last_desc_type = VTD_INV_DESC_NONE;
      s->next_frcd_reg = 0;
      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
-             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
+             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
+             VTD_CAP_CM;
      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
      vtd_reset_context_cache(s);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c..ae40f73 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -190,6 +190,7 @@
  #define VTD_CAP_MAMV                (VTD_MAMV << 48)
  #define VTD_CAP_PSI                 (1ULL << 39)
  #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_CM                  (1ULL << 7)
  /* Supported Adjusted Guest Address Widths */
  #define VTD_CAP_SAGAW_SHIFT         8
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 607ec70..98c8d67 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -32,6 +32,9 @@
  #include "sysemu/kvm.h"
  #include "trace.h"
+#include "hw/sysbus.h"
+#include "hw/i386/intel_iommu.h"
+
  struct vfio_group_head vfio_group_list =
      QLIST_HEAD_INITIALIZER(vfio_group_list);
  struct vfio_as_head vfio_address_spaces =
@@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
  out:
      rcu_read_unlock();
  }
-
+#if 0
  static hwaddr vfio_container_granularity(VFIOContainer *container)
  {
      return (hwaddr)1 << ctz64(container->iova_pgsizes);
  }
-
+#endif
  static void vfio_listener_region_add(MemoryListener *listener,
                                       MemoryRegionSection *section)
  {
@@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
      llend = int128_make64(section->offset_within_address_space);
      llend = int128_add(llend, section->size);
+    llend = int128_add(llend, int128_exts64(-1));
      llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
      if (int128_ge(int128_make64(iova), llend)) {
@@ -381,11 +385,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
          giommu->n.notify = vfio_iommu_map_notify;
          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
+        vtd_register_giommu(giommu);
          memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+#if 0
          memory_region_iommu_replay(giommu->iommu, &giommu->n,
                                     vfio_container_granularity(container),
                                     false);
-
+#endif
          return;
      }
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index b024ffa..22f3f83 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -23,6 +23,7 @@
  #define INTEL_IOMMU_H
  #include "hw/qdev.h"
  #include "sysemu/dma.h"
+#include "hw/vfio/vfio-common.h"
  #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
  #define INTEL_IOMMU_DEVICE(obj) \
@@ -123,6 +124,8 @@ struct IntelIOMMUState {
      MemoryRegionIOMMUOps iommu_ops;
      GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
      VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
+
+    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
  };
  /* Find the VTD Address space associated with the given bus pointer,
@@ -130,4 +133,5 @@ struct IntelIOMMUState {
   */
  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
+void vtd_register_giommu(VFIOGuestIOMMU * giommu);
  #endif
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f037f3c..9225ba3 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
      MemoryRegion *iommu;
      Notifier n;
      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+    QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
  } VFIOGuestIOMMU;
  typedef struct VFIODeviceOps VFIODeviceOps;
--
1.9.1
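
For completeness, here is roughly where the notifications produced above land.
The notifier hooked up as giommu->n.notify already exists in hw/vfio/common.c
(the patch only adjusts its surroundings); the sketch below is a simplified
paraphrase rather than the actual function, with RCU, error handling and the
guest-physical to host-virtual translation reduced to a placeholder helper,
guest_phys_to_host_va().

static void vfio_iommu_map_notify_sketch(Notifier *n, void *data)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    IOMMUTLBEntry *iotlb = data;
    hwaddr iova = iotlb->iova;
    ram_addr_t size = iotlb->addr_mask + 1;

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        /* Placeholder for the real lookup done with
         * address_space_translate() + memory_region_get_ram_ptr(). */
        void *vaddr = guest_phys_to_host_va(iotlb->translated_addr);

        /* Ends up as VFIO_IOMMU_MAP_DMA on the container fd. */
        vfio_dma_map(container, iova, size, vaddr,
                     !(iotlb->perm & IOMMU_WO));
    } else {
        /* Ends up as VFIO_IOMMU_UNMAP_DMA. */
        vfio_dma_unmap(container, iova, size);
    }
}

This is how the guest's invalidations, once mirrored by the patch, become
pinned mappings (or unmappings) in the host IOMMU domain backing the assigned
device.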



