[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH RFC v2] IOMMU: Add Support to VFIO devices with vIOM
From: |
Aviv B.D. |
Subject: |
[Qemu-devel] [PATCH RFC v2] IOMMU: Add Support to VFIO devices with vIOMMU present |
Date: |
Sat, 9 Apr 2016 21:03:38 +0300 |
From: "Aviv Ben-David" <address@hidden>
Date: Tue, 23 Feb 2016 00:24:54 +0200
Subject: [PATCH] IOMMU: Add Support to VFIO devices with vIOMMU present
* Fix bug that prevent qemu from starting up with vIOMMU and VFIO
device are present.
* Advertize Cache Mode capability in iommu cap register.
* Register every VFIO device with IOMMU state.
* On page cache invalidation in vIOMMU, check if the domain belong to
VFIO device and mirror the guest requests to host.
Changes from previous versions:
* remove assumption that the cache do not clears
* fix lock up on high load.
* refactor vtd_get_did_dev to return success return code, and actual
domain_id via argument.
Tested only on network cards (also with multiple cards at once).
Signed-off-by: Aviv Ben-David <address@hidden>
---
hw/i386/intel_iommu.c | 113 +++++++++++++++++++++++++++++++++++------
hw/i386/intel_iommu_internal.h | 3 ++
hw/vfio/common.c | 12 +++--
include/exec/memory.h | 8 ++-
include/hw/i386/intel_iommu.h | 4 ++
include/hw/vfio/vfio-common.h | 1 +
6 files changed, 121 insertions(+), 20 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 347718f..a568181 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -43,6 +43,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) |
VTD_DBGBIT(CSR);
#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
#endif
+static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+ uint8_t devfn, VTDContextEntry *ce);
+
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -126,6 +129,22 @@ static uint32_t
vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
return new_val;
}
+static int vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num,
uint8_t devfn, uint16_t * domain_id)
+{
+ VTDContextEntry ce;
+ int ret_fr;
+
+ assert(domain_id);
+
+ ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+ if (ret_fr){
+ return -1;
+ }
+
+ *domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi);
+ return 0;
+}
+
static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
uint64_t clear, uint64_t mask)
{
@@ -621,7 +640,7 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte,
uint32_t level)
/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
-static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
+static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa,
IOMMUAccessPermissions is_write,
uint64_t *slptep, uint32_t *slpte_level,
bool *reads, bool *writes)
{
@@ -641,7 +660,19 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce,
uint64_t gpa, bool is_write,
}
/* FIXME: what is the Atomics request here? */
- access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
+ switch(is_write){
+ case IOMMU_WRITE:
+ access_right_check = VTD_SL_W;
+ break;
+ case IOMMU_READ:
+ access_right_check = VTD_SL_R;
+ break;
+ case IOMMU_ANY:
+ access_right_check = VTD_SL_R | VTD_SL_W;
+ break;
+ default:
+ assert(0);
+ }
while (true) {
offset = vtd_gpa_level_offset(gpa, level);
@@ -711,9 +742,9 @@ static int
vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (!vtd_context_entry_present(ce)) {
- VTD_DPRINTF(GENERAL,
+ /*VTD_DPRINTF(GENERAL,
"error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
- "is not present", devfn, bus_num);
+ "is not present", devfn, bus_num);*/
return -VTD_FR_CONTEXT_ENTRY_P;
} else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
@@ -785,7 +816,7 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
* @entry: IOMMUTLBEntry that contain the addr to be translated and result
*/
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
- uint8_t devfn, hwaddr addr, bool is_write,
+ uint8_t devfn, hwaddr addr,
IOMMUAccessPermissions is_write,
IOMMUTLBEntry *entry)
{
IntelIOMMUState *s = vtd_as->iommu_state;
@@ -848,12 +879,14 @@ static void
vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (ret_fr) {
ret_fr = -ret_fr;
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
+ if (is_write != IOMMU_ANY){
+ if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
+ VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
"requests through this context-entry "
"(with FPD Set)");
- } else {
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+ } else {
+ vtd_report_dmar_fault(s, source_id, addr, ret_fr,
is_write);
+ }
}
return;
}
@@ -870,11 +903,13 @@ static void
vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
&reads, &writes);
if (ret_fr) {
ret_fr = -ret_fr;
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
+ if (is_write != IOMMU_ANY){
+ if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
+ VTD_DPRINTF(FLOG, "fault processing is disabled for
DMA requests "
"through this context-entry (with FPD Set)");
- } else {
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+ } else {
+ vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+ }
}
return;
}
@@ -1016,18 +1051,58 @@ static void
vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
&domain_id);
}
+static void vtd_iotlb_page_invalidate_vfio(IntelIOMMUState *s,
uint16_t domain_id,
+ hwaddr addr, uint8_t am)
+{
+ VFIOGuestIOMMU * giommu;
+
+ QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+ VTDAddressSpace *vtd_as = container_of(giommu->iommu,
VTDAddressSpace, iommu);
+ uint16_t vfio_domain_id;
+ int ret = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &vfio_domain_id);
+ int i=0;
+ if (!ret && domain_id == vfio_domain_id){
+ IOMMUTLBEntry entry;
+
+ /* do vfio unmap */
+ VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
+ entry.target_as = NULL;
+ entry.iova = addr & VTD_PAGE_MASK_4K;
+ entry.translated_addr = 0;
+ entry.addr_mask = ~VTD_PAGE_MASK(VTD_PAGE_SHIFT_4K + am);
+ entry.perm = IOMMU_NONE;
+ memory_region_notify_iommu(giommu->iommu, entry);
+
+ /* do vfio map */
+ VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
+ /* call to vtd_iommu_translate */
+ for (i = 0; i < (1 << am); i++, addr+=(1 << VTD_PAGE_SHIFT_4K)){
+ IOMMUTLBEntry entry =
s->iommu_ops.translate(giommu->iommu, addr, IOMMU_ANY);
+ if (entry.perm != IOMMU_NONE){
+ memory_region_notify_iommu(giommu->iommu, entry);
+ }
+ }
+ }
+ }
+}
+
static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
hwaddr addr, uint8_t am)
{
VTDIOTLBPageInvInfo info;
assert(am <= VTD_MAMV);
+
info.domain_id = domain_id;
info.addr = addr;
info.mask = ~((1 << am) - 1);
+
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+
+ vtd_iotlb_page_invalidate_vfio(s, domain_id, addr, am);
}
+
/* Flush IOTLB
* Returns the IOTLB Actual Invalidation Granularity.
* @val: the content of the IOTLB_REG
@@ -1840,7 +1915,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
}
static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
- bool is_write)
+ IOMMUAccessPermissions is_write)
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
@@ -1895,6 +1970,13 @@ static Property vtd_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+void vtd_register_giommu(VFIOGuestIOMMU * giommu)
+{
+ VTDAddressSpace *vtd_as = container_of(giommu->iommu,
VTDAddressSpace, iommu);
+ IntelIOMMUState *s = vtd_as->iommu_state;
+
+ QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
+}
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
@@ -1949,7 +2031,8 @@ static void vtd_init(IntelIOMMUState *s)
s->iq_last_desc_type = VTD_INV_DESC_NONE;
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
- VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
+ VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS|
+ VTD_CAP_CM;
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
vtd_reset_context_cache(s);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c..102e9a5 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -190,6 +190,7 @@
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_CM (1ULL << 7)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8
@@ -338,6 +339,8 @@ typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
#define VTD_PAGE_SHIFT_1G 30
#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1))
+#define VTD_PAGE_MASK(shift) (~((1ULL << (shift)) - 1))
+
struct VTDRootEntry {
uint64_t val;
uint64_t rsvd;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 607ec70..98c8d67 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -32,6 +32,9 @@
#include "sysemu/kvm.h"
#include "trace.h"
+#include "hw/sysbus.h"
+#include "hw/i386/intel_iommu.h"
+
struct vfio_group_head vfio_group_list =
QLIST_HEAD_INITIALIZER(vfio_group_list);
struct vfio_as_head vfio_address_spaces =
@@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
out:
rcu_read_unlock();
}
-
+#if 0
static hwaddr vfio_container_granularity(VFIOContainer *container)
{
return (hwaddr)1 << ctz64(container->iova_pgsizes);
}
-
+#endif
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -344,6 +347,7 @@ static void
vfio_listener_region_add(MemoryListener *listener,
iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
+ llend = int128_add(llend, int128_exts64(-1));
llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
if (int128_ge(int128_make64(iova), llend)) {
@@ -381,11 +385,13 @@ static void
vfio_listener_region_add(MemoryListener *listener,
giommu->n.notify = vfio_iommu_map_notify;
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
+ vtd_register_giommu(giommu);
memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+#if 0
memory_region_iommu_replay(giommu->iommu, &giommu->n,
vfio_container_granularity(container),
false);
-
+#endif
return;
}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2de7898..0e814ab 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -146,10 +146,14 @@ struct MemoryRegionOps {
};
typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
-
+typedef enum IOMMUAccessPermissions{
+ IOMMU_READ = 0,
+ IOMMU_WRITE = 1,
+ IOMMU_ANY = 2
+} IOMMUAccessPermissions;
struct MemoryRegionIOMMUOps {
/* Return a TLB entry that contains a given address. */
- IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool
is_write);
+ IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr,
IOMMUAccessPermissions is_write);
};
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index b024ffa..22f3f83 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -23,6 +23,7 @@
#define INTEL_IOMMU_H
#include "hw/qdev.h"
#include "sysemu/dma.h"
+#include "hw/vfio/vfio-common.h"
#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
#define INTEL_IOMMU_DEVICE(obj) \
@@ -123,6 +124,8 @@ struct IntelIOMMUState {
MemoryRegionIOMMUOps iommu_ops;
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by
PCIBus* reference */
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects
indexed by bus number */
+
+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
};
/* Find the VTD Address space associated with the given bus pointer,
@@ -130,4 +133,5 @@ struct IntelIOMMUState {
*/
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
+void vtd_register_giommu(VFIOGuestIOMMU * giommu);
#endif
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f037f3c..9225ba3 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
MemoryRegion *iommu;
Notifier n;
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+ QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
} VFIOGuestIOMMU;
typedef struct VFIODeviceOps VFIODeviceOps;
--
1.9.1
- [Qemu-devel] [PATCH RFC v2] IOMMU: Add Support to VFIO devices with vIOMMU present,
Aviv B.D. <=