[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-ppc] [PATCH] vfio-powerpc: added VFIO support (v4)
From: |
Alexey Kardashevskiy |
Subject: |
[Qemu-ppc] [PATCH] vfio-powerpc: added VFIO support (v4) |
Date: |
Wed, 18 Jul 2012 21:09:28 +1000 |
It literally does the following:
1. POWERPC IOMMU support (the kernel counterpart is required)
2. The patch assumes that IOAPIC calls are going to be replaced
with something generic.
3. Added sPAPRVFIOData (hw/spapr_iommu_vfio.h) which describes
the interface between VFIO and sPAPR IOMMU.
4. Change sPAPR PHB to scan the PCI bus which is used for
the IOMMU-VFIO group. Now it is enough to add the following to
the QEMU command line to get VFIO up with all the devices from
IOMMU group with id=3:
-device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000
WIth the pathes posted today a bit earlier, this patch fully supports
VFIO what includes MSIX as well.
ps. yes, I know that linux_vfio.h has moved, will fix it later :)
Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
hw/linux-vfio.h | 26 +++++++++++
hw/ppc/Makefile.objs | 3 ++
hw/spapr.h | 4 ++
hw/spapr_iommu.c | 62 ++++++++++++++++++++++++-
hw/spapr_iommu_vfio.h | 34 ++++++++++++++
hw/spapr_pci.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++--
hw/spapr_pci.h | 6 +++
hw/vfio_pci.c | 64 +++++++++++++++++++++++++
hw/vfio_pci.h | 2 +
trace-events | 1 +
10 files changed, 320 insertions(+), 6 deletions(-)
create mode 100644 hw/spapr_iommu_vfio.h
diff --git a/hw/linux-vfio.h b/hw/linux-vfio.h
index 300d49b..27a0501 100644
--- a/hw/linux-vfio.h
+++ b/hw/linux-vfio.h
@@ -442,4 +442,30 @@ struct vfio_iommu_type1_dma_unmap {
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+/*
+ * Interface to SPAPR TCE (POWERPC Book3S)
+ */
+#define SPAPR_TCE_IOMMU 2
+
+struct tce_iommu_info {
+ __u32 argsz;
+ __u32 flags;
+ __u32 dma32_window_start;
+ __u32 dma32_window_size;
+ __u64 dma64_window_start;
+ __u64 dma64_window_size;
+};
+
+#define SPAPR_TCE_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct tce_iommu_dma_map {
+ __u32 argsz;
+ __u32 flags;
+ __u64 va;
+ __u64 dmaaddr;
+};
+
+#define SPAPR_TCE_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+#define SPAPR_TCE_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
#endif /* VFIO_H */
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index f573a95..c46a049 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
# Xilinx PPC peripherals
obj-y += xilinx_ethlite.o
+# VFIO PCI device assignment
+obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
+
obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/spapr.h b/hw/spapr.h
index b37f337..0c15c88 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char
*propname,
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
DMAContext *dma);
+struct sPAPRVFIOData;
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+ struct sPAPRVFIOData *data);
+
#endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 50c288d..0a82842 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -23,6 +23,8 @@
#include "dma.h"
#include "hw/spapr.h"
+#include "hw/spapr_iommu_vfio.h"
+#include "hw/vfio_pci.h"
#include <libfdt.h>
@@ -183,6 +185,60 @@ static int put_tce_emu(target_ulong liobn, target_ulong
ioba, target_ulong tce)
return 0;
}
+typedef struct sPAPRVFIOTable {
+ struct sPAPRVFIOData *data;
+ uint32_t liobn;
+ QLIST_ENTRY(sPAPRVFIOTable) list;
+} sPAPRVFIOTable;
+
+QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
+
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+ struct sPAPRVFIOData *data)
+{
+ sPAPRVFIOTable *t;
+
+ t = g_malloc0(sizeof(*t));
+ t->data = data;
+ t->liobn = liobn;
+
+ QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
+}
+
+static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
+{
+ sPAPRVFIOTable *t;
+ struct tce_iommu_dma_map map = {
+ .argsz = sizeof(map),
+ .va = 0,
+ .dmaaddr = ioba,
+ };
+
+ QLIST_FOREACH(t, &vfio_tce_tables, list) {
+ if (t->liobn != liobn) {
+ continue;
+ }
+ if (!t->data) {
+ return H_NO_MEM;
+ }
+ if (tce) {
+ map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
+
+ if (t->data->map(t->data->groupid, &map)) {
+ perror("TCE_MAP_DMA");
+ return H_PARAMETER;
+ }
+ } else {
+ if (t->data->unmap(t->data->groupid, &map)) {
+ perror("TCE_UNMAP_DMA");
+ return H_PARAMETER;
+ }
+ }
+ return H_SUCCESS;
+ }
+ return H_CONTINUE; /* positive non-zero value */
+}
+
static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
target_ulong opcode, target_ulong *args)
{
@@ -200,7 +256,11 @@ static target_ulong h_put_tce(CPUPPCState *env,
sPAPREnvironment *spapr,
ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
ret = put_tce_emu(liobn, ioba, tce);
- if (0 >= ret) {
+ if (ret <= 0) {
+ return ret ? H_PARAMETER : H_SUCCESS;
+ }
+ ret = put_tce_vfio(liobn, ioba, tce);
+ if (ret <= 0) {
return ret ? H_PARAMETER : H_SUCCESS;
}
#ifdef DEBUG_TCE
diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
new file mode 100644
index 0000000..b3d6115
--- /dev/null
+++ b/hw/spapr_iommu_vfio.h
@@ -0,0 +1,34 @@
+/*
+ * Definitions for VFIO IOMMU implementation for SPAPR TCE.
+ *
+ * Copyright (c) 2012 Alexey Kardashevskiy <address@hidden>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
+#define __HW_SPAPR_IOMMU_VFIO_H__
+
+#include "hw/linux-vfio.h"
+
+struct sPAPRVFIOData {
+ int groupid;
+ struct tce_iommu_info info;
+ int (*map)(int groupid, struct tce_iommu_dma_map *param);
+ int (*unmap)(int groupid, struct tce_iommu_dma_map *param);
+};
+
+void spapr_register_vfio_container(struct sPAPRVFIOData *data);
+
+#endif
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 88c8f2c..75743e7 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -22,6 +22,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include <sys/types.h>
+#include <dirent.h>
+
#include "hw.h"
#include "pci.h"
#include "msi.h"
@@ -32,7 +35,6 @@
#include "exec-memory.h"
#include <libfdt.h>
#include "trace.h"
-
#include "hw/pci_internals.h"
/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
@@ -440,6 +442,12 @@ static void pci_spapr_set_irq(void *opaque, int irq_num,
int level)
level);
}
+static int pci_spapr_get_irq(void *opaque, int irq_num)
+{
+ sPAPRPHBState *phb = opaque;
+ return phb->lsi_table[irq_num].dt_irq;
+}
+
static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
unsigned size)
{
@@ -515,6 +523,93 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus,
void *opaque,
return phb->dma;
}
+void spapr_register_vfio_container(struct sPAPRVFIOData *data)
+{
+ sPAPRPHBState *phb;
+
+ QLIST_FOREACH(phb, &spapr->phbs, list) {
+ if (phb->iommugroupid == data->groupid) {
+ phb->vfio_data = *data;
+ phb->dma_window_start = phb->vfio_data.info.dma32_window_start;
+ phb->dma_window_size = phb->vfio_data.info.dma32_window_size;
+ return;
+ }
+ }
+}
+
+static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
+{
+ char iommupath[256];
+ DIR *dirp;
+ struct dirent *entry;
+
+ if (!phb->scan) {
+ trace_spapr_pci("Autoscan disabled for ", phb->dtbusname);
+ return 0;
+ }
+
+ snprintf(iommupath, sizeof(iommupath),
+ "/sys/kernel/iommu_groups/%d/devices/", phb->iommugroupid);
+ dirp = opendir(iommupath);
+
+ while ((entry = readdir(dirp)) != NULL) {
+ char *tmp;
+ FILE *deviceclassfile;
+ unsigned deviceclass = 0, domainid, busid, devid, fnid;
+ char addr[32];
+ DeviceState *dev;
+
+ if (sscanf(entry->d_name, "%X:%X:%X.%x",
+ &domainid, &busid, &devid, &fnid) != 4) {
+ continue;
+ }
+
+ tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
+ trace_spapr_pci("Reading device class from ", tmp);
+
+ deviceclassfile = fopen(tmp, "r");
+ if (deviceclassfile) {
+ fscanf(deviceclassfile, "%x", &deviceclass);
+ fclose(deviceclassfile);
+ }
+ g_free(tmp);
+
+ if (!deviceclass) {
+ continue;
+ }
+ if ((phb->scan < 2) &&
+ ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
+ /* Skip _any_ bridge */
+ continue;
+ }
+ if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
+ /* Tweak USB */
+ phb->force_addr = 1;
+ phb->enable_multifunction = 1;
+ }
+
+ trace_spapr_pci("Creating devicei from ", entry->d_name);
+
+ dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
+ if (!dev) {
+ fprintf(stderr, "failed to create vfio-pci\n");
+ continue;
+ }
+ qdev_prop_parse(dev, "host", entry->d_name);
+ if (phb->force_addr) {
+ snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
+ qdev_prop_parse(dev, "addr", addr);
+ }
+ if (phb->enable_multifunction) {
+ qdev_prop_set_bit(dev, "multifunction", 1);
+ }
+ qdev_init_nofail(dev);
+ }
+ closedir(dirp);
+
+ return 0;
+}
+
static int spapr_phb_init(SysBusDevice *s)
{
sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
@@ -567,15 +662,13 @@ static int spapr_phb_init(SysBusDevice *s)
bus = pci_register_bus(&phb->host_state.busdev.qdev,
phb->busname ? phb->busname : phb->dtbusname,
- pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
+ pci_spapr_set_irq, pci_spapr_get_irq,
+ pci_spapr_map_irq, phb,
&phb->memspace, &phb->iospace,
PCI_DEVFN(0, 0), PCI_NUM_PINS);
phb->host_state.bus = bus;
phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
- phb->dma_window_start = 0;
- phb->dma_window_size = 0x40000000;
- phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, phb->dma_window_size);
pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
@@ -588,6 +681,19 @@ static int spapr_phb_init(SysBusDevice *s)
}
}
+ if (phb->iommugroupid >= 0) {
+ if (spapr_pci_scan_vfio(phb) < 0) {
+ return -1;
+ }
+ spapr_vfio_init_dma(phb->iommugroupid, phb->dma_liobn,
&phb->vfio_data);
+ return 0;
+ }
+
+ phb->dma_window_start = 0;
+ phb->dma_window_size = 0x40000000;
+ phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
+ phb->dma_window_size);
+
return 0;
}
@@ -599,6 +705,10 @@ static Property spapr_phb_properties[] = {
DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
+ DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
+ DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
+ DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
+ DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -729,6 +839,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
sizeof(interrupt_map)));
+ if (!phb->dma_window_size) {
+ fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
+ exit(1);
+ }
spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
phb->dma_liobn, phb->dma_window_start,
phb->dma_window_size);
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 145071c..ed17053 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -26,6 +26,7 @@
#include "hw/pci.h"
#include "hw/pci_host.h"
#include "hw/xics.h"
+#include "hw/spapr_iommu_vfio.h"
#define SPAPR_MSIX_MAX_DEVS 32
@@ -57,6 +58,11 @@ typedef struct sPAPRPHBState {
int nvec;
} msi_table[SPAPR_MSIX_MAX_DEVS];
+ struct sPAPRVFIOData vfio_data;
+ int32_t iommugroupid;
+ uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
+ uint8_t enable_multifunction, force_addr;
+
QLIST_ENTRY(sPAPRPHBState) list;
} sPAPRPHBState;
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 0ad4761..0023d41 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -43,6 +43,7 @@
#include "range.h"
#include "vfio_pci.h"
#include "linux-vfio.h"
+#include "hw/spapr_iommu_vfio.h"
//#define DEBUG_VFIO
#ifdef DEBUG_VFIO
@@ -55,6 +56,8 @@
#define MSIX_CAP_LENGTH 12
+static VFIOGroup *vfio_get_group(int groupid);
+
static QLIST_HEAD(, VFIOContainer)
container_list = QLIST_HEAD_INITIALIZER(container_list);
@@ -1088,6 +1091,31 @@ static void vfio_listener_release(VFIOContainer
*container)
}
/*
+ * sPAPR TCE DMA interface
+ */
+static int spapr_tce_map(int groupid, struct tce_iommu_dma_map *param)
+{
+ VFIOGroup *group = vfio_get_group(groupid);
+
+ if (!group || !group->container) {
+ return -1;
+ }
+
+ return ioctl(group->container->fd, SPAPR_TCE_IOMMU_MAP_DMA, param);
+}
+
+static int spapr_tce_unmap(int groupid, struct tce_iommu_dma_map *param)
+{
+ VFIOGroup *group = vfio_get_group(groupid);
+
+ if (!group || !group->container) {
+ return -1;
+ }
+
+ return ioctl(group->container->fd, SPAPR_TCE_IOMMU_UNMAP_DMA, param);
+}
+
+/*
* Interrupt setup
*/
static void vfio_disable_interrupts(VFIODevice *vdev)
@@ -1590,6 +1618,42 @@ static int vfio_connect_container(VFIOGroup *group)
memory_listener_register(&container->iommu_data.listener,
get_system_memory());
+ } else if (ioctl(fd, VFIO_CHECK_EXTENSION, SPAPR_TCE_IOMMU)) {
+ ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+ if (ret) {
+ error_report("vfio: failed to set group container: %s\n",
+ strerror(errno));
+ g_free(container);
+ close(fd);
+ return -1;
+ }
+
+ ret = ioctl(fd, VFIO_SET_IOMMU, SPAPR_TCE_IOMMU);
+ if (ret) {
+ error_report("vfio: failed to set iommu for container: %s\n",
+ strerror(errno));
+ g_free(container);
+ close(fd);
+ return -1;
+ }
+
+ container->iommu_data.spapr.info.argsz =
+ sizeof(container->iommu_data.spapr.info);
+ ret = ioctl(fd, SPAPR_TCE_IOMMU_GET_INFO,
+ &container->iommu_data.spapr.info);
+ if (ret) {
+ error_report("vfio: failed to get iommu info for container: %s\n",
+ strerror(errno));
+ g_free(container);
+ close(fd);
+ return -1;
+ }
+
+ container->iommu_data.spapr.map = spapr_tce_map;
+ container->iommu_data.spapr.unmap = spapr_tce_unmap;
+ container->iommu_data.spapr.groupid = group->groupid;
+ spapr_register_vfio_container(&container->iommu_data.spapr);
+
} else {
error_report("vfio: No available IOMMU models\n");
g_free(container);
diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
index 00bb3dd..ddb5332 100644
--- a/hw/vfio_pci.h
+++ b/hw/vfio_pci.h
@@ -6,6 +6,7 @@
#include "pci.h"
#include "ioapic.h"
#include "event_notifier.h"
+#include "hw/spapr_iommu_vfio.h"
typedef struct VFIOPCIHostDevice {
uint16_t seg;
@@ -59,6 +60,7 @@ typedef struct VFIOContainer {
struct {
union {
MemoryListener listener;
+ struct sPAPRVFIOData spapr;
};
void (*release)(struct VFIOContainer *);
} iommu_data;
diff --git a/trace-events b/trace-events
index e548f86..9100591 100644
--- a/trace-events
+++ b/trace-events
@@ -848,6 +848,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t
height, int32_t stride,
qxl_render_update_area_done(void *cookie) "%p"
# hw/spapr_pci.c
+spapr_pci(const char *msg1, const char *msg2) "%s%s"
spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d,
cfg=%x)"
spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr)
"dev\"%s\" vector %u, addr=%"PRIx64
spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested
%u"
--
1.7.10.4
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v2), (continued)
- [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/13
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Blue Swirl, 2012/07/13
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/13
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/13
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/16
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/16
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/17
- Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/17
[Qemu-ppc] [PATCH] vfio-powerpc: added VFIO support (v4),
Alexey Kardashevskiy <=
[Qemu-ppc] [PATCH] vfio-powerpc: added VFIO support (v5), Alexey Kardashevskiy, 2012/07/19