qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3)


From: Alexey Kardashevskiy
Subject: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3)
Date: Fri, 13 Jul 2012 17:26:17 +1000

It literally does the following:

1. POWERPC IOMMU support (the kernel counterpart is required)

2. The patch assumes that IOAPIC calls are going to be replaced
with something generic.

3. vfio_group_iommu_ioctl() has been added to let sPAPR IOMMU
handler to call VFIO IOMMU driver.

4. Change sPAPR PHB to scan the PCI bus which is used for
the IOMMU-VFIO group. Now it is enough to add the following to
the QEMU command line to get VFIO up with all the devices from
IOMMU group with id=3:
-device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000

Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
 hw/ppc/Makefile.objs  |    3 ++
 hw/spapr.h            |    4 ++
 hw/spapr_iommu.c      |   69 ++++++++++++++++++++++++++++++-
 hw/spapr_iommu_vfio.h |   49 ++++++++++++++++++++++
 hw/spapr_pci.c        |  108 ++++++++++++++++++++++++++++++++++++++++++++++---
 hw/spapr_pci.h        |    4 ++
 hw/vfio_pci.c         |   30 ++++++++++++++
 hw/vfio_pci.h         |    2 +
 trace-events          |    1 +
 9 files changed, 264 insertions(+), 6 deletions(-)
 create mode 100644 hw/spapr_iommu_vfio.h

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index f573a95..c46a049 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
 # Xilinx PPC peripherals
 obj-y += xilinx_ethlite.o
 
+# VFIO PCI device assignment
+obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
+
 obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/spapr.h b/hw/spapr.h
index b37f337..26e26f6 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                       DMAContext *dma);
 
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+                         uint64_t *dma32_window_start,
+                         uint64_t *dma32_window_size);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 50c288d..e48ced1 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -23,6 +23,8 @@
 #include "dma.h"
 
 #include "hw/spapr.h"
+#include "hw/spapr_iommu_vfio.h"
+#include "hw/vfio_pci.h"
 
 #include <libfdt.h>
 
@@ -183,6 +185,67 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
ioba, target_ulong tce)
     return 0;
 }
 
+typedef struct sPAPRVFIOTable {
+    int group_id;
+    uint32_t liobn;
+    QLIST_ENTRY(sPAPRVFIOTable) list;
+} sPAPRVFIOTable;
+
+QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
+
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+                         uint64_t *dma32_window_start,
+                         uint64_t *dma32_window_size)
+{
+    sPAPRVFIOTable *t;
+    struct tce_iommu_info info = { .argsz = sizeof(info) };
+
+    if (vfio_group_iommu_ioctl(group_id, SPAPR_TCE_IOMMU_GET_INFO, &info)) {
+        perror("SPAPR_TCE_IOMMU_GET_INFO failed");
+        return;
+    }
+    *dma32_window_start = info.dma32_window_start;
+    *dma32_window_size = info.dma32_window_size;
+
+    t = g_malloc0(sizeof(*t));
+    t->group_id = group_id;
+    t->liobn = liobn;
+
+    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
+}
+
+static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
+{
+    sPAPRVFIOTable *t;
+    struct tce_iommu_dma_map map = {
+        .argsz = sizeof(map),
+        .va = 0,
+        .dmaaddr = ioba,
+    };
+
+    QLIST_FOREACH(t, &vfio_tce_tables, list) {
+        if (t->liobn != liobn) {
+            continue;
+        }
+        if (tce) {
+            map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
+            if (vfio_group_iommu_ioctl(t->group_id, SPAPR_TCE_IOMMU_MAP_DMA,
+                                       &map)) {
+                perror("TCE_MAP_DMA");
+                return H_PARAMETER;
+            }
+        } else {
+            if (vfio_group_iommu_ioctl(t->group_id, SPAPR_TCE_IOMMU_UNMAP_DMA,
+                                       &map)) {
+                perror("TCE_UNMAP_DMA");
+                return H_PARAMETER;
+            }
+        }
+        return H_SUCCESS;
+    }
+    return H_CONTINUE; /* positive non-zero value */
+}
+
 static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
                               target_ulong opcode, target_ulong *args)
 {
@@ -200,7 +263,11 @@ static target_ulong h_put_tce(CPUPPCState *env, 
sPAPREnvironment *spapr,
     ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
 
     ret = put_tce_emu(liobn, ioba, tce);
-    if (0 >= ret) {
+    if (ret <= 0) {
+        return ret ? H_PARAMETER : H_SUCCESS;
+    }
+    ret = put_tce_vfio(liobn, ioba, tce);
+    if (ret <= 0) {
         return ret ? H_PARAMETER : H_SUCCESS;
     }
 #ifdef DEBUG_TCE
diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
new file mode 100644
index 0000000..711e3e4
--- /dev/null
+++ b/hw/spapr_iommu_vfio.h
@@ -0,0 +1,49 @@
+/*
+ * Definitions for VFIO IOMMU driver implementing SPAPR TCE.
+ * This is the copy of the kernel header.
+ *
+ * Copyright (c) 2012 Alexey Kardashevskiy <address@hidden>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
+#define __HW_SPAPR_IOMMU_VFIO_H__
+
+#include "hw/linux-vfio.h"
+
+#define SPAPR_TCE_IOMMU         2
+
+struct tce_iommu_info {
+    __u32 argsz;
+    __u32 flags;
+    __u32 dma32_window_start;
+    __u32 dma32_window_size;
+    __u64 dma64_window_start;
+    __u64 dma64_window_size;
+};
+
+#define SPAPR_TCE_IOMMU_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct tce_iommu_dma_map {
+    __u32 argsz;
+    __u32 flags;
+    __u64 va;
+    __u64 dmaaddr;
+};
+
+#define SPAPR_TCE_IOMMU_MAP_DMA         _IO(VFIO_TYPE, VFIO_BASE + 13)
+#define SPAPR_TCE_IOMMU_UNMAP_DMA       _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+#endif
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 014297b..836ec4f 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -22,6 +22,9 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <sys/types.h>
+#include <dirent.h>
+
 #include "hw.h"
 #include "pci.h"
 #include "msi.h"
@@ -32,7 +35,6 @@
 #include "exec-memory.h"
 #include <libfdt.h>
 #include "trace.h"
-
 #include "hw/pci_internals.h"
 
 /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
@@ -440,6 +442,12 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, 
int level)
                  level);
 }
 
+static int pci_spapr_get_irq(void *opaque, int irq_num)
+{
+    sPAPRPHBState *phb = opaque;
+    return phb->lsi_table[irq_num].dt_irq;
+}
+
 static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
 {
@@ -515,6 +523,79 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, 
void *opaque,
     return phb->dma;
 }
 
+static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
+{
+    char iommupath[256];
+    DIR *dirp;
+    struct dirent *entry;
+
+    if (!phb->scan) {
+        trace_spapr_pci("Autoscan disabled for ", phb->dtbusname);
+        return 0;
+    }
+
+    snprintf(iommupath, sizeof(iommupath),
+             "/sys/kernel/iommu_groups/%d/devices/", phb->iommugroupid);
+    dirp = opendir(iommupath);
+
+    while ((entry = readdir(dirp)) != NULL) {
+        char *tmp;
+        FILE *deviceclassfile;
+        unsigned deviceclass = 0, domainid, busid, devid, fnid;
+        char addr[32];
+        DeviceState *dev;
+
+        if (sscanf(entry->d_name, "%X:%X:%X.%x",
+                   &domainid, &busid, &devid, &fnid) != 4) {
+            continue;
+        }
+
+        tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
+        trace_spapr_pci("Reading device class from ", tmp);
+
+        deviceclassfile = fopen(tmp, "r");
+        if (deviceclassfile) {
+            fscanf(deviceclassfile, "%x", &deviceclass);
+            fclose(deviceclassfile);
+        }
+        g_free(tmp);
+
+        if (!deviceclass) {
+            continue;
+        }
+        if ((phb->scan < 2) &&
+            ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
+            /* Skip _any_ bridge */
+            continue;
+        }
+        if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
+            /* Tweak USB */
+            phb->force_addr = 1;
+            phb->enable_multifunction = 1;
+        }
+
+        trace_spapr_pci("Creating devicei from ", entry->d_name);
+
+        dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
+        if (!dev) {
+            fprintf(stderr, "failed to create vfio-pci\n");
+            continue;
+        }
+        qdev_prop_parse(dev, "host", entry->d_name);
+        if (phb->force_addr) {
+            snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
+            qdev_prop_parse(dev, "addr", addr);
+        }
+        if (phb->enable_multifunction) {
+            qdev_prop_set_bit(dev, "multifunction", 1);
+        }
+        qdev_init_nofail(dev);
+    }
+    closedir(dirp);
+
+    return 0;
+}
+
 static int spapr_phb_init(SysBusDevice *s)
 {
     sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
@@ -567,15 +648,13 @@ static int spapr_phb_init(SysBusDevice *s)
 
     bus = pci_register_bus(&phb->host_state.busdev.qdev,
                            phb->busname ? phb->busname : phb->dtbusname,
-                           pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
+                           pci_spapr_set_irq, pci_spapr_get_irq,
+                           pci_spapr_map_irq, phb,
                            &phb->memspace, &phb->iospace,
                            PCI_DEVFN(0, 0), PCI_NUM_PINS);
     phb->host_state.bus = bus;
 
     phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
-    phb->dma_window_start = 0;
-    phb->dma_window_size = 0x40000000;
-    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, phb->dma_window_size);
     pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
@@ -588,6 +667,21 @@ static int spapr_phb_init(SysBusDevice *s)
         }
     }
 
+    if (phb->iommugroupid >= 0) {
+        if (spapr_pci_scan_vfio(phb) < 0) {
+            return -1;
+        }
+        spapr_vfio_init_dma(phb->iommugroupid, phb->dma_liobn,
+                            &phb->dma_window_start,
+                            &phb->dma_window_size);
+        return 0;
+    }
+
+    phb->dma_window_start = 0;
+    phb->dma_window_size = 0x40000000;
+    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
+                                         phb->dma_window_size);
+
     return 0;
 }
 
@@ -599,6 +693,10 @@ static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
     DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
     DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
+    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
+    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
+    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
+    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 145071c..f514823 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -57,6 +57,10 @@ typedef struct sPAPRPHBState {
         int nvec;
     } msi_table[SPAPR_MSIX_MAX_DEVS];
 
+    int32_t iommugroupid;
+    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
+    uint8_t enable_multifunction, force_addr;
+
     QLIST_ENTRY(sPAPRPHBState) list;
 } sPAPRPHBState;
 
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 1ac287f..fc84fb4 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -1581,6 +1581,24 @@ static int vfio_connect_container(VFIOGroup *group)
 
         memory_listener_register(&container->listener, get_system_memory());
 
+    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, SPAPR_TCE_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            error_report("vfio: failed to set group container: %s\n",
+                         strerror(errno));
+            g_free(container);
+            close(fd);
+            return -1;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, SPAPR_TCE_IOMMU);
+        if (ret) {
+            error_report("vfio: failed to set iommu for container: %s\n",
+                         strerror(errno));
+            g_free(container);
+            close(fd);
+            return -1;
+        }
     } else {
         error_report("vfio: No available IOMMU models\n");
         g_free(container);
@@ -2005,3 +2023,15 @@ static void register_vfio_pci_dev_type(void)
 }
 
 type_init(register_vfio_pci_dev_type)
+
+int vfio_group_iommu_ioctl(int iommu_group, int request, void *data)
+{
+    VFIOGroup *group;
+
+    group = vfio_get_group(iommu_group);
+    if (!group->container) {
+        return -EINVAL;
+    }
+
+    return ioctl(group->container->fd, request, data);
+}
diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
index 226607c..f44ff07 100644
--- a/hw/vfio_pci.h
+++ b/hw/vfio_pci.h
@@ -105,4 +105,6 @@ typedef struct VFIOGroup {
 #define VFIO_FLAG_IOMMU_SHARED_BIT 0
 #define VFIO_FLAG_IOMMU_SHARED (1U << VFIO_FLAG_UIOMMU_SHARED_BIT)
 
+int vfio_group_iommu_ioctl(int iommu_group, int request, void *data);
+
 #endif /* __VFIO_H__ */
diff --git a/trace-events b/trace-events
index e548f86..9100591 100644
--- a/trace-events
+++ b/trace-events
@@ -848,6 +848,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t 
height, int32_t stride,
 qxl_render_update_area_done(void *cookie) "%p"
 
 # hw/spapr_pci.c
+spapr_pci(const char *msg1, const char *msg2) "%s%s"
 spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, 
cfg=%x)"
 spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) 
"dev\"%s\" vector %u, addr=%"PRIx64
 spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested 
%u"
-- 
1.7.10




reply via email to

[Prev in Thread] Current Thread [Next in Thread]