qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug


From: Alexey Kardashevskiy
Subject: [Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug
Date: Thu, 18 Jun 2015 21:37:33 +1000

sPAPR IOMMU is managing two copies of a TCE table:
1) a guest view of the table - this is what emulated devices use and
this is where H_GET_TCE reads from;
2) a hardware TCE table - only present if there is at least one vfio-pci
device on a PHB; it is updated via a memory listener on a PHB address
space which forwards map/unmap requests to vfio-pci IOMMU host driver.

At the moment the presence of vfio-pci devices on a bus affects the way
the guest view table is allocated. If there is no vfio-pci on a PHB
and the host kernel supports KVM acceleration of H_PUT_TCE, a table
is allocated in KVM. However, if there is vfio-pci and we do not yet
support KVM acceleration for these, the table has to be allocated
by the userspace.

When a vfio-pci device is hotplugged and there were no vfio-pci devices
already, the guest view table could have been allocated by KVM, which
means that H_PUT_TCE is handled by the host kernel and, since we
do not support vfio-pci in KVM, the hardware table will not be updated.

This reallocates the guest view table in QEMU if the first vfio-pci
device has just been plugged. spapr_tce_realloc_userspace() handles this.

This replays all the mappings to make sure that the tables are in sync.
This will not have a visible effect though as for a new device
the guest kernel will allocate-and-map new addresses and therefore
existing mappings from emulated devices will not be used by vfio-pci
devices.

This adds calls to spapr_phb_dma_capabilities_update() (via the new
spapr_phb_hotplug_dma_sync() helper) in PCI hotplug hooks.

Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
 hw/ppc/spapr_iommu.c   | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
 hw/ppc/spapr_pci.c     | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |  2 ++
 trace-events           |  2 ++
 4 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 45c00d8..5e6bdb4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -78,12 +78,13 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
                                        uint32_t nb_table,
                                        uint32_t page_shift,
                                        int *fd,
-                                       bool vfio_accel)
+                                       bool vfio_accel,
+                                       bool force_userspace)
 {
     uint64_t *table = NULL;
     uint64_t window_size = (uint64_t)nb_table << page_shift;
 
-    if (kvm_enabled() && !(window_size >> 32)) {
+    if (kvm_enabled() && !force_userspace && !(window_size >> 32)) {
         table = kvmppc_create_spapr_tce(liobn, window_size, fd, vfio_accel);
     }
 
@@ -222,7 +223,8 @@ static void spapr_tce_table_do_enable(sPAPRTCETable *tcet, 
bool vfio_accel)
                                         tcet->nb_table,
                                         tcet->page_shift,
                                         &tcet->fd,
-                                        vfio_accel);
+                                        vfio_accel,
+                                        false);
 
     memory_region_set_size(&tcet->iommu,
                            (uint64_t)tcet->nb_table << tcet->page_shift);
@@ -495,6 +497,48 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
     return 0;
 }
 
+static int spapr_tce_do_replay(sPAPRTCETable *tcet, uint64_t *table)
+{
+    target_ulong ioba = tcet->bus_offset, pgsz = (1ULL << tcet->page_shift);
+    long i, ret = 0;
+
+    for (i = 0; i < tcet->nb_table; ++i, ioba += pgsz) {
+        ret = put_tce_emu(tcet, ioba, table[i]);
+        if (ret)
+            break;
+    }
+
+    return ret;
+}
+
+int spapr_tce_replay(sPAPRTCETable *tcet)
+{
+    return spapr_tce_do_replay(tcet, tcet->table);
+}
+
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay)
+{
+    int ret = 0, oldfd;
+    uint64_t *oldtable;
+
+    oldtable = tcet->table;
+    oldfd = tcet->fd;
+    tcet->table = spapr_tce_alloc_table(tcet->liobn,
+                                        tcet->nb_table,
+                                        tcet->page_shift,
+                                        &tcet->fd,
+                                        false,
+                                        true); /* force_userspace */
+
+    if (replay) {
+        ret = spapr_tce_do_replay(tcet, oldtable);
+    }
+
+    spapr_tce_free_table(oldtable, oldfd, tcet->nb_table);
+
+    return ret;
+}
+
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                       sPAPRTCETable *tcet)
 {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ca3772e..1f980fa 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -716,6 +716,33 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
     return &phb->iommu_as;
 }
 
+static int spapr_phb_dma_update(Object *child, void *opaque)
+{
+    int ret = 0;
+    sPAPRTCETable *tcet = (sPAPRTCETable *)
+        object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+
+    if (!tcet) {
+        return 0;
+    }
+
+    if (tcet->fd >= 0) {
+        /*
+         * We got first vfio-pci device on accelerated table.
+         * VFIO acceleration is not possible.
+         * Reallocate table in userspace and replay mappings.
+         */
+        ret = spapr_tce_realloc_userspace(tcet, true);
+        trace_spapr_pci_dma_realloc_update(tcet->liobn, ret);
+    } else {
+        /* There was no acceleration, so just replay mappings. */
+        ret = spapr_tce_replay(tcet);
+        trace_spapr_pci_dma_update(tcet->liobn, ret);
+    }
+
+    return 0;
+}
+
 static int spapr_phb_dma_capabilities_update(sPAPRPHBState *sphb)
 {
     int ret;
@@ -776,6 +803,20 @@ int spapr_phb_dma_reset(sPAPRPHBState *sphb)
     return 0;
 }
 
+static int spapr_phb_hotplug_dma_sync(sPAPRPHBState *sphb)
+{
+    int ret = 0;
+    bool had_vfio = sphb->has_vfio;
+
+    spapr_phb_dma_capabilities_update(sphb);
+
+    if (!had_vfio && sphb->has_vfio) {
+        object_child_foreach(OBJECT(sphb), spapr_phb_dma_update, NULL);
+    }
+
+    return ret;
+}
+
 /* Macros to operate with address in OF binding to PCI */
 #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
@@ -1042,6 +1083,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
*drc,
     if (dev->hotplugged) {
         fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
                                         &fdt_start_offset);
+        spapr_phb_hotplug_dma_sync(phb);
     }
 
     drck->attach(drc, DEVICE(pdev),
@@ -1065,6 +1107,7 @@ static void spapr_phb_remove_pci_device_cb(DeviceState 
*dev, void *opaque)
      */
     pci_device_reset(PCI_DEVICE(dev));
     object_unparent(OBJECT(dev));
+    spapr_phb_hotplug_dma_sync((sPAPRPHBState *)opaque);
 }
 
 static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e32e787..4645f16 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -588,6 +588,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                       sPAPRTCETable *tcet);
+int spapr_tce_replay(sPAPRTCETable *tcet);
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay);
 void spapr_pci_switch_vga(bool big_endian);
 void spapr_hotplug_req_add_event(sPAPRDRConnector *drc);
 void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc);
diff --git a/trace-events b/trace-events
index a93af9a..3cd8bf7 100644
--- a/trace-events
+++ b/trace-events
@@ -1300,6 +1300,8 @@ spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned 
ioa, unsigned intr) "q
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) 
"@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
 spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) 
"Guest device at %x asked %u, have only %u"
+spapr_pci_dma_update(uint64_t liobn, long ret) "liobn=%"PRIx64" tcet=%ld"
+spapr_pci_dma_realloc_update(uint64_t liobn, long ret) "liobn=%"PRIx64" 
tcet=%ld"
 
 # hw/pci/pci.c
 pci_update_mappings_del(void *d, uint32_t bus, uint32_t func, uint32_t slot, 
int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x 
%d,%#"PRIx64"+%#"PRIx64
-- 
2.4.0.rc3.8.gfb3e7d5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]