[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug
From: |
Alexey Kardashevskiy |
Subject: |
[Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug |
Date: |
Thu, 18 Jun 2015 21:37:33 +1000 |
sPAPR IOMMU is managing two copies of a TCE table:
1) a guest view of the table - this is what emulated devices use and
this is where H_GET_TCE reads from;
2) a hardware TCE table - only present if there is at least one vfio-pci
device on a PHB; it is updated via a memory listener on a PHB address
space which forwards map/unmap requests to vfio-pci IOMMU host driver.
At the moment, the presence of vfio-pci devices on a bus affects the way
the guest view table is allocated. If there is no vfio-pci on a PHB
and the host kernel supports KVM acceleration of H_PUT_TCE, a table
is allocated in KVM. However, if there is vfio-pci and we do not yet
support KVM acceleration for these, the table has to be allocated
by the userspace.
When a vfio-pci device is hotplugged and there were no vfio-pci devices
already, the guest view table could have been allocated by KVM which
means that H_PUT_TCE is handled by the host kernel and since we
do not support vfio-pci in KVM, the hardware table will not be updated.
This reallocates the guest view table in QEMU if the first vfio-pci
device has just been plugged. spapr_tce_realloc_userspace() handles this.
This replays all the mappings to make sure that the tables are in sync.
This will not have a visible effect though as for a new device
the guest kernel will allocate-and-map new addresses and therefore
existing mappings from emulated devices will not be used by vfio-pci
devices.
This adds calls to spapr_phb_dma_capabilities_update() in PCI hotplug
hooks.
Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
hw/ppc/spapr_iommu.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
hw/ppc/spapr_pci.c | 43 +++++++++++++++++++++++++++++++++++++++++++
include/hw/ppc/spapr.h | 2 ++
trace-events | 2 ++
4 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 45c00d8..5e6bdb4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -78,12 +78,13 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
uint32_t nb_table,
uint32_t page_shift,
int *fd,
- bool vfio_accel)
+ bool vfio_accel,
+ bool force_userspace)
{
uint64_t *table = NULL;
uint64_t window_size = (uint64_t)nb_table << page_shift;
- if (kvm_enabled() && !(window_size >> 32)) {
+ if (kvm_enabled() && !force_userspace && !(window_size >> 32)) {
table = kvmppc_create_spapr_tce(liobn, window_size, fd, vfio_accel);
}
@@ -222,7 +223,8 @@ static void spapr_tce_table_do_enable(sPAPRTCETable *tcet,
bool vfio_accel)
tcet->nb_table,
tcet->page_shift,
&tcet->fd,
- vfio_accel);
+ vfio_accel,
+ false);
memory_region_set_size(&tcet->iommu,
(uint64_t)tcet->nb_table << tcet->page_shift);
@@ -495,6 +497,48 @@ int spapr_dma_dt(void *fdt, int node_off, const char
*propname,
return 0;
}
+static int spapr_tce_do_replay(sPAPRTCETable *tcet, uint64_t *table)
+{
+ target_ulong ioba = tcet->bus_offset, pgsz = (1ULL << tcet->page_shift);
+ long i, ret = 0;
+
+ for (i = 0; i < tcet->nb_table; ++i, ioba += pgsz) {
+ ret = put_tce_emu(tcet, ioba, table[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int spapr_tce_replay(sPAPRTCETable *tcet)
+{
+ return spapr_tce_do_replay(tcet, tcet->table);
+}
+
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay)
+{
+ int ret = 0, oldfd;
+ uint64_t *oldtable;
+
+ oldtable = tcet->table;
+ oldfd = tcet->fd;
+ tcet->table = spapr_tce_alloc_table(tcet->liobn,
+ tcet->nb_table,
+ tcet->page_shift,
+ &tcet->fd,
+ false,
+ true); /* force_userspace */
+
+ if (replay) {
+ ret = spapr_tce_do_replay(tcet, oldtable);
+ }
+
+ spapr_tce_free_table(oldtable, oldfd, tcet->nb_table);
+
+ return ret;
+}
+
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
sPAPRTCETable *tcet)
{
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ca3772e..1f980fa 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -716,6 +716,33 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void
*opaque, int devfn)
return &phb->iommu_as;
}
+static int spapr_phb_dma_update(Object *child, void *opaque)
+{
+ int ret = 0;
+ sPAPRTCETable *tcet = (sPAPRTCETable *)
+ object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+
+ if (!tcet) {
+ return 0;
+ }
+
+ if (tcet->fd >= 0) {
+ /*
+ * We got first vfio-pci device on accelerated table.
+ * VFIO acceleration is not possible.
+ * Reallocate table in userspace and replay mappings.
+ */
+ ret = spapr_tce_realloc_userspace(tcet, true);
+ trace_spapr_pci_dma_realloc_update(tcet->liobn, ret);
+ } else {
+ /* There was no acceleration, so just replay mappings. */
+ ret = spapr_tce_replay(tcet);
+ trace_spapr_pci_dma_update(tcet->liobn, ret);
+ }
+
+ return 0;
+}
+
static int spapr_phb_dma_capabilities_update(sPAPRPHBState *sphb)
{
int ret;
@@ -776,6 +803,20 @@ int spapr_phb_dma_reset(sPAPRPHBState *sphb)
return 0;
}
+static int spapr_phb_hotplug_dma_sync(sPAPRPHBState *sphb)
+{
+ int ret = 0;
+ bool had_vfio = sphb->has_vfio;
+
+ spapr_phb_dma_capabilities_update(sphb);
+
+ if (!had_vfio && sphb->has_vfio) {
+ object_child_foreach(OBJECT(sphb), spapr_phb_dma_update, NULL);
+ }
+
+ return ret;
+}
+
/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
@@ -1042,6 +1083,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector
*drc,
if (dev->hotplugged) {
fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
&fdt_start_offset);
+ spapr_phb_hotplug_dma_sync(phb);
}
drck->attach(drc, DEVICE(pdev),
@@ -1065,6 +1107,7 @@ static void spapr_phb_remove_pci_device_cb(DeviceState
*dev, void *opaque)
*/
pci_device_reset(PCI_DEVICE(dev));
object_unparent(OBJECT(dev));
+ spapr_phb_hotplug_dma_sync((sPAPRPHBState *)opaque);
}
static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e32e787..4645f16 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -588,6 +588,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char
*propname,
uint32_t liobn, uint64_t window, uint32_t size);
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
sPAPRTCETable *tcet);
+int spapr_tce_replay(sPAPRTCETable *tcet);
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay);
void spapr_pci_switch_vga(bool big_endian);
void spapr_hotplug_req_add_event(sPAPRDRConnector *drc);
void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc);
diff --git a/trace-events b/trace-events
index a93af9a..3cd8bf7 100644
--- a/trace-events
+++ b/trace-events
@@ -1300,6 +1300,8 @@ spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned
ioa, unsigned intr) "q
spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq)
"@%"PRIx64"<=%"PRIx64" IRQ %u"
spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs)
"Guest device at %x asked %u, have only %u"
+spapr_pci_dma_update(uint64_t liobn, long ret) "liobn=%"PRIx64" tcet=%ld"
+spapr_pci_dma_realloc_update(uint64_t liobn, long ret) "liobn=%"PRIx64"
tcet=%ld"
# hw/pci/pci.c
pci_update_mappings_del(void *d, uint32_t bus, uint32_t func, uint32_t slot,
int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x
%d,%#"PRIx64"+%#"PRIx64
--
2.4.0.rc3.8.gfb3e7d5
- [Qemu-devel] [PATCH qemu v8 00/14] spapr: vfio: Enable Dynamic DMA windows (DDW), Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 04/14] spapr_pci: Convert finish_realize() to dma_capabilities_update()+dma_init_window(), Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 03/14] spapr_pci_vfio: Enable multiple groups per container, Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 05/14] spapr_iommu: Move table allocation to helpers, Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug,
Alexey Kardashevskiy <=
- [Qemu-devel] [PATCH qemu v8 13/14] vfio: spapr: Add SPAPR IOMMU v2 support (DMA memory preregistering), Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 10/14] spapr_vfio_pci: Remove redundant spapr-pci-vfio-host-bridge, Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 14/14] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW), Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 01/14] vmstate: Define VARRAY with VMS_ALLOC, Alexey Kardashevskiy, 2015/06/18
- [Qemu-devel] [PATCH qemu v8 02/14] vfio: spapr: Move SPAPR-related code to a separate file, Alexey Kardashevskiy, 2015/06/18