[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v2)
From: |
Alex Williamson |
Subject: |
Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v2) |
Date: |
Thu, 12 Jul 2012 21:47:11 -0600 |
On Thu, 2012-07-12 at 18:52 +1000, Alexey Kardashevskiy wrote:
> It literally does the following:
>
> 1. POWERPC IOMMU support (the kernel counterpart is required)
>
> 2. The patch assumes that IOAPIC calls are going to be replaced
> with something generic. I have something in my local git but it's
> too early, we need to extend PCIINTxRoute first.
>
> 3. vfio_get_group() made public. I want to open IOMMU group from
> the sPAPR code to have everything I need for VFIO on sPAPR and
> avoid ugly workarounds with finilizing PHB setup on sPAPR.
>
> 4. Change sPAPR PHB to scan the PCI bus which is used for
> the IOMMU-VFIO group. Now it is enough to add the following to
> the QEMU command line to get VFIO up with all the devices from
> IOMMU group with id=3:
> -device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
> mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000
>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> hw/ppc/Makefile.objs | 3 ++
> hw/spapr.h | 4 ++
> hw/spapr_iommu.c | 87 ++++++++++++++++++++++++++++++++++++++
> hw/spapr_pci.c | 115
> +++++++++++++++++++++++++++++++++++++++++++++++---
> hw/spapr_pci.h | 5 +++
> hw/vfio_pci.c | 28 +++++++++++-
> hw/vfio_pci.h | 2 +
> 7 files changed, 237 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index f573a95..c46a049 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
> # Xilinx PPC peripherals
> obj-y += xilinx_ethlite.o
>
> +# VFIO PCI device assignment
> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
> +
> obj-y := $(addprefix ../,$(obj-y))
> diff --git a/hw/spapr.h b/hw/spapr.h
> index b37f337..9dca704 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char
> *propname,
> int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
> DMAContext *dma);
>
> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> + uint64_t *dma32_window_start,
> + uint64_t *dma32_window_size);
> +
> #endif /* !defined (__HW_SPAPR_H__) */
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 50c288d..0a194e8 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -16,6 +16,8 @@
> * You should have received a copy of the GNU Lesser General Public
> * License along with this library; if not, see
> <http://www.gnu.org/licenses/>.
> */
> +#include <sys/ioctl.h>
> +
> #include "hw.h"
> #include "kvm.h"
> #include "qdev.h"
> @@ -23,6 +25,7 @@
> #include "dma.h"
>
> #include "hw/spapr.h"
> +#include "hw/linux-vfio.h"
>
> #include <libfdt.h>
>
> @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong
> ioba, target_ulong tce)
> return 0;
> }
>
> +/* -------- API for POWERPC IOMMU -------- */
> +
> +#define POWERPC_IOMMU 2
> +
> +struct tce_iommu_info {
> + __u32 argsz;
> + __u32 dma32_window_start;
> + __u32 dma32_window_size;
> +};
> +
> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
> +struct tce_iommu_dma_map {
> + __u32 argsz;
> + __u64 va;
> + __u64 dmaaddr;
> +};
> +
> +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
> +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
> +
> +typedef struct sPAPRVFIOTable {
> + int fd;
> + uint32_t liobn;
> + QLIST_ENTRY(sPAPRVFIOTable) list;
> +} sPAPRVFIOTable;
> +
> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> +
> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> + uint64_t *dma32_window_start,
> + uint64_t *dma32_window_size)
> +{
> + sPAPRVFIOTable *t;
> + struct tce_iommu_info info = { .argsz = sizeof(info) };
> +
> + if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
> + fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
> + return;
> + }
> + *dma32_window_start = info.dma32_window_start;
> + *dma32_window_size = info.dma32_window_size;
> +
> + t = g_malloc0(sizeof(*t));
> + t->fd = fd;
> + t->liobn = liobn;
> +
> + QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> +}
> +
> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
> +{
> + sPAPRVFIOTable *t;
> + struct tce_iommu_dma_map map = {
> + .argsz = sizeof(map),
> + .va = 0,
> + .dmaaddr = ioba,
> + };
> +
> + QLIST_FOREACH(t, &vfio_tce_tables, list) {
> + if (t->liobn != liobn) {
> + continue;
> + }
> + if (tce) {
> + map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
> + if (ioctl(t->fd, POWERPC_IOMMU_MAP_DMA, &map)) {
> + fprintf(stderr, "TCE_MAP_DMA: %d\n", errno);
> + return H_PARAMETER;
> + }
> + } else {
> + if (ioctl(t->fd, POWERPC_IOMMU_UNMAP_DMA, &map)) {
> + fprintf(stderr, "TCE_UNMAP_DMA: %d\n", errno);
> + return H_PARAMETER;
> + }
> + }
> + return H_SUCCESS;
> + }
> + return H_CONTINUE; /* positive non-zero value */
> +}
> +
> static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
> target_ulong opcode, target_ulong *args)
> {
> @@ -203,6 +286,10 @@ static target_ulong h_put_tce(CPUPPCState *env,
> sPAPREnvironment *spapr,
> if (0 >= ret) {
> return ret ? H_PARAMETER : H_SUCCESS;
> }
> + ret = put_tce_vfio(liobn, ioba, tce);
> + if (0 >= ret) {
> + return ret ? H_PARAMETER : H_SUCCESS;
> + }
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx " ret=%d\n",
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index 014297b..92c48b6 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -22,6 +22,9 @@
> * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> * THE SOFTWARE.
> */
> +#include <sys/types.h>
> +#include <dirent.h>
> +
> #include "hw.h"
> #include "pci.h"
> #include "msi.h"
> @@ -29,10 +32,10 @@
> #include "pci_host.h"
> #include "hw/spapr.h"
> #include "hw/spapr_pci.h"
> +#include "hw/vfio_pci.h"
:-\
> #include "exec-memory.h"
> #include <libfdt.h>
> #include "trace.h"
> -
> #include "hw/pci_internals.h"
>
> /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> @@ -440,6 +443,12 @@ static void pci_spapr_set_irq(void *opaque, int irq_num,
> int level)
> level);
> }
>
> +static int pci_spapr_get_irq(void *opaque, int irq_num)
> +{
> + sPAPRPHBState *phb = opaque;
> + return phb->lsi_table[irq_num].dt_irq;
> +}
> +
> static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
> unsigned size)
> {
> @@ -515,6 +524,82 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus,
> void *opaque,
> return phb->dma;
> }
>
> +static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
> +{
> + char iommupath[256];
> + DIR *dirp;
> + struct dirent *entry;
> +
> + phb->iommugroup = vfio_get_group(phb->iommugroupid);
> + if (!phb->iommugroup) {
> + fprintf(stderr, "Cannot open IOMMU group %d\n", phb->iommugroupid);
> + return -1;
> + }
> +
> + if (!phb->scan) {
> + printf("Autoscan disabled\n");
> + return 0;
> + }
> +
> + sprintf(iommupath, "/sys/kernel/iommu_groups/%d/devices/",
> phb->iommugroupid);
> + dirp = opendir(iommupath);
> +
> + while ((entry = readdir(dirp)) != NULL) {
> + char *tmp = alloca(strlen(iommupath) + strlen(entry->d_name) + 32);
> + FILE *deviceclassfile;
> + unsigned deviceclass = 0, domainid, busid, devid, fnid;
> + char addr[32];
> + DeviceState *dev;
> +
> + if (4 != sscanf(entry->d_name, "%X:%X:%X.%x",
> + &domainid, &busid, &devid, &fnid)) {
> + continue;
> + }
> +
> + sprintf(tmp, "%s%s/class", iommupath, entry->d_name);
> + printf("Reading device class from %s\n", tmp);
> +
> + deviceclassfile = fopen(tmp, "r");
> + if (deviceclassfile) {
> + fscanf(deviceclassfile, "%x", &deviceclass);
> + fclose(deviceclassfile);
> + }
> + if (!deviceclass) {
> + continue;
> + }
> +#define PCI_BASE_CLASS_BRIDGE 0x06
> + if ((phb->scan < 2) && ((deviceclass >> 16) ==
> PCI_BASE_CLASS_BRIDGE)) {
> + continue;
> + }
> + if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
> + /* Tweak USB */
> + phb->force_addr = 1;
> + phb->enable_multifunction = 1;
> + }
> +
> + printf("Creating device %X:%X:%X.%x class=0x%X\n",
> + domainid, busid, devid, fnid, deviceclass);
> +
> + dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
> + if (!dev) {
> + fprintf(stderr, "failed to create vfio-pci\n");
> + continue;
> + }
> + qdev_prop_parse(dev, "host", entry->d_name);
> + if (phb->force_addr) {
> + sprintf(addr, "%X.%X", devid, fnid);
> + qdev_prop_parse(dev, "addr", addr);
> + }
> + if (phb->enable_multifunction) {
> + qdev_prop_set_bit(dev, "multifunction", 1);
> + }
> + qdev_init_nofail(dev);
> + }
> + closedir(dirp);
> +
> + return 0;
> +}
> +
> static int spapr_phb_init(SysBusDevice *s)
> {
> sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
> @@ -567,15 +652,13 @@ static int spapr_phb_init(SysBusDevice *s)
>
> bus = pci_register_bus(&phb->host_state.busdev.qdev,
> phb->busname ? phb->busname : phb->dtbusname,
> - pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
> + pci_spapr_set_irq, pci_spapr_get_irq,
> + pci_spapr_map_irq, phb,
> &phb->memspace, &phb->iospace,
> PCI_DEVFN(0, 0), PCI_NUM_PINS);
> phb->host_state.bus = bus;
>
> phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
> - phb->dma_window_start = 0;
> - phb->dma_window_size = 0x40000000;
> - phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
> phb->dma_window_size);
> pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
>
> QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
> @@ -588,6 +671,24 @@ static int spapr_phb_init(SysBusDevice *s)
> }
> }
>
> + if (phb->iommugroupid >= 0) {
> + if (0 > spapr_pci_scan_vfio(phb)) {
> + return -1;
> + }
> + if (!phb->iommugroup || !phb->iommugroup->container) {
> + return -1;
> + }
> + spapr_vfio_init_dma(phb->iommugroup->container->fd, phb->dma_liobn,
I'm not really a fan of this approach, the vfio data structures are not
designed for external use. Perhaps they should be pulled into
vfio_pci.c.
Could we instead just make a group service function, something like:
int vfio_group_iommu_ioctl(int iommu_group, int request, ...)
Then it could just be a passthrough and you don't have to keep an fd or
de-reference vfio structures. There's a little overhead that we have to
lookup the group each time, but this is your slow method anyway and how
many groups are going to be attached to a single guest.
> + &phb->dma_window_start,
> + &phb->dma_window_size);
> + return 0;
> + }
> +
> + phb->dma_window_start = 0;
> + phb->dma_window_size = 0x40000000;
> + phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
> + phb->dma_window_size);
> +
> return 0;
> }
>
> @@ -599,6 +700,10 @@ static Property spapr_phb_properties[] = {
> DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
> DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
> DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
> + DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
> + DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1), /* 0 don't 1 +devices
> 2 +buses */
> + DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
> + DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
> index 145071c..1953a74 100644
> --- a/hw/spapr_pci.h
> +++ b/hw/spapr_pci.h
> @@ -57,6 +57,11 @@ typedef struct sPAPRPHBState {
> int nvec;
> } msi_table[SPAPR_MSIX_MAX_DEVS];
>
> + int32_t iommugroupid;
> + struct VFIOGroup *iommugroup;
> + uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
> + uint8_t enable_multifunction, force_addr;
> +
> QLIST_ENTRY(sPAPRPHBState) list;
> } sPAPRPHBState;
>
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 1ac287f..73681fb 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -21,7 +21,6 @@
> #include <dirent.h>
> #include <stdio.h>
> #include <unistd.h>
> -#include <sys/io.h>
> #include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> @@ -43,6 +42,12 @@
> #include "range.h"
> #include "vfio_pci.h"
> #include "linux-vfio.h"
> +#ifndef TARGET_PPC64
> +#include <sys/io.h>
> +#else
> +#include "hw/pci_internals.h"
> +#include "hw/spapr.h"
> +#endif
>
> //#define DEBUG_VFIO
> #ifdef DEBUG_VFIO
> @@ -1581,6 +1586,25 @@ static int vfio_connect_container(VFIOGroup *group)
>
> memory_listener_register(&container->listener, get_system_memory());
>
> +#define POWERPC_IOMMU 2
> + } else if (ioctl(fd, VFIO_CHECK_EXTENSION, POWERPC_IOMMU)) {
> + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> + if (ret) {
> + error_report("vfio: failed to set group container: %s\n",
> + strerror(errno));
> + g_free(container);
> + close(fd);
> + return -1;
> + }
> +
> + ret = ioctl(fd, VFIO_SET_IOMMU, POWERPC_IOMMU);
> + if (ret) {
> + error_report("vfio: failed to set iommu for container: %s\n",
> + strerror(errno));
> + g_free(container);
> + close(fd);
> + return -1;
> + }
> } else {
> error_report("vfio: No available IOMMU models\n");
> g_free(container);
> @@ -1620,7 +1644,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
> }
> }
>
> -static VFIOGroup *vfio_get_group(int groupid)
> +VFIOGroup *vfio_get_group(int groupid)
> {
> VFIOGroup *group;
> char path[32];
> diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
> index 226607c..d63dd63 100644
> --- a/hw/vfio_pci.h
> +++ b/hw/vfio_pci.h
> @@ -105,4 +105,6 @@ typedef struct VFIOGroup {
> #define VFIO_FLAG_IOMMU_SHARED_BIT 0
> #define VFIO_FLAG_IOMMU_SHARED (1U << VFIO_FLAG_UIOMMU_SHARED_BIT)
>
> +VFIOGroup *vfio_get_group(int groupid);
> +
> #endif /* __VFIO_H__ */
- Re: [Qemu-devel] [PATCH 0/2] RFC: powerpc-vfio: adding support, (continued)
[Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v2), Alexey Kardashevskiy, 2012/07/12
Re: [Qemu-devel] [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v2), Scott Wood, 2012/07/12
Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v2),
Alex Williamson <=
[Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/13
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Blue Swirl, 2012/07/13
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/13
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/13
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/16
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/16
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alexey Kardashevskiy, 2012/07/17
- Re: [Qemu-devel] [PATCH] RFC: vfio-powerpc: added VFIO support (v3), Alex Williamson, 2012/07/17
[Qemu-devel] [PATCH] vfio-powerpc: added VFIO support (v4), Alexey Kardashevskiy, 2012/07/18