[Qemu-devel] [RFC v2 3/6] vfio: add vfio-platform support
From: Eric Auger
Subject: [Qemu-devel] [RFC v2 3/6] vfio: add vfio-platform support
Date: Wed, 9 Apr 2014 16:33:06 +0100
From: Kim Phillips <address@hidden>
Functions shared by PCI and platform device support are moved into
common.c. The common vfio_{get,put}_group() functions take an additional
argument, a pointer to a vfio_reset_handler(), which is passed on to
qemu_register_reset(), but only if it is non-NULL (the platform device
code currently passes NULL as its reset_handler).
For the platform device code, we use SysBusDevice instead of PCIDevice.
Since realize() returns void, unlike PCIDevice's initfn, error codes are
folded into the error message text with %m rather than returned.
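For example, abridged from vfio_platform_realize() in the diff below:

    static void vfio_platform_realize(DeviceState *dev, Error **errp)
    {
        ...
        if (sscanf(group_name, "%d", &groupid) != 1) {
            /* errno is reported via %m; nothing is returned to the caller */
            error_report("vfio: error reading %s: %m", path);
            return;
        }
        ...
    }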
Currently only MMIO access is supported.
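Guest accesses to an assigned region trap into QEMU and are forwarded to
the host device with pread()/pwrite() on the VFIO device fd; where the
region advertises VFIO_REGION_INFO_FLAG_MMAP, a direct mmap() mapping is
layered on top. Abridged from vfio_region_read() below:

    static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size)
    {
        VFIORegion *region = opaque;
        ...
        /* slow path: read the register through the device fd */
        if (pread(region->fd, &buf, size, region->fd_offset + addr) != size) {
            error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m",
                         __func__, addr, size);
            return (uint64_t)-1;
        }
        ...
    }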
The anticipated path for future QEMU development is:
- add support for interrupts
- verify and test the platform device unmap path
- test the existing PCI path for regressions
- add support for creating platform devices on the QEMU command line
  - currently the device address specification is hardcoded for test
    development on Calxeda Midway's fff51000.ethernet device
- reset is not supported, and registration of reset functions is
  bypassed for platform devices
  - there is no standard means of resetting a platform device; it is
    unclear whether handling this at device-VFIO binding time suffices
Signed-off-by: Kim Phillips <address@hidden>
---
hw/vfio/Makefile.objs | 2 +
hw/vfio/common.c | 486 ++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.c | 480 ++-----------------------------------------------
hw/vfio/platform.c | 381 +++++++++++++++++++++++++++++++++++++++
hw/vfio/vfio-common.h | 55 ++++++
5 files changed, 937 insertions(+), 467 deletions(-)
create mode 100644 hw/vfio/common.c
create mode 100644 hw/vfio/platform.c
create mode 100644 hw/vfio/vfio-common.h
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index 31c7dab..c5c76fe 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,3 +1,5 @@
ifeq ($(CONFIG_LINUX), y)
+obj-$(CONFIG_SOFTMMU) += common.o
obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_SOFTMMU) += platform.o
endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
new file mode 100644
index 0000000..9d1f723
--- /dev/null
+++ b/hw/vfio/common.c
@@ -0,0 +1,486 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Alex Williamson <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (address@hidden)
+ * Copyright (c) 2007, Neocleus, Guy Zana (address@hidden)
+ * Copyright (C) 2008, Qumranet, Amit Shah (address@hidden)
+ * Copyright (C) 2008, Red Hat, Amit Shah (address@hidden)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (address@hidden)
+ */
+
+#include <dirent.h>
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "qemu/range.h"
+#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
+
+#include "vfio-common.h"
+
+#define DEBUG_VFIO
+#ifdef DEBUG_VFIO
+#define DPRINTF(fmt, ...) \
+ do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
+
+static QLIST_HEAD(, VFIOContainer)
+ container_list = QLIST_HEAD_INITIALIZER(container_list);
+
+QLIST_HEAD(, VFIOGroup)
+ group_list = QLIST_HEAD_INITIALIZER(group_list);
+
+
+struct VFIODevice;
+
+#ifdef CONFIG_KVM
+/*
+ * We have a single VFIO pseudo device per KVM VM. Once created it lives
+ * for the life of the VM. Closing the file descriptor only drops our
+ * reference to it and the device's reference to kvm. Therefore once
+ * initialized, this file descriptor is only released on QEMU exit and
+ * we'll re-use it should another vfio device be attached before then.
+ */
+static int vfio_kvm_device_fd = -1;
+#endif
+
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_dma_unmap(VFIOContainer *container,
+ hwaddr iova, ram_addr_t size)
+{
+ struct vfio_iommu_type1_dma_unmap unmap = {
+ .argsz = sizeof(unmap),
+ .flags = 0,
+ .iova = iova,
+ .size = size,
+ };
+
+ if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly)
+{
+ struct vfio_iommu_type1_dma_map map = {
+ .argsz = sizeof(map),
+ .flags = VFIO_DMA_MAP_FLAG_READ,
+ .vaddr = (__u64)(uintptr_t)vaddr,
+ .iova = iova,
+ .size = size,
+ };
+
+ if (!readonly) {
+ map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
+ }
+
+ /*
+ * Try the mapping, if it fails with EBUSY, unmap the region and try
+ * again. This shouldn't be necessary, but we sometimes see it in
+ * the the VGA ROM space.
+ */
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
+ (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
+ ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
+ return 0;
+ }
+
+ DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
+ return -errno;
+}
+
+static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+{
+ return !memory_region_is_ram(section->mr) ||
+ /*
+ * Sizing an enabled 64-bit BAR can cause spurious mappings to
+ * addresses in the upper part of the 64-bit address space. These
+ * are never accessed by the CPU and beyond the address width of
+ * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width.
+ */
+ section->offset_within_address_space & (1ULL << 63);
+}
+
+static void vfio_listener_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer,
+ iommu_data.type1.listener);
+ hwaddr iova, end;
+ void *vaddr;
+ int ret;
+
+ assert(!memory_region_is_iommu(section->mr));
+
+ if (vfio_listener_skipped_section(section)) {
+ DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
+ section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(int128_sub(section->size, int128_one())));
+ return;
+ }
+
+ if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+ (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+ error_report("%s received unaligned region", __func__);
+ return;
+ }
+
+ iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+ end = (section->offset_within_address_space + int128_get64(section->size)) &
+ TARGET_PAGE_MASK;
+
+ if (iova >= end) {
+ return;
+ }
+
+ vaddr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region +
+ (iova - section->offset_within_address_space);
+
+ DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
+ iova, end - 1, vaddr);
+
+ memory_region_ref(section->mr);
+ ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
+ if (ret) {
+ error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ container, iova, end - iova, vaddr, ret);
+
+ /*
+ * On the initfn path, store the first error in the container so we
+ * can gracefully fail. Runtime, there's not much we can do other
+ * than throw a hardware error.
+ */
+ if (!container->iommu_data.type1.initialized) {
+ if (!container->iommu_data.type1.error) {
+ container->iommu_data.type1.error = ret;
+ }
+ } else {
+ hw_error("vfio: DMA mapping failed, unable to continue");
+ }
+ }
+}
+
+static void vfio_listener_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer,
+ iommu_data.type1.listener);
+ hwaddr iova, end;
+ int ret;
+
+ if (vfio_listener_skipped_section(section)) {
+ DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n",
+ section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(int128_sub(section->size, int128_one())));
+ return;
+ }
+
+ if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+ (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+ error_report("%s received unaligned region", __func__);
+ return;
+ }
+
+ iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+ end = (section->offset_within_address_space + int128_get64(section->size)) &
+ TARGET_PAGE_MASK;
+
+ if (iova >= end) {
+ return;
+ }
+
+ DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
+ iova, end - 1);
+
+ ret = vfio_dma_unmap(container, iova, end - iova);
+ memory_region_unref(section->mr);
+ if (ret) {
+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova, end - iova, ret);
+ }
+}
+
+static MemoryListener vfio_memory_listener = {
+ .region_add = vfio_listener_region_add,
+ .region_del = vfio_listener_region_del,
+};
+
+static void vfio_listener_release(VFIOContainer *container)
+{
+ memory_listener_unregister(&container->iommu_data.type1.listener);
+}
+
+static void vfio_kvm_device_add_group(VFIOGroup *group)
+{
+#ifdef CONFIG_KVM
+ struct kvm_device_attr attr = {
+ .group = KVM_DEV_VFIO_GROUP,
+ .attr = KVM_DEV_VFIO_GROUP_ADD,
+ .addr = (uint64_t)(unsigned long)&group->fd,
+ };
+
+ if (!kvm_enabled()) {
+ return;
+ }
+
+ if (vfio_kvm_device_fd < 0) {
+ struct kvm_create_device cd = {
+ .type = KVM_DEV_TYPE_VFIO,
+ };
+
+ if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
+ DPRINTF("KVM_CREATE_DEVICE: %m\n");
+ return;
+ }
+
+ vfio_kvm_device_fd = cd.fd;
+ }
+
+ if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+ error_report("Failed to add group %d to KVM VFIO device: %m",
+ group->groupid);
+ }
+#endif
+}
+
+static void vfio_kvm_device_del_group(VFIOGroup *group)
+{
+#ifdef CONFIG_KVM
+ struct kvm_device_attr attr = {
+ .group = KVM_DEV_VFIO_GROUP,
+ .attr = KVM_DEV_VFIO_GROUP_DEL,
+ .addr = (uint64_t)(unsigned long)&group->fd,
+ };
+
+ if (vfio_kvm_device_fd < 0) {
+ return;
+ }
+
+ if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+ error_report("Failed to remove group %d from KVM VFIO device: %m",
+ group->groupid);
+ }
+#endif
+}
+
+static int vfio_connect_container(VFIOGroup *group)
+{
+ VFIOContainer *container;
+ int ret, fd;
+
+ if (group->container) {
+ return 0;
+ }
+
+ QLIST_FOREACH(container, &container_list, next) {
+ if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ group->container = container;
+ QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+ return 0;
+ }
+ }
+
+ fd = qemu_open("/dev/vfio/vfio", O_RDWR);
+ if (fd < 0) {
+ error_report("vfio: failed to open /dev/vfio/vfio: %m");
+ return -errno;
+ }
+
+ ret = ioctl(fd, VFIO_GET_API_VERSION);
+ if (ret != VFIO_API_VERSION) {
+ error_report("vfio: supported vfio version: %d, "
+ "reported version: %d", VFIO_API_VERSION, ret);
+ close(fd);
+ return -EINVAL;
+ }
+
+ container = g_malloc0(sizeof(*container));
+ container->fd = fd;
+
+ if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
+ ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+ if (ret) {
+ error_report("vfio: failed to set group container: %m");
+ g_free(container);
+ close(fd);
+ return -errno;
+ }
+
+ ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+ if (ret) {
+ error_report("vfio: failed to set iommu for container: %m");
+ g_free(container);
+ close(fd);
+ return -errno;
+ }
+
+ container->iommu_data.type1.listener = vfio_memory_listener;
+ container->iommu_data.release = vfio_listener_release;
+
+ memory_listener_register(&container->iommu_data.type1.listener,
+ &address_space_memory);
+
+ if (container->iommu_data.type1.error) {
+ ret = container->iommu_data.type1.error;
+ vfio_listener_release(container);
+ g_free(container);
+ close(fd);
+ error_report("vfio: memory listener initialization failed for
container");
+ return ret;
+ }
+
+ container->iommu_data.type1.initialized = true;
+
+ } else {
+ error_report("vfio: No available IOMMU models");
+ g_free(container);
+ close(fd);
+ return -EINVAL;
+ }
+
+ QLIST_INIT(&container->group_list);
+ QLIST_INSERT_HEAD(&container_list, container, next);
+
+ group->container = container;
+ QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
+ return 0;
+}
+
+static void vfio_disconnect_container(VFIOGroup *group)
+{
+ VFIOContainer *container = group->container;
+
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+ error_report("vfio: error disconnecting group %d from container",
+ group->groupid);
+ }
+
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+
+ if (QLIST_EMPTY(&container->group_list)) {
+ if (container->iommu_data.release) {
+ container->iommu_data.release(container);
+ }
+ QLIST_REMOVE(container, next);
+ DPRINTF("vfio_disconnect_container: close container->fd\n");
+ close(container->fd);
+ g_free(container);
+ }
+}
+
+VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler)
+{
+ VFIOGroup *group;
+ char path[32];
+ struct vfio_group_status status = { .argsz = sizeof(status) };
+
+ QLIST_FOREACH(group, &group_list, next) {
+ if (group->groupid == groupid) {
+ return group;
+ }
+ }
+
+ group = g_malloc0(sizeof(*group));
+
+ snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+ group->fd = qemu_open(path, O_RDWR);
+ if (group->fd < 0) {
+ error_report("vfio: error opening %s: %m", path);
+ g_free(group);
+ return NULL;
+ }
+
+ if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
+ error_report("vfio: error getting group status: %m");
+ close(group->fd);
+ g_free(group);
+ return NULL;
+ }
+
+ if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ error_report("vfio: error, group %d is not viable, please ensure "
+ "all devices within the iommu_group are bound to their "
+ "vfio bus driver.", groupid);
+ close(group->fd);
+ g_free(group);
+ return NULL;
+ }
+
+ group->groupid = groupid;
+ QLIST_INIT(&group->device_list);
+
+ if (vfio_connect_container(group)) {
+ error_report("vfio: failed to setup container for group %d", groupid);
+ close(group->fd);
+ g_free(group);
+ return NULL;
+ }
+
+ if (QLIST_EMPTY(&group_list) && reset_handler) {
+ qemu_register_reset(reset_handler, NULL);
+ }
+
+ QLIST_INSERT_HEAD(&group_list, group, next);
+
+ vfio_kvm_device_add_group(group);
+
+ return group;
+}
+
+void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler)
+{
+ if (!QLIST_EMPTY(&group->device_list)) {
+ return;
+ }
+
+ vfio_kvm_device_del_group(group);
+ vfio_disconnect_container(group);
+ QLIST_REMOVE(group, next);
+ DPRINTF("vfio_put_group: close group->fd\n");
+ close(group->fd);
+ g_free(group);
+
+ if (QLIST_EMPTY(&group_list) && reset_handler) {
+ qemu_unregister_reset(reset_handler, NULL);
+ }
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9cf5b84..9e70d68 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1,5 +1,5 @@
/*
- * vfio based device assignment support
+ * vfio based device assignment support - PCI devices
*
* Copyright Red Hat, Inc. 2012
*
@@ -40,6 +40,8 @@
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
+#include "vfio-common.h"
+
/* #define DEBUG_VFIO */
#ifdef DEBUG_VFIO
#define DPRINTF(fmt, ...) \
@@ -55,6 +57,8 @@
#define VFIO_ALLOW_KVM_MSI 1
#define VFIO_ALLOW_KVM_MSIX 1
+extern QLIST_HEAD(, VFIOGroup) group_list;
+
struct VFIODevice;
typedef struct VFIOQuirk {
@@ -135,25 +139,6 @@ enum {
struct VFIOGroup;
-typedef struct VFIOType1 {
- MemoryListener listener;
- int error;
- bool initialized;
-} VFIOType1;
-
-typedef struct VFIOContainer {
- int fd; /* /dev/vfio/vfio, empowered by the attached groups */
- struct {
- /* enable abstraction to support various iommu backends */
- union {
- VFIOType1 type1;
- };
- void (*release)(struct VFIOContainer *);
- } iommu_data;
- QLIST_HEAD(, VFIOGroup) group_list;
- QLIST_ENTRY(VFIOContainer) next;
-} VFIOContainer;
-
/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
typedef struct VFIOMSIXInfo {
uint8_t table_bar;
@@ -200,15 +185,6 @@ typedef struct VFIODevice {
bool rom_read_failed;
} VFIODevice;
-typedef struct VFIOGroup {
- int fd;
- int groupid;
- VFIOContainer *container;
- QLIST_HEAD(, VFIODevice) device_list;
- QLIST_ENTRY(VFIOGroup) next;
- QLIST_ENTRY(VFIOGroup) container_next;
-} VFIOGroup;
-
typedef struct VFIORomBlacklistEntry {
uint16_t vendor_id;
uint16_t device_id;
@@ -234,23 +210,6 @@ static const VFIORomBlacklistEntry romblacklist[] = {
#define MSIX_CAP_LENGTH 12
-static QLIST_HEAD(, VFIOContainer)
- container_list = QLIST_HEAD_INITIALIZER(container_list);
-
-static QLIST_HEAD(, VFIOGroup)
- group_list = QLIST_HEAD_INITIALIZER(group_list);
-
-#ifdef CONFIG_KVM
-/*
- * We have a single VFIO pseudo device per KVM VM. Once created it lives
- * for the life of the VM. Closing the file descriptor only drops our
- * reference to it and the device's reference to kvm. Therefore once
- * initialized, this file descriptor is only released on QEMU exit and
- * we'll re-use it should another vfio device be attached before then.
- */
-static int vfio_kvm_device_fd = -1;
-#endif
-
static void vfio_disable_interrupts(VFIODevice *vdev);
static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
@@ -2180,183 +2139,6 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
}
/*
- * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
- */
-static int vfio_dma_unmap(VFIOContainer *container,
- hwaddr iova, ram_addr_t size)
-{
- struct vfio_iommu_type1_dma_unmap unmap = {
- .argsz = sizeof(unmap),
- .flags = 0,
- .iova = iova,
- .size = size,
- };
-
- if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
- return -errno;
- }
-
- return 0;
-}
-
-static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
-{
- struct vfio_iommu_type1_dma_map map = {
- .argsz = sizeof(map),
- .flags = VFIO_DMA_MAP_FLAG_READ,
- .vaddr = (__u64)(uintptr_t)vaddr,
- .iova = iova,
- .size = size,
- };
-
- if (!readonly) {
- map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
- }
-
- /*
- * Try the mapping, if it fails with EBUSY, unmap the region and try
- * again. This shouldn't be necessary, but we sometimes see it in
- * the the VGA ROM space.
- */
- if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
- ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
- return 0;
- }
-
- DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
- return -errno;
-}
-
-static bool vfio_listener_skipped_section(MemoryRegionSection *section)
-{
- return !memory_region_is_ram(section->mr) ||
- /*
- * Sizing an enabled 64-bit BAR can cause spurious mappings to
- * addresses in the upper part of the 64-bit address space. These
- * are never accessed by the CPU and beyond the address width of
- * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width.
- */
- section->offset_within_address_space & (1ULL << 63);
-}
-
-static void vfio_listener_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container = container_of(listener, VFIOContainer,
- iommu_data.type1.listener);
- hwaddr iova, end;
- void *vaddr;
- int ret;
-
- assert(!memory_region_is_iommu(section->mr));
-
- if (vfio_listener_skipped_section(section)) {
- DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
- section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(int128_sub(section->size, int128_one())));
- return;
- }
-
- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
- (section->offset_within_region & ~TARGET_PAGE_MASK))) {
- error_report("%s received unaligned region", __func__);
- return;
- }
-
- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
- end = (section->offset_within_address_space + int128_get64(section->size)) &
- TARGET_PAGE_MASK;
-
- if (iova >= end) {
- return;
- }
-
- vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
-
- DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
- iova, end - 1, vaddr);
-
- memory_region_ref(section->mr);
- ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
- if (ret) {
- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, end - iova, vaddr, ret);
-
- /*
- * On the initfn path, store the first error in the container so we
- * can gracefully fail. Runtime, there's not much we can do other
- * than throw a hardware error.
- */
- if (!container->iommu_data.type1.initialized) {
- if (!container->iommu_data.type1.error) {
- container->iommu_data.type1.error = ret;
- }
- } else {
- hw_error("vfio: DMA mapping failed, unable to continue");
- }
- }
-}
-
-static void vfio_listener_region_del(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container = container_of(listener, VFIOContainer,
- iommu_data.type1.listener);
- hwaddr iova, end;
- int ret;
-
- if (vfio_listener_skipped_section(section)) {
- DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n",
- section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(int128_sub(section->size, int128_one())));
- return;
- }
-
- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
- (section->offset_within_region & ~TARGET_PAGE_MASK))) {
- error_report("%s received unaligned region", __func__);
- return;
- }
-
- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
- end = (section->offset_within_address_space + int128_get64(section->size)) &
- TARGET_PAGE_MASK;
-
- if (iova >= end) {
- return;
- }
-
- DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
- iova, end - 1);
-
- ret = vfio_dma_unmap(container, iova, end - iova);
- memory_region_unref(section->mr);
- if (ret) {
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, end - iova, ret);
- }
-}
-
-static MemoryListener vfio_memory_listener = {
- .region_add = vfio_listener_region_add,
- .region_del = vfio_listener_region_del,
-};
-
-static void vfio_listener_release(VFIOContainer *container)
-{
- memory_listener_unregister(&container->iommu_data.type1.listener);
-}
-
-/*
* Interrupt setup
*/
static void vfio_disable_interrupts(VFIODevice *vdev)
@@ -3221,244 +3003,8 @@ static void vfio_pci_reset_handler(void *opaque)
}
}
-static void vfio_kvm_device_add_group(VFIOGroup *group)
-{
-#ifdef CONFIG_KVM
- struct kvm_device_attr attr = {
- .group = KVM_DEV_VFIO_GROUP,
- .attr = KVM_DEV_VFIO_GROUP_ADD,
- .addr = (uint64_t)(unsigned long)&group->fd,
- };
-
- if (!kvm_enabled()) {
- return;
- }
-
- if (vfio_kvm_device_fd < 0) {
- struct kvm_create_device cd = {
- .type = KVM_DEV_TYPE_VFIO,
- };
-
- if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
- DPRINTF("KVM_CREATE_DEVICE: %m\n");
- return;
- }
-
- vfio_kvm_device_fd = cd.fd;
- }
-
- if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
- error_report("Failed to add group %d to KVM VFIO device: %m",
- group->groupid);
- }
-#endif
-}
-
-static void vfio_kvm_device_del_group(VFIOGroup *group)
-{
-#ifdef CONFIG_KVM
- struct kvm_device_attr attr = {
- .group = KVM_DEV_VFIO_GROUP,
- .attr = KVM_DEV_VFIO_GROUP_DEL,
- .addr = (uint64_t)(unsigned long)&group->fd,
- };
-
- if (vfio_kvm_device_fd < 0) {
- return;
- }
-
- if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
- error_report("Failed to remove group %d from KVM VFIO device: %m",
- group->groupid);
- }
-#endif
-}
-
-static int vfio_connect_container(VFIOGroup *group)
-{
- VFIOContainer *container;
- int ret, fd;
-
- if (group->container) {
- return 0;
- }
-
- QLIST_FOREACH(container, &container_list, next) {
- if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
- group->container = container;
- QLIST_INSERT_HEAD(&container->group_list, group, container_next);
- return 0;
- }
- }
-
- fd = qemu_open("/dev/vfio/vfio", O_RDWR);
- if (fd < 0) {
- error_report("vfio: failed to open /dev/vfio/vfio: %m");
- return -errno;
- }
-
- ret = ioctl(fd, VFIO_GET_API_VERSION);
- if (ret != VFIO_API_VERSION) {
- error_report("vfio: supported vfio version: %d, "
- "reported version: %d", VFIO_API_VERSION, ret);
- close(fd);
- return -EINVAL;
- }
-
- container = g_malloc0(sizeof(*container));
- container->fd = fd;
-
- if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
- ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
- if (ret) {
- error_report("vfio: failed to set group container: %m");
- g_free(container);
- close(fd);
- return -errno;
- }
-
- ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
- if (ret) {
- error_report("vfio: failed to set iommu for container: %m");
- g_free(container);
- close(fd);
- return -errno;
- }
-
- container->iommu_data.type1.listener = vfio_memory_listener;
- container->iommu_data.release = vfio_listener_release;
-
- memory_listener_register(&container->iommu_data.type1.listener,
- &address_space_memory);
-
- if (container->iommu_data.type1.error) {
- ret = container->iommu_data.type1.error;
- vfio_listener_release(container);
- g_free(container);
- close(fd);
- error_report("vfio: memory listener initialization failed for
container");
- return ret;
- }
-
- container->iommu_data.type1.initialized = true;
-
- } else {
- error_report("vfio: No available IOMMU models");
- g_free(container);
- close(fd);
- return -EINVAL;
- }
-
- QLIST_INIT(&container->group_list);
- QLIST_INSERT_HEAD(&container_list, container, next);
-
- group->container = container;
- QLIST_INSERT_HEAD(&container->group_list, group, container_next);
-
- return 0;
-}
-
-static void vfio_disconnect_container(VFIOGroup *group)
-{
- VFIOContainer *container = group->container;
-
- if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
- error_report("vfio: error disconnecting group %d from container",
- group->groupid);
- }
-
- QLIST_REMOVE(group, container_next);
- group->container = NULL;
-
- if (QLIST_EMPTY(&container->group_list)) {
- if (container->iommu_data.release) {
- container->iommu_data.release(container);
- }
- QLIST_REMOVE(container, next);
- DPRINTF("vfio_disconnect_container: close container->fd\n");
- close(container->fd);
- g_free(container);
- }
-}
-
-static VFIOGroup *vfio_get_group(int groupid)
-{
- VFIOGroup *group;
- char path[32];
- struct vfio_group_status status = { .argsz = sizeof(status) };
-
- QLIST_FOREACH(group, &group_list, next) {
- if (group->groupid == groupid) {
- return group;
- }
- }
-
- group = g_malloc0(sizeof(*group));
-
- snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
- group->fd = qemu_open(path, O_RDWR);
- if (group->fd < 0) {
- error_report("vfio: error opening %s: %m", path);
- g_free(group);
- return NULL;
- }
-
- if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
- error_report("vfio: error getting group status: %m");
- close(group->fd);
- g_free(group);
- return NULL;
- }
-
- if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
- error_report("vfio: error, group %d is not viable, please ensure "
- "all devices within the iommu_group are bound to their "
- "vfio bus driver.", groupid);
- close(group->fd);
- g_free(group);
- return NULL;
- }
-
- group->groupid = groupid;
- QLIST_INIT(&group->device_list);
-
- if (vfio_connect_container(group)) {
- error_report("vfio: failed to setup container for group %d", groupid);
- close(group->fd);
- g_free(group);
- return NULL;
- }
-
- if (QLIST_EMPTY(&group_list)) {
- qemu_register_reset(vfio_pci_reset_handler, NULL);
- }
-
- QLIST_INSERT_HEAD(&group_list, group, next);
-
- vfio_kvm_device_add_group(group);
-
- return group;
-}
-
-static void vfio_put_group(VFIOGroup *group)
-{
- if (!QLIST_EMPTY(&group->device_list)) {
- return;
- }
-
- vfio_kvm_device_del_group(group);
- vfio_disconnect_container(group);
- QLIST_REMOVE(group, next);
- DPRINTF("vfio_put_group: close group->fd\n");
- close(group->fd);
- g_free(group);
-
- if (QLIST_EMPTY(&group_list)) {
- qemu_unregister_reset(vfio_pci_reset_handler, NULL);
- }
-}
-
-static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
+static int vfio_get_device(VFIOGroup *group, const char *name,
+ struct VFIODevice *vdev)
{
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
@@ -3485,7 +3031,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
goto error;
}
- DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
+ DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name,
dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
@@ -3768,7 +3314,7 @@ static int vfio_initfn(PCIDevice *pdev)
DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
- group = vfio_get_group(groupid);
+ group = vfio_get_group(groupid, vfio_pci_reset_handler);
if (!group) {
error_report("vfio: failed to get group %d", groupid);
return -ENOENT;
@@ -3785,7 +3331,7 @@ static int vfio_initfn(PCIDevice *pdev)
pvdev->host.function == vdev->host.function) {
error_report("vfio: error: device %s is already attached", path);
- vfio_put_group(group);
+ vfio_put_group(group, vfio_pci_reset_handler);
return -EBUSY;
}
}
@@ -3793,7 +3339,7 @@ static int vfio_initfn(PCIDevice *pdev)
ret = vfio_get_device(group, path, vdev);
if (ret) {
error_report("vfio: failed to get device %s", path);
- vfio_put_group(group);
+ vfio_put_group(group, vfio_pci_reset_handler);
return ret;
}
@@ -3879,7 +3425,7 @@ out_teardown:
out_put:
g_free(vdev->emulated_config_bits);
vfio_put_device(vdev);
- vfio_put_group(group);
+ vfio_put_group(group, vfio_pci_reset_handler);
return ret;
}
@@ -3899,7 +3445,7 @@ static void vfio_exitfn(PCIDevice *pdev)
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
vfio_put_device(vdev);
- vfio_put_group(group);
+ vfio_put_group(group, vfio_pci_reset_handler);
}
static void vfio_pci_reset(DeviceState *dev)
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
new file mode 100644
index 0000000..138fb13
--- /dev/null
+++ b/hw/vfio/platform.c
@@ -0,0 +1,381 @@
+/*
+ * vfio based device assignment support - platform devices
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ * Kim Phillips <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on vfio based PCI device assignment support:
+ * Copyright Red Hat, Inc. 2012
+ */
+
+#include <dirent.h>
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "qemu/range.h"
+#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "hw/hw.h"
+#include "hw/sysbus.h"
+
+#include "vfio-common.h"
+
+#define DEBUG_VFIO
+#ifdef DEBUG_VFIO
+#define DPRINTF(fmt, ...) \
+ do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
+
+/* Extra debugging, trap acceleration paths for more logging */
+#define VFIO_ALLOW_MMAP 1
+
+#define TYPE_VFIO_PLATFORM "vfio-platform"
+
+typedef struct VFIORegion {
+ off_t fd_offset; /* offset of region within device fd */
+ int fd; /* device fd, allows us to pass VFIORegion as opaque data */
+ MemoryRegion mem; /* slow, read/write access */
+ MemoryRegion mmap_mem; /* direct mapped access */
+ void *mmap;
+ size_t size;
+ uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+ uint8_t nr; /* cache the region number for debug */
+} VFIORegion;
+
+typedef struct VFIODevice {
+ SysBusDevice sbdev;
+ int fd;
+ int num_regions;
+ VFIORegion *regions;
+ QLIST_ENTRY(VFIODevice) next;
+ struct VFIOGroup *group;
+ char *name;
+} VFIODevice;
+
+static int vfio_mmap_region(VFIODevice *vdev, VFIORegion *region,
+ MemoryRegion *mem, MemoryRegion *submem,
+ void **map, size_t size, off_t offset,
+ const char *name)
+{
+ int ret = 0;
+
+ if (VFIO_ALLOW_MMAP && size && region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+ int prot = 0;
+ ret = 0;
+
+ if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
+ prot |= PROT_READ;
+ }
+
+ if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
+ prot |= PROT_WRITE;
+ }
+
+ *map = mmap(NULL, size, prot, MAP_SHARED,
+ region->fd, region->fd_offset + offset);
+ if (*map == MAP_FAILED) {
+ ret = -errno;
+ *map = NULL;
+ goto error;
+ }
+
+ memory_region_init_ram_ptr(submem, OBJECT(vdev), name, size, *map);
+ }
+
+ memory_region_add_subregion(mem, offset, submem);
+
+error:
+ return ret;
+}
+
+/*
+ * IO Port/MMIO - Beware of the endians, VFIO is always little endian
+ */
+static void vfio_region_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIORegion *region = opaque;
+ union {
+ uint8_t byte;
+ uint16_t word;
+ uint32_t dword;
+ uint64_t qword;
+ } buf;
+
+ switch (size) {
+ case 1:
+ buf.byte = data;
+ break;
+ case 2:
+ buf.word = data;
+ break;
+ case 4:
+ buf.dword = data;
+ break;
+ default:
+ hw_error("vfio: unsupported write size, %d bytes\n", size);
+ break;
+ }
+
+ if (pwrite(region->fd, &buf, size, region->fd_offset + addr) != size) {
+ error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m",
+ __func__, addr, data, size);
+ }
+
+ DPRINTF("%s(region %d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n",
+ __func__, region->nr, addr, data, size);
+}
+
+static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size)
+{
+ VFIORegion *region = opaque;
+ union {
+ uint8_t byte;
+ uint16_t word;
+ uint32_t dword;
+ uint64_t qword;
+ } buf;
+ uint64_t data = 0;
+
+ if (pread(region->fd, &buf, size, region->fd_offset + addr) != size) {
+ error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m",
+ __func__, addr, size);
+ return (uint64_t)-1;
+ }
+
+ switch (size) {
+ case 1:
+ data = buf.byte;
+ break;
+ case 2:
+ data = buf.word;
+ break;
+ case 4:
+ data = buf.dword;
+ break;
+ default:
+ hw_error("vfio: unsupported read size, %d bytes\n", size);
+ break;
+ }
+
+ DPRINTF("%s(region %d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n",
+ __func__, region->nr, addr, size, data);
+
+ return data;
+}
+
+static const MemoryRegionOps vfio_region_ops = {
+ .read = vfio_region_read,
+ .write = vfio_region_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void vfio_map_region(VFIODevice *vdev, int nr)
+{
+ VFIORegion *region = &vdev->regions[nr];
+ unsigned size = region->size;
+ char name[64];
+
+ snprintf(name, sizeof(name), "VFIO %s region %d", vdev->name, nr);
+
+ /* A "slow" read/write mapping underlies all regions */
+ memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
+ region, name, size);
+
+ strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
+ if (vfio_mmap_region(vdev, region, &region->mem,
+ &region->mmap_mem, &region->mmap, size, 0, name)) {
+ error_report("%s unsupported. Performance may be slow", name);
+ }
+}
+
+static int vfio_get_device(VFIOGroup *group, const char *name,
+ struct VFIODevice *vdev)
+{
+ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
+ struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+ int ret, i;
+
+ ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+ if (ret < 0) {
+ error_report("vfio: error getting device %s from group %d: %m",
+ name, group->groupid);
+ error_printf("Verify all devices in group %d are bound to the vfio "
+ "platform driver and are not already in use\n",
+ group->groupid);
+ return ret;
+ }
+
+ vdev->fd = ret;
+ vdev->group = group;
+ QLIST_INSERT_HEAD(&group->device_list, vdev, next);
+
+ /* Sanity check device */
+ ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info);
+ if (ret) {
+ error_report("vfio: error getting device info: %m");
+ goto error;
+ }
+
+ DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name,
+ dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
+
+ vdev->regions = g_malloc0(sizeof(VFIORegion) * dev_info.num_regions);
+ if (!vdev->regions) {
+ error_report("vfio: Error allocating space for %d regions",
+ dev_info.num_regions);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ vdev->num_regions = dev_info.num_regions;
+
+ for (i = 0; i < dev_info.num_regions; i++) {
+ reg_info.index = i;
+
+ ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+ if (ret) {
+ error_report("vfio: Error getting region %d info: %m", i);
+ goto error;
+ }
+
+ DPRINTF("Device %s region %d:\n", name, i);
+ DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+ (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+ (unsigned long)reg_info.flags);
+
+ vdev->regions[i].flags = reg_info.flags;
+ vdev->regions[i].size = reg_info.size;
+ vdev->regions[i].fd_offset = reg_info.offset;
+ vdev->regions[i].fd = vdev->fd;
+ vdev->regions[i].nr = i;
+ }
+
+error:
+ if (ret) {
+ g_free(vdev->regions);
+ QLIST_REMOVE(vdev, next);
+ vdev->group = NULL;
+ close(vdev->fd);
+ }
+ return ret;
+}
+
+static void vfio_platform_realize(DeviceState *dev, Error **errp)
+{
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
+ VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev);
+ VFIOGroup *group;
+ char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ ssize_t len;
+ struct stat st;
+ int groupid, i, ret;
+
+ /* TODO: pass device name on command line */
+ vdev->name = malloc(PATH_MAX);
+ strcpy(vdev->name, "fff51000.ethernet");
+
+ /* Check that the host device exists */
+ snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", vdev->name);
+ if (stat(path, &st) < 0) {
+ error_report("vfio: error: no such host device: %s", path);
+ return;
+ }
+
+ strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+
+ len = readlink(path, iommu_group_path, PATH_MAX);
+ if (len <= 0) {
+ error_report("vfio: error no iommu_group for device");
+ return;
+ }
+
+ iommu_group_path[len] = 0;
+ group_name = basename(iommu_group_path);
+
+ if (sscanf(group_name, "%d", &groupid) != 1) {
+ error_report("vfio: error reading %s: %m", path);
+ return;
+ }
+
+ DPRINTF("%s(%s) group %d\n", __func__, vdev->name, groupid);
+
+ group = vfio_get_group(groupid, NULL);
+ if (!group) {
+ error_report("vfio: failed to get group %d", groupid);
+ return;
+ }
+
+ snprintf(path, sizeof(path), "%s", vdev->name);
+
+ QLIST_FOREACH(pvdev, &group->device_list, next) {
+ if (strcmp(pvdev->name, vdev->name) == 0) {
+ error_report("vfio: error: device %s is already attached", path);
+ vfio_put_group(group, NULL);
+ return;
+ }
+ }
+
+ ret = vfio_get_device(group, path, vdev);
+ if (ret) {
+ error_report("vfio: failed to get device %s", path);
+ vfio_put_group(group, NULL);
+ return;
+ }
+
+ for (i = 0; i < vdev->num_regions; i++) {
+ vfio_map_region(vdev, i);
+ sysbus_init_mmio(sbdev, &vdev->regions[i].mem);
+ }
+}
+
+static const VMStateDescription vfio_platform_vmstate = {
+ .name = TYPE_VFIO_PLATFORM,
+ .unmigratable = 1,
+};
+
+static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = vfio_platform_realize;
+ dc->vmsd = &vfio_platform_vmstate;
+ dc->desc = "VFIO-based platform device assignment";
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo vfio_platform_dev_info = {
+ .name = TYPE_VFIO_PLATFORM,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(VFIODevice),
+ .class_init = vfio_platform_dev_class_init,
+};
+
+static void register_vfio_platform_dev_type(void)
+{
+ type_register_static(&vfio_platform_dev_info);
+}
+
+type_init(register_vfio_platform_dev_type)
diff --git a/hw/vfio/vfio-common.h b/hw/vfio/vfio-common.h
new file mode 100644
index 0000000..21148ef
--- /dev/null
+++ b/hw/vfio/vfio-common.h
@@ -0,0 +1,55 @@
+/*
+ * common header for vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Alex Williamson <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (address@hidden)
+ * Copyright (c) 2007, Neocleus, Guy Zana (address@hidden)
+ * Copyright (C) 2008, Qumranet, Amit Shah (address@hidden)
+ * Copyright (C) 2008, Red Hat, Amit Shah (address@hidden)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (address@hidden)
+ */
+
+struct VFIODevice;
+
+struct VFIOGroup;
+
+typedef struct VFIOType1 {
+ MemoryListener listener;
+ int error;
+ bool initialized;
+} VFIOType1;
+
+typedef struct VFIOContainer {
+ int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+ struct {
+ /* enable abstraction to support various iommu backends */
+ union {
+ VFIOType1 type1;
+ };
+ void (*release)(struct VFIOContainer *);
+ } iommu_data;
+ QLIST_HEAD(, VFIOGroup) group_list;
+ QLIST_ENTRY(VFIOContainer) next;
+} VFIOContainer;
+
+typedef struct VFIOGroup {
+ int fd;
+ int groupid;
+ VFIOContainer *container;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOGroup) next;
+ QLIST_ENTRY(VFIOGroup) container_next;
+} VFIOGroup;
+
+
+VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler);
+void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler);
--
1.8.3.2