qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v2)


From: Alexey Kardashevskiy
Subject: Re: [Qemu-ppc] [PATCH] RFC: vfio-powerpc: added VFIO support (v2)
Date: Fri, 13 Jul 2012 15:24:07 +1000
User-agent: Mozilla/5.0 (X11; Linux i686; rv:13.0) Gecko/20120614 Thunderbird/13.0.1

Two comments below.

On 13/07/12 06:54, Blue Swirl wrote:
> On Thu, Jul 12, 2012 at 8:52 AM, Alexey Kardashevskiy <address@hidden> wrote:
>> It literally does the following:
>>
>> 1. POWERPC IOMMU support (the kernel counterpart is required)
>>
>> 2. The patch assumes that IOAPIC calls are going to be replaced
>> with something generic. I have something in my local git but it's
>> too early, we need to extend PCIINTxRoute first.
>>
>> 3. vfio_get_group() made public. I want to open IOMMU group from
>> the sPAPR code to have everything I need for VFIO on sPAPR and
>> avoid ugly workarounds with finilizing PHB setup on sPAPR.
>>
>> 4. Change sPAPR PHB to scan the PCI bus which is used for
>> the IOMMU-VFIO group. Now it is enough to add the following to
>> the QEMU command line to get VFIO up with all the devices from
>> IOMMU group with id=3:
>> -device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
>> mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000
>>
>> Signed-off-by: Alexey Kardashevskiy <address@hidden>
>> ---
>>  hw/ppc/Makefile.objs |    3 ++
>>  hw/spapr.h           |    4 ++
>>  hw/spapr_iommu.c     |   87 ++++++++++++++++++++++++++++++++++++++
>>  hw/spapr_pci.c       |  115 
>> +++++++++++++++++++++++++++++++++++++++++++++++---
>>  hw/spapr_pci.h       |    5 +++
>>  hw/vfio_pci.c        |   28 +++++++++++-
>>  hw/vfio_pci.h        |    2 +
>>  7 files changed, 237 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index f573a95..c46a049 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
>>  # Xilinx PPC peripherals
>>  obj-y += xilinx_ethlite.o
>>
>> +# VFIO PCI device assignment
>> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
>> +
>>  obj-y := $(addprefix ../,$(obj-y))
>> diff --git a/hw/spapr.h b/hw/spapr.h
>> index b37f337..9dca704 100644
>> --- a/hw/spapr.h
>> +++ b/hw/spapr.h
>> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
>> *propname,
>>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>>                        DMAContext *dma);
>>
>> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
>> +                         uint64_t *dma32_window_start,
>> +                         uint64_t *dma32_window_size);
>> +
>>  #endif /* !defined (__HW_SPAPR_H__) */
>> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
>> index 50c288d..0a194e8 100644
>> --- a/hw/spapr_iommu.c
>> +++ b/hw/spapr_iommu.c
>> @@ -16,6 +16,8 @@
>>   * You should have received a copy of the GNU Lesser General Public
>>   * License along with this library; if not, see 
>> <http://www.gnu.org/licenses/>.
>>   */
>> +#include <sys/ioctl.h>
>> +
>>  #include "hw.h"
>>  #include "kvm.h"
>>  #include "qdev.h"
>> @@ -23,6 +25,7 @@
>>  #include "dma.h"
>>
>>  #include "hw/spapr.h"
>> +#include "hw/linux-vfio.h"
>>
>>  #include <libfdt.h>
>>
>> @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
>> ioba, target_ulong tce)
>>      return 0;
>>  }
>>
>> +/* -------- API for POWERPC IOMMU -------- */
>> +
>> +#define POWERPC_IOMMU           2
>> +
>> +struct tce_iommu_info {
> 
> CamelCase.
> 
>> +    __u32 argsz;
>> +    __u32 dma32_window_start;
>> +    __u32 dma32_window_size;
> 
> Please use uint32_t.
> 
>> +};
>> +
>> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>> +
>> +struct tce_iommu_dma_map {
>> +    __u32 argsz;
> 
> The structure may or may not be padded here since there's no
> QEMU_PACKED attribute. If possible, just rearrange the fields.
> 
>> +    __u64 va;
>> +    __u64 dmaaddr;
>> +};
>> +
>> +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
>> +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>> +
>> +typedef struct sPAPRVFIOTable {
>> +    int fd;
>> +    uint32_t liobn;
>> +    QLIST_ENTRY(sPAPRVFIOTable) list;
>> +} sPAPRVFIOTable;
>> +
>> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
>> +
>> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
>> +                         uint64_t *dma32_window_start,
>> +                         uint64_t *dma32_window_size)
>> +{
>> +    sPAPRVFIOTable *t;
>> +    struct tce_iommu_info info = { .argsz = sizeof(info) };
>> +
>> +    if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
>> +        fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
>> +        return;
>> +    }
>> +    *dma32_window_start = info.dma32_window_start;
>> +    *dma32_window_size = info.dma32_window_size;
>> +
>> +    t = g_malloc0(sizeof(*t));
>> +    t->fd = fd;
>> +    t->liobn = liobn;
>> +
>> +    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
>> +}
>> +
>> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
>> +{
>> +    sPAPRVFIOTable *t;
>> +    struct tce_iommu_dma_map map = {
>> +        .argsz = sizeof(map),
>> +        .va = 0,
>> +        .dmaaddr = ioba,
>> +    };
>> +
>> +    QLIST_FOREACH(t, &vfio_tce_tables, list) {
>> +        if (t->liobn != liobn) {
>> +            continue;
>> +        }
>> +        if (tce) {
>> +            map.va = (uintptr_t)qemu_get_ram_ptr(tce & 
>> ~SPAPR_TCE_PAGE_MASK);
>> +            if (ioctl(t->fd, POWERPC_IOMMU_MAP_DMA, &map)) {
>> +                fprintf(stderr, "TCE_MAP_DMA: %d\n", errno);
> 
> perror()?
> 
>> +                return H_PARAMETER;
>> +            }
>> +        } else {
>> +            if (ioctl(t->fd, POWERPC_IOMMU_UNMAP_DMA, &map)) {
>> +                fprintf(stderr, "TCE_UNMAP_DMA: %d\n", errno);
>> +                return H_PARAMETER;
>> +            }
>> +        }
>> +        return H_SUCCESS;
>> +    }
>> +    return H_CONTINUE; /* positive non-zero value */
>> +}
>> +
>>  static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
>>                                target_ulong opcode, target_ulong *args)
>>  {
>> @@ -203,6 +286,10 @@ static target_ulong h_put_tce(CPUPPCState *env, 
>> sPAPREnvironment *spapr,
>>      if (0 >= ret) {
>>          return ret ? H_PARAMETER : H_SUCCESS;
>>      }
>> +    ret = put_tce_vfio(liobn, ioba, tce);
>> +    if (0 >= ret) {
> 
> This order in expressions is not common, please reverse.
> 
>> +        return ret ? H_PARAMETER : H_SUCCESS;
>> +    }
>>  #ifdef DEBUG_TCE
>>      fprintf(stderr, "%s on liobn=" TARGET_FMT_lx
>>              "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx " ret=%d\n",
>> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
>> index 014297b..92c48b6 100644
>> --- a/hw/spapr_pci.c
>> +++ b/hw/spapr_pci.c
>> @@ -22,6 +22,9 @@
>>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>   * THE SOFTWARE.
>>   */
>> +#include <sys/types.h>
>> +#include <dirent.h>
>> +
>>  #include "hw.h"
>>  #include "pci.h"
>>  #include "msi.h"
>> @@ -29,10 +32,10 @@
>>  #include "pci_host.h"
>>  #include "hw/spapr.h"
>>  #include "hw/spapr_pci.h"
>> +#include "hw/vfio_pci.h"
>>  #include "exec-memory.h"
>>  #include <libfdt.h>
>>  #include "trace.h"
>> -
>>  #include "hw/pci_internals.h"
>>
>>  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
>> @@ -440,6 +443,12 @@ static void pci_spapr_set_irq(void *opaque, int 
>> irq_num, int level)
>>                   level);
>>  }
>>
>> +static int pci_spapr_get_irq(void *opaque, int irq_num)
>> +{
>> +    sPAPRPHBState *phb = opaque;
>> +    return phb->lsi_table[irq_num].dt_irq;
>> +}
>> +
>>  static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
>>                                unsigned size)
>>  {
>> @@ -515,6 +524,82 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus 
>> *bus, void *opaque,
>>      return phb->dma;
>>  }
>>
>> +static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
>> +{
>> +    char iommupath[256];
>> +    DIR *dirp;
>> +    struct dirent *entry;
>> +
>> +    phb->iommugroup = vfio_get_group(phb->iommugroupid);
>> +    if (!phb->iommugroup) {
>> +        fprintf(stderr, "Cannot open IOMMU group %d\n", phb->iommugroupid);
>> +        return -1;
>> +    }
>> +
>> +    if (!phb->scan) {
>> +        printf("Autoscan disabled\n");
>> +        return 0;
>> +    }
>> +
>> +    sprintf(iommupath, "/sys/kernel/iommu_groups/%d/devices/", 
>> phb->iommugroupid);
> 
> Please use snprintf() or g_strdup_printf().
> 
> 
>> +    dirp = opendir(iommupath);
>> +
>> +    while ((entry = readdir(dirp)) != NULL) {
>> +        char *tmp = alloca(strlen(iommupath) + strlen(entry->d_name) + 32);
>> +        FILE *deviceclassfile;
>> +        unsigned deviceclass = 0, domainid, busid, devid, fnid;
>> +        char addr[32];
>> +        DeviceState *dev;
>> +
>> +        if (4 != sscanf(entry->d_name, "%X:%X:%X.%x",
> 
> Please put the constant last.
> 
>> +                        &domainid, &busid, &devid, &fnid)) {
>> +            continue;
>> +        }
>> +
>> +        sprintf(tmp, "%s%s/class", iommupath, entry->d_name);
> 
> Again, snprintf() or g_strdup_printf() (which avoids the alloca() too).
> 
>> +        printf("Reading device class from %s\n", tmp);
> 
> Leftover debugging?
> 
>> +
>> +        deviceclassfile = fopen(tmp, "r");
>> +        if (deviceclassfile) {
>> +            fscanf(deviceclassfile, "%x", &deviceclass);
>> +            fclose(deviceclassfile);
>> +        }
>> +        if (!deviceclass) {
>> +            continue;
>> +        }
>> +#define PCI_BASE_CLASS_BRIDGE           0x06
> 
> This belongs to pci_ids.h.


It should but it is not. It is way smaller than the same one in the kernel.



>> +        if ((phb->scan < 2) && ((deviceclass >> 16) == 
>> PCI_BASE_CLASS_BRIDGE)) {
>> +            continue;
>> +        }
>> +        if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
>> +            /* Tweak USB */
>> +            phb->force_addr = 1;
>> +            phb->enable_multifunction = 1;
>> +        }
>> +
>> +        printf("Creating device %X:%X:%X.%x class=0x%X\n",
>> +               domainid, busid, devid, fnid, deviceclass);
> 
> Lower case hex, please.
> 
>> +
>> +        dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
>> +        if (!dev) {
>> +            fprintf(stderr, "failed to create vfio-pci\n");
>> +            continue;
>> +        }
>> +        qdev_prop_parse(dev, "host", entry->d_name);
>> +        if (phb->force_addr) {
>> +            sprintf(addr, "%X.%X", devid, fnid);
> 
> snprintf, lower case hex.
> 
>> +            qdev_prop_parse(dev, "addr", addr);
>> +        }
>> +        if (phb->enable_multifunction) {
>> +            qdev_prop_set_bit(dev, "multifunction", 1);
>> +        }
>> +        qdev_init_nofail(dev);
>> +    }
>> +    closedir(dirp);
>> +
>> +    return 0;
>> +}
>> +
>>  static int spapr_phb_init(SysBusDevice *s)
>>  {
>>      sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
>> @@ -567,15 +652,13 @@ static int spapr_phb_init(SysBusDevice *s)
>>
>>      bus = pci_register_bus(&phb->host_state.busdev.qdev,
>>                             phb->busname ? phb->busname : phb->dtbusname,
>> -                           pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
>> +                           pci_spapr_set_irq, pci_spapr_get_irq,
>> +                           pci_spapr_map_irq, phb,
>>                             &phb->memspace, &phb->iospace,
>>                             PCI_DEVFN(0, 0), PCI_NUM_PINS);
>>      phb->host_state.bus = bus;
>>
>>      phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
>> -    phb->dma_window_start = 0;
>> -    phb->dma_window_size = 0x40000000;
>> -    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, 
>> phb->dma_window_size);
>>      pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
>>
>>      QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
>> @@ -588,6 +671,24 @@ static int spapr_phb_init(SysBusDevice *s)
>>          }
>>      }
>>
>> +    if (phb->iommugroupid >= 0) {
>> +        if (0 > spapr_pci_scan_vfio(phb)) {
> 
> Order.
> 
>> +            return -1;
>> +        }
>> +        if (!phb->iommugroup || !phb->iommugroup->container) {
>> +            return -1;
>> +        }
>> +        spapr_vfio_init_dma(phb->iommugroup->container->fd, phb->dma_liobn,
>> +                            &phb->dma_window_start,
>> +                            &phb->dma_window_size);
>> +        return 0;
>> +    }
>> +
>> +    phb->dma_window_start = 0;
>> +    phb->dma_window_size = 0x40000000;
>> +    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
>> +                                         phb->dma_window_size);
>> +
>>      return 0;
>>  }
>>
>> @@ -599,6 +700,10 @@ static Property spapr_phb_properties[] = {
>>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
>>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
>>      DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
>> +    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
>> +    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1), /* 0 don't 1 
>> +devices 2 +buses */
>> +    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
>> +    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>
>> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
>> index 145071c..1953a74 100644
>> --- a/hw/spapr_pci.h
>> +++ b/hw/spapr_pci.h
>> @@ -57,6 +57,11 @@ typedef struct sPAPRPHBState {
>>          int nvec;
>>      } msi_table[SPAPR_MSIX_MAX_DEVS];
>>
>> +    int32_t iommugroupid;
>> +    struct VFIOGroup *iommugroup;
>> +    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
>> +    uint8_t enable_multifunction, force_addr;
> 
> Use bool for both?



There is no DEFINE_PROP_BOOL. There is DEFINE_PROP_BIT but it works with bits, 
not bools.
uint8_t is the closest one.



>> +
>>      QLIST_ENTRY(sPAPRPHBState) list;
>>  } sPAPRPHBState;
>>
>> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
>> index 1ac287f..73681fb 100644
>> --- a/hw/vfio_pci.c
>> +++ b/hw/vfio_pci.c
>> @@ -21,7 +21,6 @@
>>  #include <dirent.h>
>>  #include <stdio.h>
>>  #include <unistd.h>
>> -#include <sys/io.h>
>>  #include <sys/ioctl.h>
>>  #include <sys/mman.h>
>>  #include <sys/types.h>
>> @@ -43,6 +42,12 @@
>>  #include "range.h"
>>  #include "vfio_pci.h"
>>  #include "linux-vfio.h"
>> +#ifndef TARGET_PPC64
>> +#include <sys/io.h>
>> +#else
>> +#include "hw/pci_internals.h"
>> +#include "hw/spapr.h"
>> +#endif
>>
>>  //#define DEBUG_VFIO
>>  #ifdef DEBUG_VFIO
>> @@ -1581,6 +1586,25 @@ static int vfio_connect_container(VFIOGroup *group)
>>
>>          memory_listener_register(&container->listener, get_system_memory());
>>
>> +#define POWERPC_IOMMU           2
>> +    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, POWERPC_IOMMU)) {
>> +        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
>> +        if (ret) {
>> +            error_report("vfio: failed to set group container: %s\n",
>> +                         strerror(errno));
>> +            g_free(container);
>> +            close(fd);
>> +            return -1;
>> +        }
>> +
>> +        ret = ioctl(fd, VFIO_SET_IOMMU, POWERPC_IOMMU);
>> +        if (ret) {
>> +            error_report("vfio: failed to set iommu for container: %s\n",
>> +                         strerror(errno));
>> +            g_free(container);
>> +            close(fd);
>> +            return -1;
>> +        }
>>      } else {
>>          error_report("vfio: No available IOMMU models\n");
>>          g_free(container);
>> @@ -1620,7 +1644,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>>      }
>>  }
>>
>> -static VFIOGroup *vfio_get_group(int groupid)
>> +VFIOGroup *vfio_get_group(int groupid)
>>  {
>>      VFIOGroup *group;
>>      char path[32];
>> diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
>> index 226607c..d63dd63 100644
>> --- a/hw/vfio_pci.h
>> +++ b/hw/vfio_pci.h
>> @@ -105,4 +105,6 @@ typedef struct VFIOGroup {
>>  #define VFIO_FLAG_IOMMU_SHARED_BIT 0
>>  #define VFIO_FLAG_IOMMU_SHARED (1U << VFIO_FLAG_UIOMMU_SHARED_BIT)
>>
>> +VFIOGroup *vfio_get_group(int groupid);
>> +
>>  #endif /* __VFIO_H__ */
>> --
>> 1.7.10
>>
>>


-- 
Alexey





reply via email to

[Prev in Thread] Current Thread [Next in Thread]