[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v4 11/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)
From: |
David Gibson |
Subject: |
Re: [Qemu-devel] [PATCH v4 11/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW) |
Date: |
Thu, 5 Feb 2015 14:51:44 +1100 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
On Thu, Jan 29, 2015 at 08:27:23PM +1100, Alexey Kardashevskiy wrote:
> This implements DDW for emulated and VFIO PHB.
>
> This removes all DMA windows on reset and creates the default window,
> same is done on the "ibm,reset-pe-dma-window" call.
> This converts sPAPRPHBClass::finish_realize to sPAPRPHBClass::ddw_reset
> and others.
>
> The "ddw" property is enabled by default on a PHB but for compatibility
> pseries-2.1 machine disables it.
Now that we're past the 2.2 release, this should change to only be
enabled for 2.3+, yes?
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> Changes:
> v4:
> * reset handler is back in generalized form
>
> v3:
> * removed reset
> * windows_num is now 1 or bigger rather than 0-based value and it is only
> changed in PHB code, not in RTAS
> * added page mask check in create()
> * added SPAPR_PCI_DDW_MAX_WINDOWS to track how many windows are already
> created
>
> v2:
> * tested on hacked emulated E1000
> * implemented DDW reset on the PHB reset
> * spapr_pci_ddw_remove/spapr_pci_ddw_reset are public for reuse by VFIO
>
> spapr_pci_vfio: Enable DDW
>
> This implements DDW for VFIO. Host kernel support is required for this.
>
> After this patch DDW will be enabled on all machines but pseries-2.1.
>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> Changes:
> v2:
> * remove()/reset() callbacks use spapr_pci's ones
> ---
> hw/ppc/spapr_pci.c | 160
> +++++++++++++++++++++++++++++++++++---------
> hw/ppc/spapr_pci_vfio.c | 98 +++++++++++++++++----------
> include/hw/pci-host/spapr.h | 15 ++++-
> 3 files changed, 203 insertions(+), 70 deletions(-)
>
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 6bd00e8..3ec03be 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -469,6 +469,126 @@ static const MemoryRegionOps spapr_msi_ops = {
> .endianness = DEVICE_LITTLE_ENDIAN
> };
>
> +static int spapr_phb_get_win_num_cb(Object *child, void *opaque)
> +{
> + if (object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE)) {
> + ++*(unsigned *)opaque;
> + }
> + return 0;
> +}
> +
> +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb)
> +{
> + unsigned ret = 0;
> +
> + object_child_foreach(OBJECT(sphb), spapr_phb_get_win_num_cb, &ret);
> +
> + return ret;
> +}
> +
> +/*
> + * Dynamic DMA windows
> + */
> +static int spapr_pci_ddw_query(sPAPRPHBState *sphb,
> + uint32_t *windows_supported,
> + uint32_t *page_size_mask,
> + uint32_t *dma32_window_size,
> + uint64_t *dma64_window_size)
> +{
> + *windows_supported = SPAPR_PCI_DDW_MAX_WINDOWS;
> + *page_size_mask = DDW_PGSIZE_64K | DDW_PGSIZE_16M;
> + *dma32_window_size = SPAPR_PCI_TCE32_WIN_SIZE;
> + *dma64_window_size = ram_size;
> +
> + return 0;
> +}
> +
> +static int spapr_pci_ddw_create(sPAPRPHBState *sphb, uint32_t liobn,
> + uint32_t page_shift, uint32_t window_shift,
> + sPAPRTCETable **ptcet)
> +{
> + uint64_t bus_offset = spapr_phb_get_win_num(sphb) ?
> + SPAPR_PCI_TCE64_START : 0;
Should you also have an assert that spapr_phb_get_win_num(sphb) <= 1 at
this point?
> +
> + if (((page_shift != 16) && (page_shift != 24) && (page_shift != 12))) {
> + return -1;
You only have two return values: failure and success. So is there a
reason you're using an int, rather than returning the sPAPRTCETable *
or NULL?
> + }
> +
> + *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn,
> + bus_offset,
> + page_shift,
> + 1ULL << (window_shift - page_shift),
> + false);
> + if (!*ptcet) {
> + return -1;
> + }
> + memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset,
> + spapr_tce_get_iommu(*ptcet));
> +
> + return 0;
> +}
> +
> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet)
> +{
> + memory_region_del_subregion(&sphb->iommu_root,
> + spapr_tce_get_iommu(tcet));
> + spapr_tce_free_table(tcet);
> +
> + return 0;
> +}
> +
> +static int spapr_pci_remove_ddw_cb(Object *child, void *opaque)
> +{
> + sPAPRTCETable *tcet;
> +
> + tcet = (sPAPRTCETable *) object_dynamic_cast(child,
> TYPE_SPAPR_TCE_TABLE);
> +
> + if (tcet) {
> + sPAPRPHBState *sphb = opaque;
> + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> +
> + spc->ddw_remove(sphb, tcet);
> + }
> +
> + return 0;
> +}
> +
> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb)
> +{
> + int ret;
> + sPAPRPHBClass *spc;
> + sPAPRTCETable *tcet;
> + uint32_t windows_supported = 0, page_size_mask = 0, dma32_window_size =
> 0;
> + uint64_t dma64_window_size = 0;
> +
> + /* Remove all windows */
> + object_child_foreach(OBJECT(sphb), spapr_pci_remove_ddw_cb, sphb);
> +
> + /* Create default 32bit window */
This comment seems to belong a few lines down from here.
> + spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> + if (!spc->ddw_create || !spc->ddw_query) {
> + return -1;
> + }
> +
> + ret = spc->ddw_query(sphb, &windows_supported, &page_size_mask,
> + &dma32_window_size, &dma64_window_size);
> + if (ret) {
> + return ret;
> + }
> +
> + sphb->ddw_enabled = (windows_supported > 1);
ddw_enabled doesn't actually seem to be tested anywhere. And
shouldn't it depend on the externally set property for pre-2.3
compat, not just on the # windows supported by the underlying
implementation?
> + ret = spc->ddw_create(sphb, SPAPR_PCI_LIOBN(sphb->index, 0),
> + SPAPR_TCE_PAGE_SHIFT, ctzl(dma32_window_size),
> &tcet);
> + if (ret) {
> + return ret;
> + }
> +
> + object_unref(OBJECT(tcet));
This could perhaps do with a comment saying why you've ended up with
an extraneous reference.
> +
> + return 0;
> +}
> +
> /*
> * PHB PCI device
> */
> @@ -484,7 +604,6 @@ static void spapr_phb_realize(DeviceState *dev, Error
> **errp)
> SysBusDevice *s = SYS_BUS_DEVICE(dev);
> sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
> PCIHostState *phb = PCI_HOST_BRIDGE(s);
> - sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
> char *namebuf;
> int i;
> PCIBus *bus;
> @@ -622,37 +741,9 @@ static void spapr_phb_realize(DeviceState *dev, Error
> **errp)
> sphb->lsi_table[i].irq = irq;
> }
>
> - if (!info->finish_realize) {
> - error_setg(errp, "finish_realize not defined");
> - return;
> - }
> -
> - info->finish_realize(sphb, errp);
> -
> sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free,
> g_free);
> }
>
> -static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
> -{
> - sPAPRTCETable *tcet;
> -
> - tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
> - 0,
> - SPAPR_TCE_PAGE_SHIFT,
> - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
> - if (!tcet) {
> - error_setg(errp, "Unable to create TCE table for %s",
> - sphb->dtbusname);
> - return ;
> - }
> -
> - /* Register default 32bit DMA window */
> - memory_region_add_subregion(&sphb->iommu_root, 0,
> - spapr_tce_get_iommu(tcet));
> -
> - object_unref(OBJECT(tcet));
> -}
> -
> static int spapr_phb_children_reset(Object *child, void *opaque)
> {
> DeviceState *dev = (DeviceState *) object_dynamic_cast(child,
> TYPE_DEVICE);
> @@ -666,7 +757,11 @@ static int spapr_phb_children_reset(Object *child, void
> *opaque)
>
> static void spapr_phb_reset(DeviceState *qdev)
> {
> - /* Reset the IOMMU state */
> + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(qdev);
> +
> + if (spc->ddw_reset) {
> + spc->ddw_reset(SPAPR_PCI_HOST_BRIDGE(qdev));
> + }
> object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
> }
>
> @@ -801,7 +896,10 @@ static void spapr_phb_class_init(ObjectClass *klass,
> void *data)
> dc->vmsd = &vmstate_spapr_pci;
> set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> dc->cannot_instantiate_with_device_add_yet = false;
> - spc->finish_realize = spapr_phb_finish_realize;
> + spc->ddw_query = spapr_pci_ddw_query;
> + spc->ddw_create = spapr_pci_ddw_create;
> + spc->ddw_remove = spapr_pci_ddw_remove;
> + spc->ddw_reset = spapr_pci_ddw_reset;
> }
>
> static const TypeInfo spapr_phb_info = {
> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> index aabf0ae..b20ac90 100644
> --- a/hw/ppc/spapr_pci_vfio.c
> +++ b/hw/ppc/spapr_pci_vfio.c
> @@ -27,65 +27,89 @@ static Property spapr_phb_vfio_properties[] = {
> DEFINE_PROP_END_OF_LIST(),
> };
>
> -static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
> +static int spapr_pci_vfio_ddw_query(sPAPRPHBState *sphb,
> + uint32_t *windows_supported,
> + uint32_t *page_size_mask,
> + uint32_t *dma32_window_size,
> + uint64_t *dma64_window_size)
> {
> sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
> int ret;
> - sPAPRTCETable *tcet;
> - uint32_t liobn = svphb->phb.dma_liobn;
>
> - if (svphb->iommugroupid == -1) {
> - error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
> - return;
> - }
> -
> - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> - VFIO_CHECK_EXTENSION,
> - (void *) VFIO_SPAPR_TCE_IOMMU);
> - if (ret != 1) {
> - error_setg_errno(errp, -ret,
> - "spapr-vfio: SPAPR extension is not supported");
> - return;
> - }
> -
> - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
> VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
> if (ret) {
> - error_setg_errno(errp, -ret,
> - "spapr-vfio: get info from container failed");
> - return;
> + return ret;
> }
>
> - tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
> - SPAPR_TCE_PAGE_SHIFT,
> - info.dma32_window_size >>
> SPAPR_TCE_PAGE_SHIFT,
> - true);
> - if (!tcet) {
> - error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
> - return;
> + *windows_supported = info.windows_supported;
> + *page_size_mask = info.flags & DDW_PGSIZE_MASK;
> + *dma32_window_size = info.dma32_window_size;
> + *dma64_window_size = ram_size;
> +
> + return ret;
> +}
> +
> +static int spapr_pci_vfio_ddw_create(sPAPRPHBState *sphb, uint32_t liobn,
> + uint32_t page_shift, uint32_t
> window_shift,
> + sPAPRTCETable **ptcet)
> +{
> + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> + struct vfio_iommu_spapr_tce_create create = {
> + .argsz = sizeof(create),
> + .page_shift = page_shift,
> + .window_shift = window_shift,
> + .levels = 1,
> + .start_addr = 0,
> + };
> + int ret;
> +
> + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
> + VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
> + if (ret) {
> + return ret;
> }
>
> - /* Register default 32bit DMA window */
> - memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
> - spapr_tce_get_iommu(tcet));
> + *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn,
> + create.start_addr,
> + page_shift,
> + 1ULL << (window_shift - page_shift),
> + true);
> + if (!*ptcet) {
> + return -1;
> + }
> + memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset,
> + spapr_tce_get_iommu(*ptcet));
>
> - object_unref(OBJECT(tcet));
> + return ret;
> }
>
> -static void spapr_phb_vfio_reset(DeviceState *qdev)
> +static int spapr_pci_vfio_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable
> *tcet)
> {
> - /* Do nothing */
> + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> + struct vfio_iommu_spapr_tce_remove remove = {
> + .argsz = sizeof(remove),
> + .start_addr = tcet->bus_offset
> + };
> + int ret;
> +
> + spapr_pci_ddw_remove(sphb, tcet);
> + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
> + VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
> +
> + return ret;
> }
>
> static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
> {
> - DeviceClass *dc = DEVICE_CLASS(klass);
> sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
> + DeviceClass *dc = DEVICE_CLASS(klass);
>
> dc->props = spapr_phb_vfio_properties;
> - dc->reset = spapr_phb_vfio_reset;
> - spc->finish_realize = spapr_phb_vfio_finish_realize;
> + spc->ddw_query = spapr_pci_vfio_ddw_query;
> + spc->ddw_create = spapr_pci_vfio_ddw_create;
> + spc->ddw_remove = spapr_pci_vfio_ddw_remove;
> }
>
> static const TypeInfo spapr_phb_vfio_info = {
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index eec95f3..577f908 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -48,8 +48,6 @@ typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
> struct sPAPRPHBClass {
> PCIHostBridgeClass parent_class;
>
> - void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
> -
> /* sPAPR spec defined pagesize mask values */
> #define DDW_PGSIZE_4K 0x01
> #define DDW_PGSIZE_64K 0x02
> @@ -106,6 +104,8 @@ struct sPAPRPHBState {
> int32_t msi_devs_num;
> spapr_pci_msi_mig *msi_devs;
>
> + bool ddw_enabled;
> +
> QLIST_ENTRY(sPAPRPHBState) list;
> };
>
> @@ -129,6 +129,14 @@ struct sPAPRPHBVFIOState {
>
> #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL
>
> +#define SPAPR_PCI_TCE32_WIN_SIZE 0x80000000ULL
> +
> +/* Default 64bit dynamic window offset */
> +#define SPAPR_PCI_TCE64_START 0x8000000000000000ULL
> +
> +/* Maximum allowed number of DMA windows for emulated PHB */
> +#define SPAPR_PCI_DDW_MAX_WINDOWS 2
> +
> static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
> {
> return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
> @@ -147,5 +155,8 @@ void spapr_pci_rtas_init(void);
> sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid);
> PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
> uint32_t config_addr);
> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet);
> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb);
> +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb);
>
> #endif /* __HW_SPAPR_PCI_H__ */
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
pgprsYw9mUZjS.pgp
Description: PGP signature
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Qemu-devel] [PATCH v4 11/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW),
David Gibson <=