qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 3/3] pcie: Add support for Single Root I/O Vi


From: Marcel Apfelbaum
Subject: Re: [Qemu-devel] [PATCH v4 3/3] pcie: Add support for Single Root I/O Virtualization (SR/IOV)
Date: Wed, 7 Oct 2015 16:45:44 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.1.0

On 09/17/2015 05:10 PM, Knut Omang wrote:
On Thu, 2015-09-17 at 14:49 +0300, Marcel Apfelbaum wrote:
On 09/12/2015 03:36 PM, Knut Omang wrote:
This patch provides the building blocks for creating an SR/IOV
PCIe Extended Capability header and register/unregister
SR/IOV Virtual Functions.

Signed-off-by: Knut Omang <address@hidden>
---
   hw/pci/Makefile.objs        |   2 +-
   hw/pci/pci.c                |  99 ++++++++++++----
   hw/pci/pcie.c               |   9 +-
   hw/pci/pcie_sriov.c         | 271
++++++++++++++++++++++++++++++++++++++++++++
   include/hw/pci/pci.h        |  11 +-
   include/hw/pci/pcie.h       |   6 +
   include/hw/pci/pcie_sriov.h |  55 +++++++++
   include/qemu/typedefs.h     |   2 +
   8 files changed, 426 insertions(+), 29 deletions(-)
   create mode 100644 hw/pci/pcie_sriov.c
   create mode 100644 include/hw/pci/pcie_sriov.h

diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs
index 9f905e6..2226980 100644
--- a/hw/pci/Makefile.objs
+++ b/hw/pci/Makefile.objs
@@ -3,7 +3,7 @@ common-obj-$(CONFIG_PCI) += msix.o msi.o
   common-obj-$(CONFIG_PCI) += shpc.o
   common-obj-$(CONFIG_PCI) += slotid_cap.o
   common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
-common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
+common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
pcie_sriov.o

   common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o
   common-obj-$(CONFIG_ALL) += pci-stub.o
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a5cc015..9c0eba1 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -153,6 +153,9 @@ int pci_bar(PCIDevice *d, int reg)
   {
       uint8_t type;

+    /* PCIe virtual functions do not have their own BARs */
+    assert(!pci_is_vf(d));
+
       if (reg != PCI_ROM_SLOT)
           return PCI_BASE_ADDRESS_0 + reg * 4;

@@ -211,22 +214,13 @@ void pci_device_deassert_intx(PCIDevice *dev)
       }
   }

-static void pci_do_device_reset(PCIDevice *dev)
+static void pci_reset_regions(PCIDevice *dev)
   {
       int r;
+    if (pci_is_vf(dev)) {
+        return;
+    }

-    pci_device_deassert_intx(dev);
-    assert(dev->irq_state == 0);
-
-    /* Clear all writable bits */
-    pci_word_test_and_clear_mask(dev->config + PCI_COMMAND,
-                                 pci_get_word(dev->wmask +
PCI_COMMAND) |
-                                 pci_get_word(dev->w1cmask +
PCI_COMMAND));
-    pci_word_test_and_clear_mask(dev->config + PCI_STATUS,
-                                 pci_get_word(dev->wmask +
PCI_STATUS) |
-                                 pci_get_word(dev->w1cmask +
PCI_STATUS));
-    dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
-    dev->config[PCI_INTERRUPT_LINE] = 0x0;
       for (r = 0; r < PCI_NUM_REGIONS; ++r) {
           PCIIORegion *region = &dev->io_regions[r];
           if (!region->size) {
@@ -240,6 +234,27 @@ static void pci_do_device_reset(PCIDevice
*dev)
               pci_set_long(dev->config + pci_bar(dev, r), region
->type);
           }
       }
+}
+
+static void pci_do_device_reset(PCIDevice *dev)
+{
+    qdev_reset_all(&dev->qdev);

Hi,
Thank you for resubmitting this series!

This is only a quick look, I hope I'll have more time next week to go
over it again.


+
+    dev->irq_state = 0;

Are you sure we need the assignment above? It seems that the
irq_state
should be modified only by the intx wrappers as
  pci_device_deassert_intx and so.


+    pci_update_irq_status(dev);

Why do we have to update the irq status on reset?

I struggled a lot with interrupts and PF disapperance in earlier
versions, this may be unintended leftovers from rebase.

The intention was to avoid the qdev removal which caused problems with
unintended "hot unplug of the PF" at VF removal earlier, but after some
of the more recent QOM'ification all those problems disappeared.

I tried without these lines and the do_device_reset refactor and it
seems to work just fine, will fix that in v5, thanks!

+    pci_device_deassert_intx(dev);
+    assert(dev->irq_state == 0);
+
+    /* Clear all writable bits */
+    pci_word_test_and_clear_mask(dev->config + PCI_COMMAND,
+                                 pci_get_word(dev->wmask +
PCI_COMMAND) |
+                                 pci_get_word(dev->w1cmask +
PCI_COMMAND));
+    pci_word_test_and_clear_mask(dev->config + PCI_STATUS,
+                                 pci_get_word(dev->wmask +
PCI_STATUS) |
+                                 pci_get_word(dev->w1cmask +
PCI_STATUS));
+    dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+    dev->config[PCI_INTERRUPT_LINE] = 0x0;
+    pci_reset_regions(dev);
       pci_update_mappings(dev);

       msi_reset(dev);
@@ -771,6 +786,15 @@ static void pci_init_multifunction(PCIBus
*bus, PCIDevice *dev, Error **errp)
           dev->config[PCI_HEADER_TYPE] |=
PCI_HEADER_TYPE_MULTI_FUNCTION;
       }

+    /* With SR/IOV and ARI, a device at function 0 need not be a
multifunction
+     * device, as it may just be a VF that ended up with function
0 in
+     * the legacy PCI interpretation. Avoid failing in such cases:
+     */
+    if (pci_is_vf(dev) &&
+        dev->exp.sriov_vf.pf->cap_present &
QEMU_PCI_CAP_MULTIFUNCTION) {
+        return;
+    }
+
       /*
        * multifunction bit is interpreted in two ways as follows.
        *   - all functions must set the bit to 1.
@@ -962,6 +986,7 @@ void pci_register_bar(PCIDevice *pci_dev, int
region_num,
       uint64_t wmask;
       pcibus_t size = memory_region_size(memory);

+    assert(!pci_is_vf(pci_dev)); /* VFs must use
pcie_sriov_vf_register_bar */
       assert(region_num >= 0);
       assert(region_num < PCI_NUM_REGIONS);
       if (size & (size-1)) {
@@ -1060,11 +1085,44 @@ pcibus_t pci_get_bar_addr(PCIDevice
*pci_dev, int region_num)
       return pci_dev->io_regions[region_num].addr;
   }

-static pcibus_t pci_bar_address(PCIDevice *d,
-                               int reg, uint8_t type, pcibus_t
size)
+
+static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
+                                        uint8_t type, pcibus_t
size)
+{
+    pcibus_t new_addr;
+    if (!pci_is_vf(d)) {
+        int bar = pci_bar(d, reg);
+        if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+            new_addr = pci_get_quad(d->config + bar);
+        } else {
+            new_addr = pci_get_long(d->config + bar);
+        }
+    } else {
+        PCIDevice *pf = d->exp.sriov_vf.pf;
+        uint16_t sriov_cap = pf->exp.sriov_cap;
+        int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
+        uint16_t vf_offset = pci_get_word(pf->config + sriov_cap +
PCI_SRIOV_VF_OFFSET);
+        uint16_t vf_stride = pci_get_word(pf->config + sriov_cap +
PCI_SRIOV_VF_STRIDE);
+        uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) /
vf_stride;
+
+        if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+            new_addr = pci_get_quad(pf->config + bar);
+        } else {
+            new_addr = pci_get_long(pf->config + bar);
+        }
+        new_addr += vf_num * size;
+    }
+    if (reg != PCI_ROM_SLOT) {
+        /* Preserve the rom enable bit */
+        new_addr &= ~(size - 1);
+    }
+    return new_addr;
+}
+
+pcibus_t pci_bar_address(PCIDevice *d,
+                         int reg, uint8_t type, pcibus_t size)
   {
       pcibus_t new_addr, last_addr;
-    int bar = pci_bar(d, reg);
       uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
       Object *machine = qdev_get_machine();
       ObjectClass *oc = object_get_class(machine);
@@ -1075,7 +1133,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
           if (!(cmd & PCI_COMMAND_IO)) {
               return PCI_BAR_UNMAPPED;
           }
-        new_addr = pci_get_long(d->config + bar) & ~(size - 1);
+        new_addr = pci_config_get_bar_addr(d, reg, type, size);
           last_addr = new_addr + size - 1;
           /* Check if 32 bit BAR wraps around explicitly.
            * TODO: make priorities correct and remove this work
around.
@@ -1090,11 +1148,7 @@ static pcibus_t pci_bar_address(PCIDevice
*d,
       if (!(cmd & PCI_COMMAND_MEMORY)) {
           return PCI_BAR_UNMAPPED;
       }
-    if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
-        new_addr = pci_get_quad(d->config + bar);
-    } else {
-        new_addr = pci_get_long(d->config + bar);
-    }
+    new_addr = pci_config_get_bar_addr(d, reg, type, size);
       /* the ROM slot has a specific enable bit */
       if (reg == PCI_ROM_SLOT && !(new_addr &
PCI_ROM_ADDRESS_ENABLE)) {
           return PCI_BAR_UNMAPPED;
@@ -1228,6 +1282,7 @@ void pci_default_write_config(PCIDevice *d,
uint32_t addr, uint32_t val_in, int

       msi_write_config(d, addr, val_in, l);
       msix_write_config(d, addr, val_in, l);
+    pcie_sriov_config_write(d, addr, val_in, l);
   }

   /***********************************************************/
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e28985..ba49c0f 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -253,7 +253,7 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler
*hotplug_dev, DeviceState *dev,
        * Right now, only a device of function = 0 is allowed to be
        * hot plugged/unplugged.
        */
-    assert(PCI_FUNC(pci_dev->devfn) == 0);
+    assert(PCI_FUNC(pci_dev->devfn) == 0 || pci_is_vf(pci_dev));

       pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
                                  PCI_EXP_SLTSTA_PDS);
@@ -265,10 +265,11 @@ void
pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
                                            DeviceState *dev, Error
**errp)
   {
       uint8_t *exp_cap;
+    PCIDevice *pdev = PCI_DEVICE(hotplug_dev);

-    pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev,
&exp_cap, errp);
+    pcie_cap_slot_hotplug_common(pdev, dev, &exp_cap, errp);

-    pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
+    pcie_cap_slot_push_attention_button(pdev);
   }

This chunk is not directly liked to the patch, I would put it in a
different patch.

ok

  /* pci express slot for pci express root/downstream port
@@ -408,7 +409,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
       }

       /*
-     * If the slot is polulated, power indicator is off and power
+     * If the slot is populated, power indicator is off and power
        * controller is off, it is safe to detach the devices.
        */
       if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val &
PCI_EXP_SLTCTL_PCC) &&


Same here. I am always happy to have this kind of types taken care
of,
but I think a separate patch would be cleaner.

Would you like it in the patch set as a separate patch or should I
schedule it for a trivial patches set instead, maybe together with the
  PCI_DEVICE thing?

Hi,

I think in the same patch set  would be just fine.

Thanks,
Marcel


[...]



reply via email to

[Prev in Thread] Current Thread [Next in Thread]