qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v16 QEMU 04/16] vfio: Add save and load functions for VFIO PC


From: Kirti Wankhede
Subject: Re: [PATCH v16 QEMU 04/16] vfio: Add save and load functions for VFIO PCI devices
Date: Thu, 7 May 2020 11:10:40 +0530
User-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0



On 5/7/2020 1:33 AM, Alex Williamson wrote:
On Thu, 7 May 2020 01:18:19 +0530
Kirti Wankhede <address@hidden> wrote:

On 5/6/2020 11:41 AM, Yan Zhao wrote:
On Tue, May 05, 2020 at 12:37:11PM +0800, Alex Williamson wrote:
On Tue, 5 May 2020 04:48:37 +0530
Kirti Wankhede <address@hidden> wrote:
On 3/26/2020 1:26 AM, Alex Williamson wrote:
On Wed, 25 Mar 2020 02:39:02 +0530
Kirti Wankhede <address@hidden> wrote:
These functions save and restore PCI device specific data - config
space of PCI device.
Tested save and restore with MSI and MSIX type.

Signed-off-by: Kirti Wankhede <address@hidden>
Reviewed-by: Neo Jia <address@hidden>
---
    hw/vfio/pci.c                 | 163 
++++++++++++++++++++++++++++++++++++++++++
    include/hw/vfio/vfio-common.h |   2 +
    2 files changed, 165 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6c77c12e44b9..8deb11e87ef7 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -41,6 +41,7 @@
    #include "trace.h"
    #include "qapi/error.h"
    #include "migration/blocker.h"
+#include "migration/qemu-file.h"
#define TYPE_VFIO_PCI "vfio-pci"
    #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
@@ -1632,6 +1633,50 @@ static void vfio_bars_prepare(VFIOPCIDevice *vdev)
        }
    }
+static int vfio_bar_validate(VFIOPCIDevice *vdev, int nr)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIOBAR *bar = &vdev->bars[nr];
+    uint64_t addr;
+    uint32_t addr_lo, addr_hi = 0;
+
+    /* Skip unimplemented BARs and the upper half of 64bit BARS. */
+    if (!bar->size) {
+        return 0;
+    }
+
+    addr_lo = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + nr * 4, 4);
+
+    addr_lo = addr_lo & (bar->ioport ? PCI_BASE_ADDRESS_IO_MASK :
+                                       PCI_BASE_ADDRESS_MEM_MASK);

Nit, &= or combine with previous set.
+    if (bar->type == PCI_BASE_ADDRESS_MEM_TYPE_64) {
+        addr_hi = pci_default_read_config(pdev,
+                                         PCI_BASE_ADDRESS_0 + (nr + 1) * 4, 4);
+    }
+
+    addr = ((uint64_t)addr_hi << 32) | addr_lo;

Could we use a union?
+
+    if (!QEMU_IS_ALIGNED(addr, bar->size)) {
+        return -EINVAL;
+    }

What specifically are we validating here?  This should be true no
matter what we wrote to the BAR or else BAR emulation is broken.  The
bits that could make this unaligned are not implemented in the BAR.
+
+    return 0;
+}
+
+static int vfio_bars_validate(VFIOPCIDevice *vdev)
+{
+    int i, ret;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        ret = vfio_bar_validate(vdev, i);
+        if (ret) {
+            error_report("vfio: BAR address %d validation failed", i);
+            return ret;
+        }
+    }
+    return 0;
+}
+
    static void vfio_bar_register(VFIOPCIDevice *vdev, int nr)
    {
        VFIOBAR *bar = &vdev->bars[nr];
@@ -2414,11 +2459,129 @@ static Object *vfio_pci_get_object(VFIODevice 
*vbasedev)
        return OBJECT(vdev);
    }
+static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t pci_cmd;
+    int i;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        uint32_t bar;
+
+        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
+        qemu_put_be32(f, bar);
+    }
+
+    qemu_put_be32(f, vdev->interrupt);
+    if (vdev->interrupt == VFIO_INT_MSI) {
+        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
+        bool msi_64bit;
+
+        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + 
PCI_MSI_FLAGS,
+                                            2);
+        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
+
+        msi_addr_lo = pci_default_read_config(pdev,
+                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 
4);
+        qemu_put_be32(f, msi_addr_lo);
+
+        if (msi_64bit) {
+            msi_addr_hi = pci_default_read_config(pdev,
+                                             pdev->msi_cap + 
PCI_MSI_ADDRESS_HI,
+                                             4);
+        }
+        qemu_put_be32(f, msi_addr_hi);
+
+        msi_data = pci_default_read_config(pdev,
+                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : 
PCI_MSI_DATA_32),
+                2);
+        qemu_put_be32(f, msi_data);

Isn't the data field only a u16?

Yes, fixing it.
+    } else if (vdev->interrupt == VFIO_INT_MSIX) {
+        uint16_t offset;
+
+        /* save enable bit and maskall bit */
+        offset = pci_default_read_config(pdev,
+                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
+        qemu_put_be16(f, offset);
+        msix_save(pdev, f);
+    }
+    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
+    qemu_put_be16(f, pci_cmd);
+}
+
+static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t interrupt_type;
+    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
+    uint16_t pci_cmd;
+    bool msi_64bit;
+    int i, ret;
+
+    /* retore pci bar configuration */
+    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
+    vfio_pci_write_config(pdev, PCI_COMMAND,
+                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        uint32_t bar = qemu_get_be32(f);
+
+        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
+    }
+
+    ret = vfio_bars_validate(vdev);
+    if (ret) {
+        return ret;
+    }
+
+    interrupt_type = qemu_get_be32(f);
+
+    if (interrupt_type == VFIO_INT_MSI) {
+        /* restore msi configuration */
+        msi_flags = pci_default_read_config(pdev,
+                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
+        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
+
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
+
+        msi_addr_lo = qemu_get_be32(f);
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
+                              msi_addr_lo, 4);
+
+        msi_addr_hi = qemu_get_be32(f);
+        if (msi_64bit) {
+            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
+                                  msi_addr_hi, 4);
+        }
+        msi_data = qemu_get_be32(f);
+        vfio_pci_write_config(pdev,
+                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : 
PCI_MSI_DATA_32),
+                msi_data, 2);
+
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
+    } else if (interrupt_type == VFIO_INT_MSIX) {
+        uint16_t offset = qemu_get_be16(f);
+
+        /* load enable bit and maskall bit */
+        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
+                              offset, 2);
+        msix_load(pdev, f);
+    }
+    pci_cmd = qemu_get_be16(f);
+    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
+    return 0;
+}

It always seems like there should be a lot more state than this, and I
probably sound like a broken record because I ask every time, but maybe
that's a good indication that we (or at least I) need a comment
explaining why we only care about these.  For example, what if we
migrate a device in the D3 power state, don't we need to account for
the state stored in the PM capability or does the device wake up into
D0 auto-magically after migration?  I think we could repeat that
question for every capability that can be modified.  Even for the MSI/X
cases, the interrupt may not be active, but there could be state in
virtual config space that would be different on the target.  For
example, if we migrate with a device in INTx mode where the guest had
written vector fields on the source, but only writes the enable bit on
the target, can we seamlessly figure out the rest?  For other
capabilities, that state may represent config space changes written
through to the physical device and represent a functional difference on
the target.  Thanks,

These are very basic set of registers from config state. Other are more
of vendor specific which vendor driver can save and restore in their own
data. I don't think we have to take care of all those vendor specific
fields here.

That had not been clear to me.  Intel folks, is this your understanding
regarding the responsibility of the user to save and restore config
space of the device as part of the vendor provided migration stream
data?  Thanks,
Currently, the code works for us. but I agree with you that there should
be more states to save, at least for emulated config bits.
I think we should call pci_device_save() to serve that purpose.

If vendor driver can restore all vendor specific config space, then
adding it again in QEMU might be redundant. As an example, I had mailed
mtty sample code, in which config space has vendor specific information
and that is restored in easy way.

The redundancy is implementing it in each vendor driver.  Thanks,


Vendor driver knows better about vendor specific configs, isn't it better vendor driver handle those at their end rather than adding vendor specific quirks in QEMU?

Thanks,
Kirti



reply via email to

[Prev in Thread] Current Thread [Next in Thread]