qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC] vfio/migration: reduce the msix virq setup cost in resume phase


From: Longpeng(Mike)
Subject: [RFC] vfio/migration: reduce the msix virq setup cost in resume phase
Date: Fri, 13 Aug 2021 12:06:14 +0800

In migration resume phase, all unmasked msix vectors need to be
setup when load the VF state. However, the setup operation would
takes longer if the VF has more unmasked vectors.

In our case, the VF has 65 vectors and each one spend 0.8ms on
setup operation (vfio_add_kvm_msi_virq -> kvm_irqchip_commit_routes),
the total cost of the VF is more than 40ms. Even worse, the VM has
8 VFs, so the downtime increase more than 320ms.

vfio_pci_load_config
  vfio_msix_enable
    msix_set_vector_notifiers
      for (vector = 0; vector < dev->msix_entries_nr; vector++) {
        vfio_msix_vector_do_use
          vfio_add_kvm_msi_virq
            kvm_irqchip_commit_routes <-- 0.8ms
      }

Originaly, We tried to batch all routes and just commit once
outside the loop, but it's not easy to fallback to qemu interrupt
if someone fails.

So this patch trys to defer the KVM interrupt setup, the unmasked
vector will use qemu interrupt as default and switch to kvm interrupt
once it fires.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
---
 hw/vfio/pci.c | 39 ++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |  2 ++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e1ea1d8..dd35170 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -47,6 +47,8 @@
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static void vfio_add_kvm_msix_virq(VFIOPCIDevice *vdev,
+                                   VFIOMSIVector *vector, int nr);
 
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -347,6 +349,11 @@ static void vfio_msi_interrupt(void *opaque)
         get_msg = msix_get_message;
         notify = msix_notify;
 
+        if (unlikely(vector->need_switch)) {
+            vfio_add_kvm_msix_virq(vdev, vector, nr);
+            vector->need_switch = false;
+        }
+
         /* A masked vector firing needs to use the PBA, enable it */
         if (msix_is_masked(&vdev->pdev, nr)) {
             set_bit(nr, vdev->msix->pending);
@@ -438,6 +445,25 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, 
VFIOMSIVector *vector,
     vector->virq = virq;
 }
 
+static void
+vfio_add_kvm_msix_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, int nr)
+{
+    Error *err = NULL;
+    int fd;
+
+    vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+    if (vector->virq < 0) {
+        return;
+    }
+
+    fd = event_notifier_get_fd(&vector->kvm_interrupt);
+    if (vfio_set_irq_signaling(&vdev->vbasedev,
+                               VFIO_PCI_MSIX_IRQ_INDEX, nr,
+                               VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+    }
+}
+
 static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
 {
     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
@@ -490,7 +516,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
         }
     } else {
         if (msg) {
-            vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+            if (unlikely(vdev->defer_set_virq)) {
+                vector->need_switch = true;
+            } else {
+                vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+            }
         }
     }
 
@@ -566,6 +596,11 @@ static void vfio_msix_vector_release(PCIDevice *pdev, 
unsigned int nr)
     }
 }
 
+static void inline vfio_msix_defer_set_virq(VFIOPCIDevice *vdev, bool defer)
+{
+    vdev->defer_set_virq = defer;
+}
+
 static void vfio_msix_enable(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -2466,7 +2501,9 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, 
QEMUFile *f)
     if (msi_enabled(pdev)) {
         vfio_msi_enable(vdev);
     } else if (msix_enabled(pdev)) {
+        vfio_msix_defer_set_virq(vdev, true);
         vfio_msix_enable(vdev);
+        vfio_msix_defer_set_virq(vdev, false);
     }
 
     return ret;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 6477751..846ae85 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -95,6 +95,7 @@ typedef struct VFIOMSIVector {
     struct VFIOPCIDevice *vdev; /* back pointer to device */
     int virq;
     bool use;
+    bool need_switch; /* switch to kvm interrupt ? */
 } VFIOMSIVector;
 
 enum {
@@ -171,6 +172,7 @@ struct VFIOPCIDevice {
     bool no_kvm_ioeventfd;
     bool no_vfio_ioeventfd;
     bool enable_ramfb;
+    bool defer_set_virq;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
 };
-- 
1.8.3.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]