qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()


From: David Hildenbrand
Subject: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()
Date: Thu, 5 Mar 2020 15:29:42 +0100

Let's implement ram_block_resized(), allowing resizeable mappings.

For resizeable mappings, we reserve $max_size IOVA address space, but only
map $size of it. When resizing, unmap the old part and remap the new
part. We'll need e.g., new ioctl to do this atomically (e.g., to resize
while the guest is running).

Right now, we only resize RAM blocks during incoming migration (when
syncing RAM block sizes during the precopy phase) or after guest
resets when building acpi tables. Any future user of resizeable RAM has to
be aware that vfio has to be treated with care.

Reviewed-by: Peter Xu <address@hidden>
Cc: Richard Henderson <address@hidden>
Cc: Paolo Bonzini <address@hidden>
Cc: Eduardo Habkost <address@hidden>
Cc: Marcel Apfelbaum <address@hidden>
Cc: Alex Williamson <address@hidden>
Cc: Stefan Hajnoczi <address@hidden>
Cc: "Dr. David Alan Gilbert" <address@hidden>
Signed-off-by: David Hildenbrand <address@hidden>
---
 util/trace-events   |  7 ++--
 util/vfio-helpers.c | 95 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/util/trace-events b/util/trace-events
index 83b6639018..a4d39eca5e 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -74,10 +74,11 @@ qemu_mutex_unlock(void *mutex, const char *file, const int 
line) "released mutex
 
 # vfio-helpers.c
 qemu_vfio_dma_reset_temporary(void *s) "s %p"
-qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 
0x%zx"
-qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 
0x%zx"
+qemu_vfio_ram_block_added(void *s, void *p, size_t size, size_t max_size) "s 
%p host %p size 0x%zx max_size 0x%zx"
+qemu_vfio_ram_block_removed(void *s, void *p, size_t size, size_t max_size) "s 
%p host %p size 0x%zx max_size 0x%zx"
+qemu_vfio_ram_block_resized(void *s, void *p, size_t old_size, size_t 
new_sizze) "s %p host %p old_size 0x%zx new_size 0x%zx"
 qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
-qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t 
iova) "s %p host %p size %zu index %d iova 0x%"PRIx64
+qemu_vfio_new_mapping(void *s, void *host, size_t size, size_t max_size, int 
index, uint64_t iova) "s %p host %p size %zu max_size %zu index %d iova 
0x%"PRIx64
 qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p 
host %p size %zu iova 0x%"PRIx64
 qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t 
*iova) "s %p host %p size %zu temporary %d iova %p"
 qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index f0c77f0d69..789faf38bd 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -36,6 +36,7 @@ typedef struct {
     /* Page aligned addr. */
     void *host;
     size_t size;
+    size_t max_size;
     uint64_t iova;
 } IOVAMapping;
 
@@ -372,14 +373,20 @@ fail_container:
     return ret;
 }
 
+static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
+                                        size_t size, size_t max_size,
+                                        bool temporary, uint64_t *iova);
+static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
+                                     size_t old_size, size_t new_size);
+
 static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
                                       size_t size, size_t max_size)
 {
     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
     int ret;
 
-    trace_qemu_vfio_ram_block_added(s, host, max_size);
-    ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
+    trace_qemu_vfio_ram_block_added(s, host, size, max_size);
+    ret = qemu_vfio_dma_map_resizeable(s, host, size, max_size, false, NULL);
     if (ret) {
         error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, max_size,
                      strerror(-ret));
@@ -391,16 +398,28 @@ static void qemu_vfio_ram_block_removed(RAMBlockNotifier 
*n, void *host,
 {
     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
     if (host) {
-        trace_qemu_vfio_ram_block_removed(s, host, max_size);
+        trace_qemu_vfio_ram_block_removed(s, host, size, max_size);
         qemu_vfio_dma_unmap(s, host);
     }
 }
 
+static void qemu_vfio_ram_block_resized(RAMBlockNotifier *n, void *host,
+                                        size_t old_size, size_t new_size)
+{
+    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
+
+    if (host) {
+        trace_qemu_vfio_ram_block_resized(s, host, old_size, new_size);
+        qemu_vfio_dma_map_resize(s, host, old_size, new_size);
+    }
+}
+
 static void qemu_vfio_open_common(QEMUVFIOState *s)
 {
     qemu_mutex_init(&s->lock);
     s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
     s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
+    s->ram_notifier.ram_block_resized = qemu_vfio_ram_block_resized;
     s->low_water_mark = QEMU_VFIO_IOVA_MIN;
     s->high_water_mark = QEMU_VFIO_IOVA_MAX;
     ram_block_notifier_add(&s->ram_notifier);
@@ -495,16 +514,23 @@ static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState 
*s, void *host,
  */
 static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
                                           void *host, size_t size,
-                                          int index, uint64_t iova)
+                                          size_t max_size, int index,
+                                          uint64_t iova)
 {
+    const IOVAMapping m = {
+        .host = host,
+        .size = size,
+        .max_size = max_size,
+        .iova = iova,
+    };
     int shift;
-    IOVAMapping m = {.host = host, .size = size, .iova = iova};
     IOVAMapping *insert;
 
     assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+    assert(size <= max_size);
     assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
     assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
-    trace_qemu_vfio_new_mapping(s, host, size, index, iova);
+    trace_qemu_vfio_new_mapping(s, host, size, max_size, index, iova);
 
     assert(index >= 0);
     s->nr_mappings++;
@@ -597,9 +623,14 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
  * the result in @iova if not NULL. The caller need to make sure the area is
  * aligned to page size, and mustn't overlap with existing mapping areas (split
  * mapping status within this area is not allowed).
+ *
+ * If size < max_size, a region of max_size in IOVA address is reserved, such
+ * that the mapping can later be resized. Resizeable mappings are only allowed
+ * for !temporary mappings.
  */
-int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
-                      bool temporary, uint64_t *iova)
+static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
+                                        size_t size, size_t max_size,
+                                        bool temporary, uint64_t *iova)
 {
     int ret = 0;
     int index;
@@ -608,19 +639,24 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, 
size_t size,
 
     assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
     assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(max_size, qemu_real_host_page_size));
+    assert(size == max_size || !temporary);
+    assert(size <= max_size);
+
     trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
     qemu_mutex_lock(&s->lock);
     mapping = qemu_vfio_find_mapping(s, host, &index);
     if (mapping) {
         iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
     } else {
-        if (s->high_water_mark - s->low_water_mark + 1 < size) {
+        if (s->high_water_mark - s->low_water_mark + 1 < max_size) {
             ret = -ENOMEM;
             goto out;
         }
         if (!temporary) {
             iova0 = s->low_water_mark;
-            mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
+            mapping = qemu_vfio_add_mapping(s, host, size, max_size, index + 1,
+                                            iova0);
             if (!mapping) {
                 ret = -ENOMEM;
                 goto out;
@@ -631,7 +667,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
                 qemu_vfio_remove_mapping(s, mapping);
                 goto out;
             }
-            s->low_water_mark += size;
+            s->low_water_mark += max_size;
             qemu_vfio_dump_mappings(s);
         } else {
             iova0 = s->high_water_mark - size;
@@ -650,6 +686,12 @@ out:
     return ret;
 }
 
+int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
+                      bool temporary, uint64_t *iova)
+{
+    return qemu_vfio_dma_map_resizeable(s, host, size, size, temporary, iova);
+}
+
 /* Reset the high watermark and free all "temporary" mappings. */
 int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
 {
@@ -694,6 +736,37 @@ out:
     qemu_mutex_unlock(&s->lock);
 }
 
+static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
+                                     size_t old_size, size_t new_size)
+{
+    IOVAMapping *m;
+    int index = 0;
+
+    qemu_mutex_lock(&s->lock);
+    m = qemu_vfio_find_mapping(s, host, &index);
+    if (!m) {
+        return;
+    }
+    assert(m->size == old_size);
+    assert(new_size <= m->max_size);
+
+    /*
+     * For now, we must unmap the whole mapped range first and remap with
+     * the new size. The reason is that VFIO_IOMMU_UNMAP_DMA might fail
+     * when partially unmapping previous mappings. Although we could add
+     * new mappings to extend the old range, we won't able to always
+     * shrink. The side effect is that it's never safe to resize during VM
+     * execution and we'll e.g., need a new IOCTL to make this work.
+     */
+    qemu_vfio_undo_mapping(s, m->iova, m->size);
+    qemu_vfio_do_mapping(s, host, m->iova, new_size);
+
+    m->size = new_size;
+    assert(qemu_vfio_verify_mappings(s));
+
+    qemu_mutex_unlock(&s->lock);
+}
+
 static void qemu_vfio_reset(QEMUVFIOState *s)
 {
     ioctl(s->device, VFIO_DEVICE_RESET);
-- 
2.24.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]