[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()
From: |
David Hildenbrand |
Subject: |
[PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized() |
Date: |
Thu, 5 Mar 2020 15:29:42 +0100 |
Let's implement ram_block_resized(), allowing resizeable mappings.
For resizeable mappings, we reserve $max_size IOVA address space, but only
map $size of it. When resizing, unmap the old part and remap the new
part. We'll need, e.g., a new ioctl to do this atomically (e.g., to resize
while the guest is running).
Right now, we only resize RAM blocks during incoming migration (when
syncing RAM block sizes during the precopy phase) or after guest
resets when building acpi tables. Any future user of resizeable RAM has to
be aware that vfio has to be treated with care.
Reviewed-by: Peter Xu <address@hidden>
Cc: Richard Henderson <address@hidden>
Cc: Paolo Bonzini <address@hidden>
Cc: Eduardo Habkost <address@hidden>
Cc: Marcel Apfelbaum <address@hidden>
Cc: Alex Williamson <address@hidden>
Cc: Stefan Hajnoczi <address@hidden>
Cc: "Dr. David Alan Gilbert" <address@hidden>
Signed-off-by: David Hildenbrand <address@hidden>
---
util/trace-events | 7 ++--
util/vfio-helpers.c | 95 +++++++++++++++++++++++++++++++++++++++------
2 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/util/trace-events b/util/trace-events
index 83b6639018..a4d39eca5e 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -74,10 +74,11 @@ qemu_mutex_unlock(void *mutex, const char *file, const int
line) "released mutex
# vfio-helpers.c
qemu_vfio_dma_reset_temporary(void *s) "s %p"
-qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size
0x%zx"
-qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size
0x%zx"
+qemu_vfio_ram_block_added(void *s, void *p, size_t size, size_t max_size) "s
%p host %p size 0x%zx max_size 0x%zx"
+qemu_vfio_ram_block_removed(void *s, void *p, size_t size, size_t max_size) "s
%p host %p size 0x%zx max_size 0x%zx"
+qemu_vfio_ram_block_resized(void *s, void *p, size_t old_size, size_t
new_size) "s %p host %p old_size 0x%zx new_size 0x%zx"
qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
-qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t
iova) "s %p host %p size %zu index %d iova 0x%"PRIx64
+qemu_vfio_new_mapping(void *s, void *host, size_t size, size_t max_size, int
index, uint64_t iova) "s %p host %p size %zu max_size %zu index %d iova
0x%"PRIx64
qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p
host %p size %zu iova 0x%"PRIx64
qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t
*iova) "s %p host %p size %zu temporary %d iova %p"
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index f0c77f0d69..789faf38bd 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -36,6 +36,7 @@ typedef struct {
/* Page aligned addr. */
void *host;
size_t size;
+ size_t max_size;
uint64_t iova;
} IOVAMapping;
@@ -372,14 +373,20 @@ fail_container:
return ret;
}
+static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
+ size_t size, size_t max_size,
+ bool temporary, uint64_t *iova);
+static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
+ size_t old_size, size_t new_size);
+
static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
size_t size, size_t max_size)
{
QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
int ret;
- trace_qemu_vfio_ram_block_added(s, host, max_size);
- ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
+ trace_qemu_vfio_ram_block_added(s, host, size, max_size);
+ ret = qemu_vfio_dma_map_resizeable(s, host, size, max_size, false, NULL);
if (ret) {
error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, max_size,
strerror(-ret));
@@ -391,16 +398,28 @@ static void qemu_vfio_ram_block_removed(RAMBlockNotifier
*n, void *host,
{
QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
if (host) {
- trace_qemu_vfio_ram_block_removed(s, host, max_size);
+ trace_qemu_vfio_ram_block_removed(s, host, size, max_size);
qemu_vfio_dma_unmap(s, host);
}
}
+/*
+ * RAM block notifier callback for resized blocks: trace the event and hand
+ * the resize of the block at @host over to qemu_vfio_dma_map_resize().
+ * Notifications without a host address are ignored.
+ */
+static void qemu_vfio_ram_block_resized(RAMBlockNotifier *n, void *host,
+                                        size_t old_size, size_t new_size)
+{
+    QEMUVFIOState *s;
+
+    if (!host) {
+        return;
+    }
+
+    /* The notifier is embedded in the state; recover the enclosing object. */
+    s = container_of(n, QEMUVFIOState, ram_notifier);
+    trace_qemu_vfio_ram_block_resized(s, host, old_size, new_size);
+    qemu_vfio_dma_map_resize(s, host, old_size, new_size);
+}
+
static void qemu_vfio_open_common(QEMUVFIOState *s)
{
qemu_mutex_init(&s->lock);
s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
+ s->ram_notifier.ram_block_resized = qemu_vfio_ram_block_resized;
s->low_water_mark = QEMU_VFIO_IOVA_MIN;
s->high_water_mark = QEMU_VFIO_IOVA_MAX;
ram_block_notifier_add(&s->ram_notifier);
@@ -495,16 +514,23 @@ static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState
*s, void *host,
*/
static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
void *host, size_t size,
- int index, uint64_t iova)
+ size_t max_size, int index,
+ uint64_t iova)
{
+ const IOVAMapping m = {
+ .host = host,
+ .size = size,
+ .max_size = max_size,
+ .iova = iova,
+ };
int shift;
- IOVAMapping m = {.host = host, .size = size, .iova = iova};
IOVAMapping *insert;
assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+ assert(size <= max_size);
assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
- trace_qemu_vfio_new_mapping(s, host, size, index, iova);
+ trace_qemu_vfio_new_mapping(s, host, size, max_size, index, iova);
assert(index >= 0);
s->nr_mappings++;
@@ -597,9 +623,14 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
* the result in @iova if not NULL. The caller need to make sure the area is
* aligned to page size, and mustn't overlap with existing mapping areas (split
* mapping status within this area is not allowed).
+ *
+ * If size < max_size, a region of max_size in the IOVA address space is
+ * reserved, such that the mapping can later be resized. Resizeable mappings
+ * are only allowed for !temporary mappings.
*/
-int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
- bool temporary, uint64_t *iova)
+static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
+ size_t size, size_t max_size,
+ bool temporary, uint64_t *iova)
{
int ret = 0;
int index;
@@ -608,19 +639,24 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host,
size_t size,
assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+ assert(QEMU_IS_ALIGNED(max_size, qemu_real_host_page_size));
+ assert(size == max_size || !temporary);
+ assert(size <= max_size);
+
trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
qemu_mutex_lock(&s->lock);
mapping = qemu_vfio_find_mapping(s, host, &index);
if (mapping) {
iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
} else {
- if (s->high_water_mark - s->low_water_mark + 1 < size) {
+ if (s->high_water_mark - s->low_water_mark + 1 < max_size) {
ret = -ENOMEM;
goto out;
}
if (!temporary) {
iova0 = s->low_water_mark;
- mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
+ mapping = qemu_vfio_add_mapping(s, host, size, max_size, index + 1,
+ iova0);
if (!mapping) {
ret = -ENOMEM;
goto out;
@@ -631,7 +667,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t
size,
qemu_vfio_remove_mapping(s, mapping);
goto out;
}
- s->low_water_mark += size;
+ s->low_water_mark += max_size;
qemu_vfio_dump_mappings(s);
} else {
iova0 = s->high_water_mark - size;
@@ -650,6 +686,12 @@ out:
return ret;
}
+/*
+ * Fixed-size variant of qemu_vfio_dma_map_resizeable(): the mapping is
+ * created with max_size equal to @size. All other arguments and the return
+ * value are passed through unchanged.
+ */
+int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
+                      bool temporary, uint64_t *iova)
+{
+    const size_t max_size = size;
+
+    return qemu_vfio_dma_map_resizeable(s, host, size, max_size, temporary,
+                                        iova);
+}
+
/* Reset the high watermark and free all "temporary" mappings. */
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
{
@@ -694,6 +736,37 @@ out:
qemu_mutex_unlock(&s->lock);
}
+/*
+ * Resize the mapping that qemu_vfio_find_mapping() returns for @host from
+ * @old_size to @new_size.
+ *
+ * @old_size must equal the currently recorded size of the mapping and
+ * @new_size must not exceed its max_size (both are asserted). If no mapping
+ * is found for @host, nothing is changed. s->lock is taken for the duration
+ * of the call and released on every exit path.
+ */
+static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
+                                     size_t old_size, size_t new_size)
+{
+    IOVAMapping *m;
+    int index = 0;
+
+    qemu_mutex_lock(&s->lock);
+    m = qemu_vfio_find_mapping(s, host, &index);
+    if (!m) {
+        /* Nothing to resize; leave via "out" so the lock is released. */
+        goto out;
+    }
+    assert(m->size == old_size);
+    assert(new_size <= m->max_size);
+
+    /*
+     * For now, we must unmap the whole mapped range first and remap with
+     * the new size. The reason is that VFIO_IOMMU_UNMAP_DMA might fail
+     * when partially unmapping previous mappings. Although we could add
+     * new mappings to extend the old range, we won't be able to always
+     * shrink. The side effect is that it's never safe to resize during VM
+     * execution and we'll e.g., need a new IOCTL to make this work.
+     */
+    qemu_vfio_undo_mapping(s, m->iova, m->size);
+    /*
+     * Argument order is (s, host, size, iova).
+     * NOTE(review): a failure to remap is not reported here - confirm
+     * whether the result of qemu_vfio_do_mapping() needs to be checked.
+     */
+    qemu_vfio_do_mapping(s, host, new_size, m->iova);
+
+    m->size = new_size;
+    assert(qemu_vfio_verify_mappings(s));
+
+out:
+    qemu_mutex_unlock(&s->lock);
+}
+
static void qemu_vfio_reset(QEMUVFIOState *s)
{
ioctl(s->device, VFIO_DEVICE_RESET);
--
2.24.1
- [PATCH v4 06/15] exec: Drop "shared" parameter from ram_block_add(), (continued)
- [PATCH v4 06/15] exec: Drop "shared" parameter from ram_block_add(), David Hildenbrand, 2020/03/05
- [PATCH v4 09/15] util/mmap-alloc: Factor out activating of memory to mmap_activate(), David Hildenbrand, 2020/03/05
- [PATCH v4 10/15] util/mmap-alloc: Prepare for resizeable mmaps, David Hildenbrand, 2020/03/05
- [PATCH v4 11/15] util/mmap-alloc: Implement resizeable mmaps, David Hildenbrand, 2020/03/05
- [PATCH v4 07/15] util/mmap-alloc: Factor out calculation of the pagesize for the guard page, David Hildenbrand, 2020/03/05
- [PATCH v4 13/15] util: oslib: Resizeable anonymous allocations under POSIX, David Hildenbrand, 2020/03/05
- [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized(),
David Hildenbrand <=
- [PATCH v4 14/15] numa: Introduce ram_block_notifiers_support_resize(), David Hildenbrand, 2020/03/05
- [PATCH v4 15/15] exec: Ram blocks with resizeable anonymous allocations under POSIX, David Hildenbrand, 2020/03/05