From: Igor Mammedov
Subject: [Qemu-devel] [PATCH v4 2/7] memory: introduce MemoryRegion container with reserved HVA range
Date: Thu, 9 Jul 2015 13:47:19 +0200

This patch adds an API to allocate a MemoryRegion with a reserved
HVA range, map subregions into it, and look it up:
  - memory_region_init_hva_range()
  - memory_region_add_subregion_to_hva()
  - memory_region_find_hva_range()

A MemoryRegion with a reserved HVA range provides a linear 1:1
HVA->GVA mapping for the RAM MemoryRegions that are added as
subregions inside it, as sketched below.
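
For illustration, a hotplug memory container could be set up roughly
like this (a hypothetical sketch, not part of this patch; the region
names, the sizes, and the machine/system_memory/hotplug_base variables
are invented for the example):

    MemoryRegion *container = g_new0(MemoryRegion, 1);
    MemoryRegion *dimm = g_new0(MemoryRegion, 1);

    /* reserve one contiguous HVA range for all hotpluggable RAM */
    memory_region_init_hva_range(container, OBJECT(machine),
                                 "hotplug-mem", 4ULL << 30);
    memory_region_add_subregion(system_memory, hotplug_base, container);

    /* remap the DIMM's backing memory into the reserved range, so the
     * host virtual layout mirrors the guest layout 1:1
     */
    memory_region_init_ram(dimm, OBJECT(machine), "dimm0",
                           1ULL << 30, &error_abort);
    memory_region_add_subregion_to_hva(container, 0, dimm);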

It will be used for memory hotplug and vhost integration, collapsing
all hotplugged MemoryRegions into a single memory range descriptor,
which makes it possible to overcome vhost's limit on the number of
allowed memory ranges; see the lookup sketch below.
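
With the layout above, a vhost-style consumer could then describe all
hotplugged RAM with one range (again only a sketch; 'dimm' and
'container' refer to the example above):

    MemoryRegionSection sec = memory_region_find_hva_range(dimm);

    if (sec.mr) {
        /* one descriptor covers every subregion of the container */
        void *hva = memory_region_get_ram_ptr(sec.mr);
        uint64_t len = int128_get64(sec.size);
        /* register (hva, len) as a single vhost memory range */
    }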

Signed-off-by: Igor Mammedov <address@hidden>
---
v1->v4:
  - fix offset calculation in memory_region_find_hva_range()
  - add memory_region_add_subregion_to_hva()
RFC->v1:
  - rename:
       memory_region_init_rsvd_hva -> memory_region_init_hva_range
       memory_region_find_rsvd_hva -> memory_region_find_hva_range
  - replace use of ram_addr with "void *rsvd_hva"
  - guard Linux-specific calls with an ifdef
  - split memory reservation out into qemu_ram_reserve_hva()
---
 exec.c                    | 37 ++++++++++++++++++++++++++++
 include/exec/cpu-common.h |  2 ++
 include/exec/memory.h     | 63 +++++++++++++++++++++++++++++++++++++++++++++--
 memory.c                  | 50 +++++++++++++++++++++++++++++++++++++
 4 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index ca53537..562dae5 100644
--- a/exec.c
+++ b/exec.c
@@ -1339,6 +1339,43 @@ static int memory_try_enable_merging(void *addr, size_t len)
     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }
 
+#ifdef __linux__
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    /* reserve address space only: PROT_NONE and MAP_NORESERVE, no backing */
+    void *ptr = mmap(0, length, PROT_NONE,
+                     MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+    return ptr == MAP_FAILED ? NULL : ptr;
+}
+
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    RAMBlock *block = find_ram_block(addr);
+    void *ptr;
+
+    assert(block);
+    /* move the block's backing memory into the reserved HVA range */
+    ptr = mremap(block->host, block->used_length,
+                 block->used_length,
+                 MREMAP_MAYMOVE | MREMAP_FIXED, new_hva);
+    assert(ptr != MAP_FAILED);
+    block->host = ptr;
+    memory_try_enable_merging(block->host, block->used_length);
+    qemu_ram_setup_dump(block->host, block->used_length);
+}
+#else
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    return NULL;
+}
+
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    assert(0);
+}
+#endif
+
 /* Only legal before guest might have detected the memory size: e.g. on
  * incoming migration, or right after reset.
  *
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9fb1d54..301f50b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -62,6 +62,8 @@ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
 
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void *qemu_ram_reserve_hva(ram_addr_t length);
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva);
 /* This should not be used by devices.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1394715..1f2cbd1 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -173,6 +173,7 @@ struct MemoryRegion {
     bool terminates;
     bool romd_mode;
     bool ram;
+    void *rsvd_hva;
     bool skip_dump;
     bool readonly; /* For RAM regions */
     bool enabled;
@@ -285,6 +286,26 @@ void memory_region_init(MemoryRegion *mr,
                         uint64_t size);
 
 /**
+ * memory_region_init_hva_range: Initialize a reserved-HVA memory region
+ *
+ * The region serves as a container for RAM memory regions. When a
+ * subregion is added with memory_region_add_subregion_to_hva(), the
+ * subregion's backing host memory is remapped into the HVA range
+ * reserved by this region.
+ * Supported only on Linux; if memory reservation and remapping are not
+ * implemented for the platform, the call degrades to a regular memory_region_init().
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region; any subregions beyond this size will be clipped
+ */
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  struct Object *owner,
+                                  const char *name,
+                                  uint64_t size);
+
+/**
  * memory_region_ref: Add 1 to a memory region's reference count
  *
  * Whenever memory regions are accessed outside the BQL, they need to be
@@ -634,8 +655,8 @@ int memory_region_get_fd(MemoryRegion *mr);
  * memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
  *
  * Returns a host pointer to a RAM memory region (created with
- * memory_region_init_ram() or memory_region_init_ram_ptr()).  Use with
- * care.
+ * memory_region_init_ram(), memory_region_init_ram_ptr(), or
+ * memory_region_init_hva_range()).  Use with care.
  *
  * @mr: the memory region being queried.
  */
@@ -909,6 +930,24 @@ void memory_region_del_eventfd(MemoryRegion *mr,
 void memory_region_add_subregion(MemoryRegion *mr,
                                  hwaddr offset,
                                  MemoryRegion *subregion);
+
+/**
+ * memory_region_add_subregion_to_hva: Add a subregion to an HVA container.
+ *
+ * The same as memory_region_add_subregion(), with the only difference that
+ * it remaps a RAM subregion's backing memory into the HVA range of @mr.
+ * If HVA regions are not supported by the host, the call degrades to and
+ * behaves as memory_region_add_subregion().
+ *
+ * @mr: the region to contain the new subregion; must be a container
+ *      initialized with memory_region_init_hva_range().
+ * @offset: the offset relative to @mr where @subregion is added.
+ * @subregion: the subregion to be added.
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion);
+
 /**
  * memory_region_add_subregion_overlap: Add a subregion to a container
  *                                      with overlap.
@@ -1052,6 +1091,26 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size);
 
 /**
+ * memory_region_find_hva_range: finds a parent MemoryRegion with
+ * reserved HVA and translates it into a #MemoryRegionSection.
+ *
+ * Locates the first parent #MemoryRegion of @mr that is
+ * of reserved HVA type.
+ *
+ * Returns a #MemoryRegionSection that describes a reserved HVA
+ * memory region.
+ *    @offset_within_address_space is the offset of the found
+ *      memory region (returned in the @mr field) relative to the
+ *      address space that contains it.
+ *    @offset_within_region is the offset of @mr relative to
+ *      the returned region (the @mr field).
+ *    @size is the size of the found memory region
+ *
+ * @mr: a MemoryRegion whose HVA parent is looked up
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr);
+
+/**
  * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for an entire address space.
diff --git a/memory.c b/memory.c
index ec07ae8..bf6aa4e 100644
--- a/memory.c
+++ b/memory.c
@@ -929,6 +929,15 @@ void memory_region_init(MemoryRegion *mr,
     }
 }
 
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  Object *owner,
+                                  const char *name,
+                                  uint64_t size)
+{
+    memory_region_init(mr, owner, name, size);
+    mr->rsvd_hva = qemu_ram_reserve_hva(memory_region_size(mr));
+}
+
 static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque,
                                    const char *name, Error **errp)
 {
@@ -1517,6 +1526,10 @@ int memory_region_get_fd(MemoryRegion *mr)
 
 void *memory_region_get_ram_ptr(MemoryRegion *mr)
 {
+    if (mr->rsvd_hva) {
+        return mr->rsvd_hva;
+    }
+
     if (mr->alias) {
         return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset;
     }
@@ -1777,6 +1790,17 @@ void memory_region_add_subregion_overlap(MemoryRegion *mr,
     memory_region_add_subregion_common(mr, offset, subregion);
 }
 
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion)
+{
+    if (mr->rsvd_hva && subregion->ram) {
+        qemu_ram_remap_hva(subregion->ram_addr,
+                           memory_region_get_ram_ptr(mr) + offset);
+    }
+    memory_region_add_subregion(mr, offset, subregion);
+}
+
 void memory_region_del_subregion(MemoryRegion *mr,
                                  MemoryRegion *subregion)
 {
@@ -1897,6 +1921,32 @@ bool memory_region_is_mapped(MemoryRegion *mr)
     return mr->container ? true : false;
 }
 
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr)
+{
+    MemoryRegionSection ret = { .mr = NULL };
+    MemoryRegion *hva_container = NULL;
+    hwaddr addr = 0;
+    MemoryRegion *root;
+
+    for (root = mr; root->container; root = root->container) {
+        if (!hva_container && root->rsvd_hva) {
+            hva_container = root;
+            ret.offset_within_region = addr;
+        }
+        addr += root->addr;
+    }
+
+    ret.address_space = memory_region_to_address_space(root);
+    if (!ret.address_space || !hva_container) {
+        return ret;
+    }
+
+    ret.mr = hva_container;
+    ret.offset_within_address_space = addr;
+    ret.size = int128_make64(memory_region_size(ret.mr));
+    return ret;
+}
+
 MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {
-- 
1.8.3.1



