qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH] QEMU may write to system_memory before gues


From: Jia He
Subject: Re: [Qemu-devel] [RFC PATCH] QEMU may write to system_memory before guest starts
Date: Tue, 19 Mar 2019 15:35:28 +0800
User-agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Thunderbird/60.5.3

Thanks Yury.

On 2019/3/14 19:03, Yury Kotov wrote:
This patch isn't intended to merge. Just to reproduce a problem.

The test for x-ignore-shread capability fails on aarch64 + tcg:
Memory content inconsistency at 44c00000 first_byte = 2 last_byte = 1 current = 
d1 hit_edge = 1
Memory content inconsistency at 44c01000 first_byte = 2 last_byte = 1 current = 
77 hit_edge = 1

I expected that QEMU doesn't write to guest RAM until VM starts, but it
happens in this test. By this patch I found what causes this problem.

Backtrace:
0  0x00007fb9e40affea in __memcpy_avx_unaligned () at 
../sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S:118
1  0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458
2  0x000055f4a296de3a in address_space_write_rom () at exec.c:3479
3  0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4  0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5  0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675
6  0x000055f4a2c9851d in main () at vl.c:4552

more than that

even in kvm mode, there is similar writing, and cause exception when destination VM is restoring.

This is the backtrace with your debugging patch

(gdb) bt
#0  __GI_raise (address@hidden) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x0000ffffbe7988b4 in __GI_abort () at abort.c:79
#2  0x0000aaaaaae41c88 in kvm_set_phys_mem (kml=0xaaaaabef7b88, section=0xffffffffe0c0, add=true) at /root/hj/q_migrate/qemu/accel/kvm/kvm-all.c:821 #3  0x0000aaaaaae41d0c in kvm_region_add (listener=0xaaaaabef7b88, section=0xffffffffe0c0) at /root/hj/q_migrate/qemu/accel/kvm/kvm-all.c:831 #4  0x0000aaaaaae22ca0 in address_space_update_topology_pass (as=0xaaaaabd16ce0 <address_space_memory>, old_view=0xaaaaabeec150, new_view=0xaaaaac03d3e0, adding=true)
    at /root/hj/q_migrate/qemu/memory.c:958
#5  0x0000aaaaaae22fc4 in address_space_set_flatview (as=0xaaaaabd16ce0 <address_space_memory>) at /root/hj/q_migrate/qemu/memory.c:1034 #6  0x0000aaaaaae231e0 in memory_region_transaction_commit () at /root/hj/q_migrate/qemu/memory.c:1086 #7  0x0000aaaaaae26c90 in memory_region_update_container_subregions (subregion=0xaaaaabd81290) at /root/hj/q_migrate/qemu/memory.c:2358 #8  0x0000aaaaaae26d04 in memory_region_add_subregion_common (mr=0xaaaaabe43000, offset=1073741824, subregion=0xaaaaabd81290) at /root/hj/q_migrate/qemu/memory.c:2368 #9  0x0000aaaaaae26d3c in memory_region_add_subregion (mr=0xaaaaabe43000, offset=1073741824, subregion=0xaaaaabd81290) at /root/hj/q_migrate/qemu/memory.c:2376 #10 0x0000aaaaaaf63570 in machvirt_init (machine=0xaaaaabeb9c00) at /root/hj/q_migrate/qemu/hw/arm/virt.c:1611 #11 0x0000aaaaab1ff558 in machine_run_board_init (machine=0xaaaaabeb9c00) at hw/core/machine.c:965 #12 0x0000aaaaab14070c in main (argc=51, argv=0xffffffffee78, envp=0xfffffffff018) at vl.c:4459

---

Cheers,

Jia He

To fix this particular we can skip these writes for the first system_reset
if -incoming is set. But I'm not sure how to fix this problem in general.
May be to introduce a contract which forbids to write to system_ram in such
case?

What do you think?

Signed-off-by: Yury Kotov <address@hidden>
---
  backends/hostmem-file.c   |  2 +-
  exec.c                    | 15 +++++++++++++--
  include/exec/cpu-common.h |  2 ++
  include/exec/memory.h     |  3 +++
  include/qemu/mmap-alloc.h |  2 +-
  migration/ram.c           |  2 ++
  util/mmap-alloc.c         |  6 ++++--
  util/oslib-posix.c        |  2 +-
  vl.c                      |  8 ++++++++
  9 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index ce54788048..146fa2bc70 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -61,7 +61,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
      memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
                                       name,
                                       backend->size, fb->align,
-                                     (backend->share ? RAM_SHARED : 0) |
+                                     (backend->share ? (RAM_SHARED | 
RAM_READONLY) : 0) |
                                       (fb->is_pmem ? RAM_PMEM : 0),
                                       fb->mem_path, errp);
      g_free(name);
diff --git a/exec.c b/exec.c
index 1d4f3784d6..cd86e9b837 100644
--- a/exec.c
+++ b/exec.c
@@ -1863,7 +1863,7 @@ static void *file_ram_alloc(RAMBlock *block,
      }
area = qemu_ram_mmap(fd, memory, block->mr->align,
-                         block->flags & RAM_SHARED);
+                         block->flags & RAM_SHARED, block->flags & 
RAM_READONLY);
      if (area == MAP_FAILED) {
          error_setg_errno(errp, errno,
                           "unable to map backing store for guest RAM");
@@ -2259,7 +2259,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, 
MemoryRegion *mr,
      int64_t file_size;
/* Just support these ram flags by now. */
-    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_READONLY)) == 0);
if (xen_enabled()) {
          error_setg(errp, "-mem-path not supported with Xen");
@@ -2485,6 +2485,17 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
          }
      }
  }
+
+void qemu_ram_unprotect_all(void)
+{
+    RAMBlock *block;
+    rcu_read_lock();
+    RAMBLOCK_FOREACH(block) {
+        int ret = mprotect(block->host, block->max_length, PROT_READ | 
PROT_WRITE);
+        assert(ret == 0);
+    }
+    rcu_read_unlock();
+}
  #endif /* !_WIN32 */
/* Return a host pointer to ram allocated with qemu_ram_alloc.
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index cef8b88a2a..2ad875be06 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -63,6 +63,8 @@ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, 
uint32_t value);
  typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void qemu_ram_unprotect_all(void);
+
  /* This should not be used by devices.  */
  ram_addr_t qemu_ram_addr_from_host(void *ptr);
  RAMBlock *qemu_ram_block_by_name(const char *name);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1625913f84..b1cb5a48d7 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -126,6 +126,9 @@ typedef struct IOMMUNotifier IOMMUNotifier;
  /* RAM is a persistent kind memory */
  #define RAM_PMEM (1 << 5)
+/* RAM is readonly*/
+#define RAM_READONLY (1 << 6)
+
  static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                         IOMMUNotifierFlag flags,
                                         hwaddr start, hwaddr end,
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index ef04f0ed5b..f704d9b7e3 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -7,7 +7,7 @@ size_t qemu_fd_getpagesize(int fd);
size_t qemu_mempath_getpagesize(const char *mem_path); -void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared, bool 
readonly);
void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/migration/ram.c b/migration/ram.c
index 35bd6213e9..252fc80e6c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4211,6 +4211,8 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
       */
      rcu_read_lock();
+ qemu_ram_unprotect_all();
+
      if (postcopy_running) {
          ret = ram_load_postcopy(f);
      }
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 8565885420..7dc194d909 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -75,9 +75,10 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
      return getpagesize();
  }
-void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared, bool 
readonly)
  {
      int flags;
+    int prots;
      int guardfd;
      size_t offset;
      size_t pagesize;
@@ -128,9 +129,10 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, 
bool shared)
      flags = MAP_FIXED;
      flags |= fd == -1 ? MAP_ANONYMOUS : 0;
      flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+    prots = PROT_READ | (readonly ? 0 : PROT_WRITE);
      offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
- ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0);
+    ptr = mmap(guardptr + offset, size, prots, flags, fd, 0);
if (ptr == MAP_FAILED) {
          munmap(guardptr, total);
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 37c5854b9c..c57466f8d9 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -203,7 +203,7 @@ void *qemu_memalign(size_t alignment, size_t size)
  void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
  {
      size_t align = QEMU_VMALLOC_ALIGN;
-    void *ptr = qemu_ram_mmap(-1, size, align, shared);
+    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
if (ptr == MAP_FAILED) {
          return NULL;
diff --git a/vl.c b/vl.c
index 4c5cc0d8ad..2cc675ad21 100644
--- a/vl.c
+++ b/vl.c
@@ -2950,6 +2950,12 @@ static void register_global_properties(MachineState *ms)
      user_register_global_props();
  }
+static void unprotect_ram(void *opaque)
+{
+    error_report(__func__);
+    qemu_ram_unprotect_all();
+}
+
  int main(int argc, char **argv, char **envp)
  {
      int i;
@@ -4537,6 +4543,8 @@ int main(int argc, char **argv, char **envp)
replay_start(); + qemu_register_reset(unprotect_ram, NULL);
+
      /* This checkpoint is required by replay to separate prior clock
         reading from the other reads, because timer polling functions query
         clock values from the log. */




reply via email to

[Prev in Thread] Current Thread [Next in Thread]