qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and c


From: Hailiang Zhang
Subject: Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage
Date: Thu, 14 Jul 2016 16:02:14 +0800
User-agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Thunderbird/38.5.1

On 2016/7/14 1:52, Dr. David Alan Gilbert wrote:
* zhanghailiang (address@hidden) wrote:
If we modify VM's RAM (pages) during setup stage after enable write-protect
notification in snapshot thread, the modification action will get stuck because
we only remove the page's write-protect in savevm process, it blocked by itself.

To fix this bug, we will remove page's write-protect in fault thread during
the setup stage. Besides, we should not try to get global lock after setup 
stage,
or there maybe an deadlock error.

Hmm this complicates things a bit more doesn't it.
What's the order of:
    a) setup
    b) savings devices
    c) Being able to transmit the pages?

Are these pages that are being modified during setup, being modified
as part of the device state save?


Yes, I'm not sure if the problem still exist or not after exchanging the 
sequence
of 'save devices' and 'enable ram notify'.

I'll look into it.

Hailaing

Dave


Signed-off-by: zhanghailiang <address@hidden>
---
  include/migration/migration.h |  4 ++--
  migration/migration.c         |  2 +-
  migration/postcopy-ram.c      | 17 ++++++++++++++++-
  migration/ram.c               | 37 +++++++++++++++++++++++++++++++------
  4 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index ef4c071..435de31 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -127,7 +127,7 @@ struct MigrationSrcPageRequest {
      RAMBlock *rb;
      hwaddr    offset;
      hwaddr    len;
-
+    uint8_t *pages_copy_addr;
      QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
  };

@@ -333,7 +333,7 @@ void global_state_store_running(void);

  void flush_page_queue(MigrationState *ms);
  int ram_save_queue_pages(MigrationState *ms, const char *rbname,
-                         ram_addr_t start, ram_addr_t len);
+                         ram_addr_t start, ram_addr_t len, bool copy_pages);

  PostcopyState postcopy_state_get(void);
  /* Set the state and return the old state */
diff --git a/migration/migration.c b/migration/migration.c
index 3765c3b..bf4c7a1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1248,7 +1248,7 @@ static void migrate_handle_rp_req_pages(MigrationState 
*ms, const char* rbname,
          return;
      }

-    if (ram_save_queue_pages(ms, rbname, start, len)) {
+    if (ram_save_queue_pages(ms, rbname, start, len, false)) {
          mark_source_rp_bad(ms);
      }
  }
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 61392d3..2cf477d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -543,13 +543,28 @@ static void *postcopy_ram_fault_thread(void *opaque)
              MigrationState *ms = container_of(us, MigrationState,
                                                userfault_state);
              ret = ram_save_queue_pages(ms, qemu_ram_get_idstr(rb), rb_offset,
-                                       hostpagesize);
+                                       hostpagesize, true);

              if (ret < 0) {
                  error_report("%s: Save: %"PRIx64 " failed!",
                               __func__, (uint64_t)msg.arg.pagefault.address);
                  break;
              }
+
+            /* Note: In the setup process, snapshot_thread may modify VM's
+            * write-protected pages, we should not block it there, or there
+            * will be an deadlock error.
+            */
+            if (migration_in_setup(ms)) {
+                uint64_t host = msg.arg.pagefault.address;
+
+                host &= ~(hostpagesize - 1);
+                ret = ram_set_pages_wp(host, getpagesize(), true,
+                                       us->userfault_fd);
+                if (ret < 0) {
+                    error_report("Remove page's write-protect failed");
+                }
+            }
          }
      }
      trace_postcopy_ram_fault_thread_exit();
diff --git a/migration/ram.c b/migration/ram.c
index 8656719..747f9aa 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -233,6 +233,7 @@ struct PageSearchStatus {
      ram_addr_t   offset;
      /* Set once we wrap around */
      bool         complete_round;
+    uint8_t *pages_copy;
  };
  typedef struct PageSearchStatus PageSearchStatus;

@@ -742,7 +743,12 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus 
*pss,
      RAMBlock *block = pss->block;
      ram_addr_t offset = pss->offset;

-    p = block->host + offset;
+    /* If we have a copy of this page, use the backup page first */
+    if (pss->pages_copy) {
+        p = pss->pages_copy;
+    } else {
+        p = block->host + offset;
+    }

      /* In doubt sent page as normal */
      bytes_xmit = 0;
@@ -926,7 +932,12 @@ static int ram_save_compressed_page(QEMUFile *f, 
PageSearchStatus *pss,
      RAMBlock *block = pss->block;
      ram_addr_t offset = pss->offset;

-    p = block->host + offset;
+    /* If we have a copy of this page, use the backup first */
+    if (pss->pages_copy) {
+        p = pss->pages_copy;
+    } else {
+        p = block->host + offset;
+    }

      bytes_xmit = 0;
      ret = ram_control_save_page(f, block->offset,
@@ -1043,7 +1054,7 @@ static bool find_dirty_block(QEMUFile *f, 
PageSearchStatus *pss,
   * Returns:      block (or NULL if none available)
   */
  static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
-                              ram_addr_t *ram_addr_abs)
+                              ram_addr_t *ram_addr_abs, uint8 
**pages_copy_addr)
  {
      RAMBlock *block = NULL;

@@ -1055,7 +1066,7 @@ static RAMBlock *unqueue_page(MigrationState *ms, 
ram_addr_t *offset,
          *offset = entry->offset;
          *ram_addr_abs = (entry->offset + entry->rb->offset) &
                          TARGET_PAGE_MASK;
-
+        *pages_copy_addr = entry->pages_copy_addr;
          if (entry->len > TARGET_PAGE_SIZE) {
              entry->len -= TARGET_PAGE_SIZE;
              entry->offset += TARGET_PAGE_SIZE;
@@ -1086,9 +1097,10 @@ static bool get_queued_page(MigrationState *ms, 
PageSearchStatus *pss,
      RAMBlock  *block;
      ram_addr_t offset;
      bool dirty;
+    uint8 *pages_backup_addr = NULL;

      do {
-        block = unqueue_page(ms, &offset, ram_addr_abs);
+        block = unqueue_page(ms, &offset, ram_addr_abs, &pages_backup_addr);
          /*
           * We're sending this page, and since it's postcopy nothing else
           * will dirty it, and we must make sure it doesn't get sent again
@@ -1130,6 +1142,7 @@ static bool get_queued_page(MigrationState *ms, 
PageSearchStatus *pss,
           */
          pss->block = block;
          pss->offset = offset;
+        pss->pages_copy = pages_backup_addr;
      }

      return !!block;
@@ -1166,7 +1179,7 @@ void flush_page_queue(MigrationState *ms)
   *   Return: 0 on success
   */
  int ram_save_queue_pages(MigrationState *ms, const char *rbname,
-                         ram_addr_t start, ram_addr_t len)
+                         ram_addr_t start, ram_addr_t len, bool copy_pages)
  {
      RAMBlock *ramblock;

@@ -1206,6 +1219,17 @@ int ram_save_queue_pages(MigrationState *ms, const char 
*rbname,
      new_entry->rb = ramblock;
      new_entry->offset = start;
      new_entry->len = len;
+    if (copy_pages) {
+        /* Fix me: Better to realize a memory pool */
+        new_entry->pages_copy_addr = g_try_malloc0(len);
+
+        if (!new_entry->pages_copy_addr) {
+            error_report("%s: Failed to alloc memory", __func__);
+            return -1;
+        }
+
+        memcpy(new_entry->pages_copy_addr, ramblock_ptr(ramblock, start), len);
+    }

      memory_region_ref(ramblock->mr);
      qemu_mutex_lock(&ms->src_page_req_mutex);
@@ -1342,6 +1366,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
      pss.block = last_seen_block;
      pss.offset = last_offset;
      pss.complete_round = false;
+    pss.pages_copy = NULL;

      if (!pss.block) {
          pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
--
1.8.3.1


--
Dr. David Alan Gilbert / address@hidden / Manchester, UK

.





reply via email to

[Prev in Thread] Current Thread [Next in Thread]