[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed
From: |
Peter Xu |
Subject: |
[Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed |
Date: |
Fri, 28 Jul 2017 16:06:38 +0800 |
First, the MigThrError enumeration is introduced so that
migration_detect_error() can report its result more precisely than a
boolean. This gives migration_thread() a chance to know whether a
recovery has happened.
Then, if a recovery is detected, migration_thread() resets its local
variables (initial_time and initial_bytes) to prepare for the resumed
migration.
Signed-off-by: Peter Xu <address@hidden>
---
migration/migration.c | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index ecebe30..439bc22 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2159,6 +2159,15 @@ static bool postcopy_should_start(MigrationState *s)
return atomic_read(&s->start_postcopy) || s->start_postcopy_fast;
}
+typedef enum MigThrError {
+ /* No error detected */
+ MIG_THR_ERR_NONE = 0,
+ /* Detected error, but resumed successfully */
+ MIG_THR_ERR_RECOVERED = 1,
+ /* Detected fatal error, need to exit */
+ MIG_THR_ERR_FATAL = 2,
+} MigThrError;
+
static int postcopy_resume_handshake(MigrationState *s)
{
qemu_mutex_lock(&s->resume_lock);
@@ -2209,10 +2218,10 @@ static int postcopy_do_resume(MigrationState *s)
/*
* We don't return until we are in a safe state to continue current
- * postcopy migration. Returns true to continue the migration, or
- * false to terminate current migration.
+ * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or
+ * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
*/
-static bool postcopy_pause(MigrationState *s)
+static MigThrError postcopy_pause(MigrationState *s)
{
assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -2247,7 +2256,7 @@ do_pause:
if (postcopy_do_resume(s) == 0) {
/* Let's continue! */
trace_postcopy_pause_continued();
- return true;
+ return MIG_THR_ERR_RECOVERED;
} else {
/*
* Something wrong happened during the recovery, let's
@@ -2258,12 +2267,11 @@ do_pause:
}
} else {
/* This is not right... Time to quit. */
- return false;
+ return MIG_THR_ERR_FATAL;
}
}
-/* Return true if we want to stop the migration, otherwise false. */
-static bool migration_detect_error(MigrationState *s)
+static MigThrError migration_detect_error(MigrationState *s)
{
int ret;
@@ -2272,7 +2280,7 @@ static bool migration_detect_error(MigrationState *s)
if (!ret) {
/* Everything is fine */
- return false;
+ return MIG_THR_ERR_NONE;
}
if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
@@ -2281,7 +2289,7 @@ static bool migration_detect_error(MigrationState *s)
* while. After that, it can be continued by a
* recovery phase.
*/
- return !postcopy_pause(s);
+ return postcopy_pause(s);
} else {
/*
* For precopy (or postcopy with error outside IO), we fail
@@ -2291,7 +2299,7 @@ static bool migration_detect_error(MigrationState *s)
trace_migration_thread_file_err();
/* Time to stop the migration, now. */
- return true;
+ return MIG_THR_ERR_FATAL;
}
}
@@ -2319,6 +2327,7 @@ static void *migration_thread(void *opaque)
/* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
bool enable_colo = migrate_colo_enabled();
+ MigThrError thr_error;
rcu_register_thread();
@@ -2395,8 +2404,17 @@ static void *migration_thread(void *opaque)
* Try to detect any kind of failures, and see whether we
* should stop the migration now.
*/
- if (migration_detect_error(s)) {
+ thr_error = migration_detect_error(s);
+ if (thr_error == MIG_THR_ERR_FATAL) {
+ /* Stop migration */
break;
+ } else if (thr_error == MIG_THR_ERR_RECOVERED) {
+ /*
+ * Just recovered from e.g. a network failure; reset all
+ * the local variables.
+ */
+ initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ initial_bytes = 0;
}
current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
--
2.7.4
- [Qemu-devel] [RFC 18/29] migration: new state "postcopy-recover", (continued)
- [Qemu-devel] [RFC 18/29] migration: new state "postcopy-recover", Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 20/29] migration: wakeup dst ram-load-thread for recover, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 21/29] migration: new cmd MIG_CMD_RECV_BITMAP, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 22/29] migration: new message MIG_RP_MSG_RECV_BITMAP, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 24/29] migration: new message MIG_RP_MSG_RESUME_ACK, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 25/29] migration: introduce SaveVMHandlers.resume_prepare, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 26/29] migration: synchronize dirty bitmap for resume, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 27/29] migration: setup ramstate for resume, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 28/29] migration: final handshake for the resume, Peter Xu, 2017/07/28
- [Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed,
Peter Xu <=
- Re: [Qemu-devel] [RFC 00/29] Migration: postcopy failure recovery, Peter Xu, 2017/07/28