qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH] spapr: support time base offset migration


From: Alexey Kardashevskiy
Subject: [Qemu-devel] [RFC PATCH] spapr: support time base offset migration
Date: Tue, 3 Sep 2013 17:31:42 +1000

This allows guests to have a different timebase origin from the host.

This is needed for migration, where a guest can migrate from one host
to another and the two hosts might have a different timebase origin.
However, the timebase seen by the guest must not go backwards, and
should go forwards only by a small amount corresponding to the time
taken for the migration.

This is only supported for recent POWER hardware which has the TBU40
(timebase upper 40 bits) register. That includes POWER6, 7, 8 but not
970.

This adds kvm_access_one_reg() to access a special register which is not
in env->spr.

The feature must be present in the host kernel.

Signed-off-by: Alexey Kardashevskiy <address@hidden>
---

This is an RFC but not a final patch. Can break something but I just do not see 
what.

---
 hw/ppc/ppc.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/ppc.h |  4 ++++
 target-ppc/kvm.c     | 23 +++++++++++++++++++++++
 target-ppc/machine.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 trace-events         |  3 +++
 5 files changed, 123 insertions(+)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 1e3cab3..7d08c9a 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -31,6 +31,7 @@
 #include "hw/loader.h"
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
+#include "trace.h"
 
 //#define PPC_DEBUG_IRQ
 #define PPC_DEBUG_TB
@@ -796,6 +797,54 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t 
freq)
     cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
 }
 
+/*
+ * Calculate timebase on the destination side of migration
+ *
+ * We calculate new timebase offset as shown below:
+ * 1) Gtb2 = Gtb1 + max(tod2 - tod1, 0)
+ *    Gtb2 = tb2 + off2
+ *    Gtb1 = tb1 + off1
+ * 2) tb2 + off2 = tb1 + off1 + max(tod2 - tod1, 0)
+ * 3) off2 = tb1 - tb2 + off1 + max(tod2 - tod1, 0)
+ *
+ * where:
+ * Gtb2 - destination guest timebase
+ * tb2 - destination host timebase
+ * off2 - destination timebase offset
+ * tod2 - destination time of the day
+ * Gtb1 - source guest timebase
+ * tb1 - source host timebase
+ * off1 - source timebase offset
+ * tod1 - source time of the day
+ *
+ * The result we want is in @off2
+ *
+ * Two conditions must be met for @off2:
+ * 1) off2 must be multiple of 2^24 ticks as it will be set via TBU40 SPR
+ * 2) Gtb2 >= Gtb1
+ */
+void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env)
+{
+    uint64_t tb2, tod2, off2;
+    int ratio = tb_env->tb_freq / 1000000;
+    struct timeval tv;
+
+    tb2 = cpu_get_real_ticks();
+    gettimeofday(&tv, NULL);
+    tod2 = tv.tv_sec * 1000000 + tv.tv_usec;
+
+    off2 = tb_env->timebase - tb2 + tb_env->tb_offset;
+    if (tod2 > tb_env->time_of_the_day) {
+        off2 += (tod2 - tb_env->time_of_the_day) * ratio;
+    }
+    off2 = ROUND_UP(off2, 1 << 24);
+
+    trace_ppc_tb_adjust(tb_env->tb_offset, off2,
+                        (int64_t)off2 - tb_env->tb_offset);
+
+    tb_env->tb_offset = off2;
+}
+
 /* Set up (once) timebase frequency (in Hz) */
 clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
 {
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 132ab97..235871c 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -32,6 +32,9 @@ struct ppc_tb_t {
     uint64_t purr_start;
     void *opaque;
     uint32_t flags;
+    /* Cached values for live migration purposes */
+    uint64_t timebase;
+    uint64_t time_of_the_day;
 };
 
 /* PPC Timers flags */
@@ -46,6 +49,7 @@ struct ppc_tb_t {
                                                */
 
 uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset);
+void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env);
 clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq);
 /* Embedded PowerPC DCR management */
 typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 7af9e3d..93df955 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -35,6 +35,7 @@
 #include "hw/sysbus.h"
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/ppc.h"
 #include "sysemu/watchdog.h"
 
 //#define DEBUG_KVM
@@ -761,6 +762,22 @@ static int kvm_put_vpa(CPUState *cs)
 }
 #endif /* TARGET_PPC64 */
 
+static int kvm_access_one_reg(CPUState *cs, bool set, __u64 id, void *addr)
+{
+    struct kvm_one_reg reg = {
+        .id = id,
+        .addr = (uintptr_t)addr,
+    };
+    int ret = kvm_vcpu_ioctl(cs, set ? KVM_SET_ONE_REG : KVM_GET_ONE_REG, 
&reg);
+
+    if (ret) {
+        DPRINTF("Unable to %s time base offset to KVM: %s\n",
+                set ? "set" : "get", strerror(errno));
+    }
+
+    return ret;
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -873,6 +890,9 @@ int kvm_arch_put_registers(CPUState *cs, int level)
                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
             }
         }
+
+        kvm_access_one_reg(cs, 1, KVM_REG_PPC_TB_OFFSET,
+                           &env->tb_env->tb_offset);
 #endif /* TARGET_PPC64 */
     }
 
@@ -1082,6 +1102,9 @@ int kvm_arch_get_registers(CPUState *cs)
                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
             }
         }
+
+        kvm_access_one_reg(cs, 0, KVM_REG_PPC_TB_OFFSET,
+                           &env->tb_env->tb_offset);
 #endif
     }
 
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index 12e1512..d1ffc7f 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -1,5 +1,6 @@
 #include "hw/hw.h"
 #include "hw/boards.h"
+#include "hw/ppc/ppc.h"
 #include "sysemu/kvm.h"
 #include "helper_regs.h"
 
@@ -459,6 +460,45 @@ static const VMStateDescription vmstate_tlbmas = {
     }
 };
 
+static void timebase_pre_save(void *opaque)
+{
+    ppc_tb_t *tb_env = opaque;
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    tb_env->time_of_the_day = tv.tv_sec * 1000000 + tv.tv_usec;
+    tb_env->timebase = cpu_get_real_ticks();
+}
+
+static int timebase_post_load(void *opaque, int version_id)
+{
+    ppc_tb_t *tb_env = opaque;
+
+    if (!tb_env) {
+        printf("NO TB!\n");
+        return -1;
+    }
+    cpu_ppc_adjust_tb_offset(tb_env);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_timebase = {
+    .name = "cpu/timebase",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = timebase_pre_save,
+    .post_load = timebase_post_load,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT64(timebase, ppc_tb_t),
+        VMSTATE_INT64(tb_offset, ppc_tb_t),
+        VMSTATE_UINT64(time_of_the_day, ppc_tb_t),
+        VMSTATE_UINT32_EQUAL(tb_freq, ppc_tb_t),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -498,6 +538,10 @@ const VMStateDescription vmstate_ppc_cpu = {
         VMSTATE_UINT64_EQUAL(env.insns_flags, PowerPCCPU),
         VMSTATE_UINT64_EQUAL(env.insns_flags2, PowerPCCPU),
         VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU),
+
+        /* Time offset */
+        VMSTATE_STRUCT_POINTER(env.tb_env, PowerPCCPU,
+                               vmstate_timebase, ppc_tb_t *),
         VMSTATE_END_OF_LIST()
     },
     .subsections = (VMStateSubsection []) {
diff --git a/trace-events b/trace-events
index 935b953..24cf4d2 100644
--- a/trace-events
+++ b/trace-events
@@ -1141,6 +1141,9 @@ spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t 
tce, uint64_t ret) "li
 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, 
unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
 spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) 
"liobn=%"PRIx64" tcet=%p table=%p fd=%d"
 
+# hw/ppc/ppc.c
+ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff) "adjusted from 
%"PRIx64" to %"PRIx64", diff %"PRId64
+
 # util/hbitmap.c
 hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long 
cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
 hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, 
uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
-- 
1.8.4.rc4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]