[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save |
Date: |
Mon, 7 Nov 2016 15:46:11 +0000 |
User-agent: |
Mutt/1.7.1 (2016-10-04) |
* Marcelo Tosatti (address@hidden) wrote:
> This patch, relative to pre-copy migration codepath,
> measures the time between vm_stop() and pre_save(),
> which includes copying the remaining RAM to destination,
> and advances the clock by that amount.
>
> In a VM with 5 seconds downtime, this reduces the guest
> clock difference on destination from 5s to 0.2s.
>
> Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time.
One thing that bothers me is that it's only this clock that's
getting corrected; doesn't it cause things to get upset when
one clock moves and the others dont?
Shouldn't the pause delay be recorded somewhere architecturally
independent and then be a thing that kvm-clock happens to use and
other clocks might as well?
Dave
> Signed-off-by: Marcelo Tosatti <address@hidden>
>
> ---
>
> v2: use subsection (Juan Quintela)
> fix older machine types support
>
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 0f75dd3..a2a02ac 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -22,9 +22,11 @@
> #include "kvm_i386.h"
> #include "hw/sysbus.h"
> #include "hw/kvm/clock.h"
> +#include "migration/migration.h"
>
> #include <linux/kvm.h>
> #include <linux/kvm_para.h>
> +#include <time.h>
>
> #define TYPE_KVM_CLOCK "kvmclock"
> #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
> @@ -35,7 +37,13 @@ typedef struct KVMClockState {
> /*< public >*/
>
> uint64_t clock;
> + uint64_t ns;
> bool clock_valid;
> +
> + uint64_t advance_clock;
> + struct timespec t_aftervmstop;
> +
> + bool adv_clock_enabled;
> } KVMClockState;
>
> struct pvclock_vcpu_time_info {
> @@ -100,6 +108,11 @@ static void kvmclock_vm_state_change(void *opaque, int
> running,
> s->clock = time_at_migration;
> }
>
> + if (s->advance_clock && s->clock + s->advance_clock > s->clock) {
> + s->clock += s->advance_clock;
> + s->advance_clock = 0;
> + }
> +
> data.clock = s->clock;
> ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
> if (ret < 0) {
> @@ -135,6 +148,18 @@ static void kvmclock_vm_state_change(void *opaque, int
> running,
> abort();
> }
> s->clock = data.clock;
> + /*
> + * Transition from VM-running to VM-stopped via migration?
> + * Record when the VM was stopped.
> + */
> +
> + if (state == RUN_STATE_FINISH_MIGRATE &&
> + !migration_in_postcopy(migrate_get_current())) {
> + clock_gettime(CLOCK_MONOTONIC, &s->t_aftervmstop);
> + } else {
> + s->t_aftervmstop.tv_sec = 0;
> + s->t_aftervmstop.tv_nsec = 0;
> + }
>
> /*
> * If the VM is stopped, declare the clock state valid to
> @@ -152,6 +177,77 @@ static void kvmclock_realize(DeviceState *dev, Error
> **errp)
> qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
> }
>
> +static uint64_t clock_delta(struct timespec *before, struct timespec *after)
> +{
> + if (before->tv_sec > after->tv_sec ||
> + (before->tv_sec == after->tv_sec &&
> + before->tv_nsec > after->tv_nsec)) {
> + fprintf(stderr, "clock_delta failed: before=(%ld sec, %ld nsec),"
> + "after=(%ld sec, %ld nsec)\n", before->tv_sec,
> + before->tv_nsec, after->tv_sec, after->tv_nsec);
> + abort();
> + }
> +
> + return (after->tv_sec - before->tv_sec) * 1000000000ULL +
> + after->tv_nsec - before->tv_nsec;
> +}
> +
> +static void kvmclock_pre_save(void *opaque)
> +{
> + KVMClockState *s = opaque;
> + struct timespec now;
> + uint64_t ns;
> +
> + if (s->t_aftervmstop.tv_sec == 0) {
> + return;
> + }
> +
> + clock_gettime(CLOCK_MONOTONIC, &now);
> +
> + ns = clock_delta(&s->t_aftervmstop, &now);
> +
> + /*
> + * Linux guests can overflow if time jumps
> + * forward in large increments.
> + * Cap maximum adjustment to 10 minutes.
> + */
> + ns = MIN(ns, 600000000000ULL);
> +
> + if (s->clock + ns > s->clock) {
> + s->ns = ns;
> + }
> +}
> +
> +static int kvmclock_post_load(void *opaque, int version_id)
> +{
> + KVMClockState *s = opaque;
> +
> + /* save the value from incoming migration */
> + s->advance_clock = s->ns;
> +
> + return 0;
> +}
> +
> +static bool kvmclock_ns_needed(void *opaque)
> +{
> + KVMClockState *s = opaque;
> +
> + return s->adv_clock_enabled;
> +}
> +
> +static const VMStateDescription kvmclock_advance_ns = {
> + .name = "kvmclock/advance_ns",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .needed = kvmclock_ns_needed,
> + .pre_save = kvmclock_pre_save,
> + .post_load = kvmclock_post_load,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(ns, KVMClockState),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> static const VMStateDescription kvmclock_vmsd = {
> .name = "kvmclock",
> .version_id = 1,
> @@ -159,15 +255,25 @@ static const VMStateDescription kvmclock_vmsd = {
> .fields = (VMStateField[]) {
> VMSTATE_UINT64(clock, KVMClockState),
> VMSTATE_END_OF_LIST()
> + },
> + .subsections = (const VMStateDescription * []) {
> + &kvmclock_advance_ns,
> + NULL
> }
> };
>
> +static Property kvmclock_properties[] = {
> + DEFINE_PROP_BOOL("advance_clock", KVMClockState, adv_clock_enabled,
> true),
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> static void kvmclock_class_init(ObjectClass *klass, void *data)
> {
> DeviceClass *dc = DEVICE_CLASS(klass);
>
> dc->realize = kvmclock_realize;
> dc->vmsd = &kvmclock_vmsd;
> + dc->props = kvmclock_properties;
> }
>
> static const TypeInfo kvmclock_info = {
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 98dc772..243352e 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -370,6 +370,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t
> *);
> #define PC_COMPAT_2_7 \
> HW_COMPAT_2_7 \
> {\
> + .driver = "kvmclock",\
> + .property = "advance_clock",\
> + .value = "off",\
> + },\
> + {\
> .driver = TYPE_X86_CPU,\
> .property = "l3-cache",\
> .value = "off",\
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK
- Re: [Qemu-devel] [QEMU PATCH] kvmclock: advance clock by time window between vm_stop and pre_save, (continued)
Re: [Qemu-devel] [QEMU PATCH] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/04
Re: [Qemu-devel] [QEMU PATCH] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/04
[Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/04
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Juan Quintela, 2016/11/04
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save,
Dr. David Alan Gilbert <=
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/07
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Dr. David Alan Gilbert, 2016/11/07
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/07
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Dr. David Alan Gilbert, 2016/11/08
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/08
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/09
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Paolo Bonzini, 2016/11/09
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Dr. David Alan Gilbert, 2016/11/09
- Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Paolo Bonzini, 2016/11/09
Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save, Marcelo Tosatti, 2016/11/10