[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM
From: |
David Gibson |
Subject: |
Re: [Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM |
Date: |
Thu, 29 Nov 2018 14:43:58 +1100 |
User-agent: |
Mutt/1.10.1 (2018-07-13) |
On Fri, Nov 16, 2018 at 11:57:16AM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE models to handle the state synchronization
> with KVM, for the monitor usage and for the migration.
>
> The migration priority of the XIVE interrupt controller sPAPRXive is
> raised for KVM. It operates first and orchestrates the capture
> sequence of the states of all the XIVE models. The XIVE sources are
> masked to quiesce the interrupt flow and a XIVE xync is performed to
> stabilize the OS Event Queues. The state of the ENDs are then captured
> by the XIVE interrupt controller model, sPAPRXive, and the state of
> the thread contexts by the thread interrupt presenter model,
> XiveTCTX. When done, a rollback is performed to restore the sources to
> their initial state.
>
> The sPAPRXive 'post_load' method is called from the sPAPR machine,
> after all XIVE device states have been transfered and loaded. First,
> sPAPRXive restores the XIVE routing tables: ENDT and EAT. Next, are
> restored the thread interrupt context registers and the source PQ
> bits.
>
> The get/set operations rely on their KVM counterpart in the host
> kernel which acts as a proxy for OPAL, the host firmware.
>
> Signed-off-by: Cédric Le Goater <address@hidden>
> ---
>
> WIP:
>
> If migration occurs when a VCPU is 'ceded', some the OS event
> notification queues are mapped to the ZERO_PAGE on the receiving
> side. As if the HW had triggered a page fault before the dirty
> page was transferred from the source or as if we were not using
> the correct page table.
>
> include/hw/ppc/spapr_xive.h | 5 +
> include/hw/ppc/xive.h | 3 +
> include/migration/vmstate.h | 1 +
> linux-headers/asm-powerpc/kvm.h | 33 +++
> hw/intc/spapr_xive.c | 32 +++
> hw/intc/spapr_xive_kvm.c | 494 ++++++++++++++++++++++++++++++++
> hw/intc/xive.c | 46 +++
> hw/ppc/spapr_irq.c | 2 +-
> 8 files changed, 615 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 9c817bb7ae74..d2517c040958 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -55,12 +55,17 @@ typedef struct sPAPRXiveClass {
> XiveRouterClass parent_class;
>
> DeviceRealize parent_realize;
> +
> + void (*synchronize_state)(sPAPRXive *xive);
> + int (*pre_save)(sPAPRXive *xsrc);
> + int (*post_load)(sPAPRXive *xsrc, int version_id);
This should go away if the KVM and non-KVM versions are in the same
object.
> } sPAPRXiveClass;
>
> bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
> bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
> void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
>
> /*
> * sPAPR NVT and END indexing helpers
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 7aaf5a182cb3..c8201462d698 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -309,6 +309,9 @@ typedef struct XiveTCTXClass {
> DeviceClass parent_class;
>
> DeviceRealize parent_realize;
> +
> + void (*synchronize_state)(XiveTCTX *tctx);
> + int (*post_load)(XiveTCTX *tctx, int version_id);
.. and this too.
> } XiveTCTXClass;
>
> /*
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 2b501d04669a..ee2e836cc1c1 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -154,6 +154,7 @@ typedef enum {
> MIG_PRI_PCI_BUS, /* Must happen before IOMMU */
> MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */
> MIG_PRI_GICV3, /* Must happen before the ITS */
> + MIG_PRI_XIVE_IC, /* Must happen before all XIVE models */
Ugh.. explicit priority / order levels are a pretty bad code smell.
Usually migration ordering can be handled by getting the object
hierarchy right. What exactly is the problem you're addressing with
this?
> MIG_PRI_MAX,
> } MigrationPriority;
>
> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> index f34c971491dd..9d55ade23634 100644
> --- a/linux-headers/asm-powerpc/kvm.h
> +++ b/linux-headers/asm-powerpc/kvm.h
Again, linux-headers need to be split out.
> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
> #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
> #define KVM_REG_PPC_ICP_PPRI_MASK 0xff
>
> +#define KVM_REG_PPC_NVT_STATE (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
> +
> /* Device control API: PPC-specific devices */
> #define KVM_DEV_MPIC_GRP_MISC 1
> #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
> @@ -681,10 +683,41 @@ struct kvm_ppc_cpu_char {
> #define KVM_DEV_XIVE_GET_TIMA_FD 2
> #define KVM_DEV_XIVE_VC_BASE 3
> #define KVM_DEV_XIVE_GRP_SOURCES 2 /* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_SYNC 3 /* 64-bit source
> attributes */
> +#define KVM_DEV_XIVE_GRP_EAS 4 /* 64-bit eas attributes */
> +#define KVM_DEV_XIVE_GRP_EQ 5 /* 64-bit eq attributes */
>
> /* Layout of 64-bit XIVE source attribute values */
> #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
> #define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
>
> +/* Layout of 64-bit eas attribute values */
> +#define KVM_XIVE_EAS_PRIORITY_SHIFT 0
> +#define KVM_XIVE_EAS_PRIORITY_MASK 0x7
> +#define KVM_XIVE_EAS_SERVER_SHIFT 3
> +#define KVM_XIVE_EAS_SERVER_MASK 0xfffffff8ULL
> +#define KVM_XIVE_EAS_MASK_SHIFT 32
> +#define KVM_XIVE_EAS_MASK_MASK 0x100000000ULL
> +#define KVM_XIVE_EAS_EISN_SHIFT 33
> +#define KVM_XIVE_EAS_EISN_MASK 0xfffffffe00000000ULL
> +
> +/* Layout of 64-bit eq attribute */
> +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
> +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
> +#define KVM_XIVE_EQ_SERVER_SHIFT 3
> +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
> +
> +/* Layout of 64-bit eq attribute values */
> +struct kvm_ppc_xive_eq {
> + __u32 flags;
> + __u32 qsize;
> + __u64 qpage;
> + __u32 qtoggle;
> + __u32 qindex;
> +};
> +
> +#define KVM_XIVE_EQ_FLAG_ENABLED 0x00000001
> +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY 0x00000002
> +#define KVM_XIVE_EQ_FLAG_ESCALATE 0x00000004
>
> #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index ec85f7e4f88d..c5c0e063dc33 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -27,9 +27,14 @@
>
> void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> {
> + sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> int i;
> uint32_t offset = 0;
>
> + if (sxc->synchronize_state) {
> + sxc->synchronize_state(xive);
> + }
> +
> monitor_printf(mon, "XIVE Source %08x .. %08x\n", offset,
> offset + xive->source.nr_irqs - 1);
> xive_source_pic_print_info(&xive->source, offset, mon);
> @@ -354,10 +359,37 @@ static const VMStateDescription vmstate_spapr_xive_eas
> = {
> },
> };
>
> +static int vmstate_spapr_xive_pre_save(void *opaque)
> +{
> + sPAPRXive *xive = SPAPR_XIVE_BASE(opaque);
> + sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> + if (sxc->pre_save) {
> + return sxc->pre_save(xive);
> + }
> +
> + return 0;
> +}
> +
> +/* handled at the machine level */
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
> +{
> + sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> + if (sxc->post_load) {
> + return sxc->post_load(xive, version_id);
> + }
> +
> + return 0;
> +}
> +
> static const VMStateDescription vmstate_spapr_xive_base = {
> .name = TYPE_SPAPR_XIVE,
> .version_id = 1,
> .minimum_version_id = 1,
> + .pre_save = vmstate_spapr_xive_pre_save,
> + .post_load = NULL, /* handled at the machine level */
> + .priority = MIG_PRI_XIVE_IC,
> .fields = (VMStateField[]) {
> VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
> VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 767f90826e43..176083c37d61 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -58,6 +58,58 @@ static void kvm_cpu_enable(CPUState *cs)
> /*
> * XIVE Thread Interrupt Management context (KVM)
> */
> +static void xive_tctx_kvm_set_state(XiveTCTX *tctx, Error **errp)
> +{
> + uint64_t state[4];
> + int ret;
> +
> + /* word0 and word1 of the OS ring. */
> + state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
> +
> + /* VP identifier. Only for KVM pr_debug() */
> + state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
> +
> + ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> + if (ret != 0) {
> + error_setg_errno(errp, errno, "Could restore KVM XIVE CPU %ld state",
> + kvm_arch_vcpu_id(tctx->cs));
> + }
> +}
> +
> +static void xive_tctx_kvm_get_state(XiveTCTX *tctx, Error **errp)
> +{
> + uint64_t state[4] = { 0 };
> + int ret;
> +
> + ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> + if (ret != 0) {
> + error_setg_errno(errp, errno, "Could capture KVM XIVE CPU %ld state",
> + kvm_arch_vcpu_id(tctx->cs));
> + return;
> + }
> +
> + /* word0 and word1 of the OS ring. */
> + *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +
> + /*
> + * KVM also returns word2 containing the VP CAM line value which
> + * is interesting to print out the VP identifier in the QEMU
> + * monitor. No need to restore it.
> + */
> + *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> +}
> +
> +static void xive_tctx_kvm_do_synchronize_state(CPUState *cpu,
> + run_on_cpu_data arg)
> +{
> + xive_tctx_kvm_get_state(arg.host_ptr, &error_fatal);
> +}
> +
> +static void xive_tctx_kvm_synchronize_state(XiveTCTX *tctx)
> +{
> + run_on_cpu(tctx->cs, xive_tctx_kvm_do_synchronize_state,
> + RUN_ON_CPU_HOST_PTR(tctx));
> +}
>
> static void xive_tctx_kvm_init(XiveTCTX *tctx, Error **errp)
> {
> @@ -112,6 +164,8 @@ static void xive_tctx_kvm_class_init(ObjectClass *klass,
> void *data)
>
> device_class_set_parent_realize(dc, xive_tctx_kvm_realize,
> &xtc->parent_realize);
> +
> + xtc->synchronize_state = xive_tctx_kvm_synchronize_state;
> }
>
> static const TypeInfo xive_tctx_kvm_info = {
> @@ -166,6 +220,34 @@ static void xive_source_kvm_reset(DeviceState *dev)
> xive_source_kvm_init(xsrc, &error_fatal);
> }
>
> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> + unsigned long addr = (unsigned long) xsrc->esb_mmap +
> + xive_source_esb_mgmt(xsrc, srcno) + offset;
> +
> + /* Prevent the compiler from optimizing away the load */
> + volatile uint64_t value = *((uint64_t *) addr);
> +
> + return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void xive_source_kvm_get_state(XiveSource *xsrc)
> +{
> + int i;
> +
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + /* Perform a load without side effect to retrieve the PQ bits */
> + uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> + /* and save PQ locally */
> + xive_source_esb_set(xsrc, i, pq);
> + }
> +}
> +
> static void xive_source_kvm_set_irq(void *opaque, int srcno, int val)
> {
> XiveSource *xsrc = opaque;
> @@ -295,6 +377,414 @@ static const TypeInfo xive_source_kvm_info = {
> /*
> * sPAPR XIVE Router (KVM)
> */
> +static int spapr_xive_kvm_set_eq_state(sPAPRXive *xive, CPUState *cs,
> + Error **errp)
> +{
> + XiveRouter *xrtr = XIVE_ROUTER(xive);
> + unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> + int ret;
> + int i;
> +
> + for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> + Error *local_err = NULL;
> + XiveEND end;
> + uint8_t end_blk;
> + uint32_t end_idx;
> + struct kvm_ppc_xive_eq kvm_eq = { 0 };
> + uint64_t kvm_eq_idx;
> +
> + if (!spapr_xive_priority_is_valid(i)) {
> + continue;
> + }
> +
> + spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> + ret = xive_router_get_end(xrtr, end_blk, end_idx, &end);
> + if (ret) {
> + error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> + vcpu_id, i);
> + return ret;
> + }
> +
> + if (!(end.w0 & END_W0_VALID)) {
> + continue;
> + }
> +
> + /* Build the KVM state from the local END structure */
> + kvm_eq.flags = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> + kvm_eq.qsize = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> + kvm_eq.qpage = (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
> + kvm_eq.qtoggle = GETFIELD(END_W1_GENERATION, end.w1);
> + kvm_eq.qindex = GETFIELD(END_W1_PAGE_OFF, end.w1);
> +
> + /* Encode the tuple (server, prio) as a KVM EQ index */
> + kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> + KVM_XIVE_EQ_PRIORITY_MASK;
> + kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> + KVM_XIVE_EQ_SERVER_MASK;
> +
> + ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> + &kvm_eq, true, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int spapr_xive_kvm_get_eq_state(sPAPRXive *xive, CPUState *cs,
> + Error **errp)
> +{
> + XiveRouter *xrtr = XIVE_ROUTER(xive);
> + unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> + int ret;
> + int i;
> +
> + for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> + Error *local_err = NULL;
> + struct kvm_ppc_xive_eq kvm_eq = { 0 };
> + uint64_t kvm_eq_idx;
> + XiveEND end = { 0 };
> + uint8_t end_blk, nvt_blk;
> + uint32_t end_idx, nvt_idx;
> +
> + /* Skip priorities reserved for the hypervisor */
> + if (!spapr_xive_priority_is_valid(i)) {
> + continue;
> + }
> +
> + /* Encode the tuple (server, prio) as a KVM EQ index */
> + kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> + KVM_XIVE_EQ_PRIORITY_MASK;
> + kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> + KVM_XIVE_EQ_SERVER_MASK;
> +
> + ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> + &kvm_eq, false, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return ret;
> + }
> +
> + if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
> + continue;
> + }
> +
> + /* Update the local END structure with the KVM input */
> + if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
> + end.w0 |= END_W0_VALID | END_W0_ENQUEUE;
> + }
> + if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
> + end.w0 |= END_W0_UCOND_NOTIFY;
> + }
> + if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
> + end.w0 |= END_W0_ESCALATE_CTL;
> + }
> + end.w0 |= SETFIELD(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
> +
> + end.w1 = SETFIELD(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> + SETFIELD(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> + end.w2 = (kvm_eq.qpage >> 32) & 0x0fffffff;
> + end.w3 = kvm_eq.qpage & 0xffffffff;
> + end.w4 = 0;
> + end.w5 = 0;
> +
> + ret = spapr_xive_cpu_to_nvt(xive, POWERPC_CPU(cs), &nvt_blk,
> &nvt_idx);
> + if (ret) {
> + error_setg(errp, "XIVE: No NVT for CPU %ld", vcpu_id);
> + return ret;
> + }
> +
> + end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> + SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
> + end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, i);
> +
> + spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> + ret = xive_router_set_end(xrtr, end_blk, end_idx, &end);
> + if (ret) {
> + error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> + vcpu_id, i);
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void spapr_xive_kvm_set_eas_state(sPAPRXive *xive, Error **errp)
> +{
> + XiveSource *xsrc = &xive->source;
> + int i;
> +
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + XiveEAS *eas = &xive->eat[i];
> + uint32_t end_idx;
> + uint32_t end_blk;
> + uint32_t eisn;
> + uint8_t priority;
> + uint32_t server;
> + uint64_t kvm_eas;
> + Error *local_err = NULL;
> +
> + /* No need to set MASKED EAS, this is the default state after reset
> */
> + if (!(eas->w & EAS_VALID) || eas->w & EAS_MASKED) {
> + continue;
> + }
> +
> + end_idx = GETFIELD(EAS_END_INDEX, eas->w);
> + end_blk = GETFIELD(EAS_END_BLOCK, eas->w);
> + eisn = GETFIELD(EAS_END_DATA, eas->w);
> +
> + spapr_xive_end_to_target(xive, end_blk, end_idx, &server, &priority);
> +
> + kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
> + KVM_XIVE_EAS_PRIORITY_MASK;
> + kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
> + KVM_XIVE_EAS_SERVER_MASK;
> + kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> + KVM_XIVE_EAS_EISN_MASK;
> +
> + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, true,
> + &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> + }
> +}
> +
> +static void spapr_xive_kvm_get_eas_state(sPAPRXive *xive, Error **errp)
> +{
> + XiveSource *xsrc = &xive->source;
> + int i;
> +
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + XiveEAS *eas = &xive->eat[i];
> + XiveEAS new_eas;
> + uint64_t kvm_eas;
> + uint8_t priority;
> + uint32_t server;
> + uint32_t end_idx;
> + uint8_t end_blk;
> + uint32_t eisn;
> + Error *local_err = NULL;
> +
> + if (!(eas->w & EAS_VALID)) {
> + continue;
> + }
> +
> + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, false,
> + &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> +
> + priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> + KVM_XIVE_EAS_PRIORITY_SHIFT;
> + server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> + KVM_XIVE_EAS_SERVER_SHIFT;
> + eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
> +
> + if (spapr_xive_target_to_end(xive, server, priority, &end_blk,
> + &end_idx)) {
> + error_setg(errp, "XIVE: invalid tuple CPU %d priority %d",
> server,
> + priority);
> + return;
> + }
> +
> + new_eas.w = EAS_VALID;
> + if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
> + new_eas.w |= EAS_MASKED;
> + }
> +
> + new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
> + new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
> + new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
> +
> + *eas = new_eas;
> + }
> +}
> +
> +static void spapr_xive_kvm_sync_all(sPAPRXive *xive, Error **errp)
> +{
> + XiveSource *xsrc = &xive->source;
> + Error *local_err = NULL;
> + int i;
> +
> + /* Sync the KVM source. This reaches the XIVE HW through OPAL */
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + XiveEAS *eas = &xive->eat[i];
> +
> + if (!(eas->w & EAS_VALID)) {
> + continue;
> + }
> +
> + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SYNC, i, NULL, true,
> + &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> + }
> +}
> +
> +/*
> + * The sPAPRXive KVM model migration priority is higher to make sure
Higher than what?
> + * its 'pre_save' method runs before all the other XIVE models. It
If the other XIVE components are children of sPAPRXive (which I think
they are or could be), then I believe the parent object's pre_save
will automatically be called first.
> + * orchestrates the capture sequence of the XIVE states in the
> + * following order:
> + *
> + * 1. mask all the sources by setting PQ=01, which returns the
> + * previous value and save it.
> + * 2. sync the sources in KVM to stabilize all the queues
> + * sync the ENDs to make sure END -> VP is fully completed
> + * 3. dump the EAS table
> + * 4. dump the END table
> + * 5. dump the thread context (IPB)
> + *
> + * Rollback to restore the current configuration of the sources
> + */
> +static int spapr_xive_kvm_pre_save(sPAPRXive *xive)
> +{
> + XiveSource *xsrc = &xive->source;
> + Error *local_err = NULL;
> + CPUState *cs;
> + int i;
> + int ret = 0;
> +
> + /* Quiesce the sources, to stop the flow of event notifications */
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + /*
> + * Mask and save the ESB PQs locally in the XiveSource object.
> + */
> + uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01);
> + xive_source_esb_set(xsrc, i, pq);
> + }
> +
> + /* Sync the sources in KVM */
> + spapr_xive_kvm_sync_all(xive, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + goto out;
> + }
> +
> + /* Grab the EAT (could be done earlier ?) */
> + spapr_xive_kvm_get_eas_state(xive, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + goto out;
> + }
> +
> + /*
> + * Grab the ENDs. The EQ index and the toggle bit are what we want
> + * to capture
> + */
> + CPU_FOREACH(cs) {
> + spapr_xive_kvm_get_eq_state(xive, cs, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + goto out;
> + }
> + }
> +
> + /* Capture the thread interrupt contexts */
> + CPU_FOREACH(cs) {
> + PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> + /* TODO: Check if we need to use under run_on_cpu() ? */
> + xive_tctx_kvm_get_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + goto out;
> + }
> + }
> +
> + /* All done. */
> +
> +out:
> + /* Restore the sources to their initial state */
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + uint8_t pq = xive_source_esb_get(xsrc, i);
> + if (xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)) != 0x1) {
> + error_report("XIVE: IRQ %d has an invalid state", i);
> + }
> + }
> +
> + /*
> + * The XiveSource and the XiveTCTX states will be collected by
> + * their respective vmstate handlers afterwards.
> + */
> + return ret;
> +}
> +
> +/*
> + * The sPAPRXive 'post_load' method is called by the sPAPR machine,
> + * after all XIVE device states have been transfered and loaded.
> + *
> + * All should be in place when the VCPUs resume execution.
> + */
> +static int spapr_xive_kvm_post_load(sPAPRXive *xive, int version_id)
> +{
> + XiveSource *xsrc = &xive->source;
> + Error *local_err = NULL;
> + CPUState *cs;
> + int i;
> +
> + /* Set the ENDs first. The targetting depends on it. */
> + CPU_FOREACH(cs) {
> + spapr_xive_kvm_set_eq_state(xive, cs, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -1;
> + }
> + }
> +
> + /* Restore the targetting, if any */
> + spapr_xive_kvm_set_eas_state(xive, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -1;
> + }
> +
> + /* Restore the thread interrupt contexts */
> + CPU_FOREACH(cs) {
> + PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> + xive_tctx_kvm_set_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -1;
> + }
> + }
> +
> + /*
> + * Get the saved state from the XiveSource model and restore the
> + * PQ bits
> + */
> + for (i = 0; i < xsrc->nr_irqs; i++) {
> + uint8_t pq = xive_source_esb_get(xsrc, i);
> + xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
> + }
> + return 0;
> +}
> +
> +static void spapr_xive_kvm_synchronize_state(sPAPRXive *xive)
> +{
> + XiveSource *xsrc = &xive->source;
> + CPUState *cs;
> +
> + xive_source_kvm_get_state(xsrc);
> +
> + spapr_xive_kvm_get_eas_state(xive, &error_fatal);
> +
> + CPU_FOREACH(cs) {
> + spapr_xive_kvm_get_eq_state(xive, cs, &error_fatal);
> + }
> +}
>
> static void spapr_xive_kvm_instance_init(Object *obj)
> {
> @@ -409,6 +899,10 @@ static void spapr_xive_kvm_class_init(ObjectClass
> *klass, void *data)
>
> dc->desc = "sPAPR XIVE KVM Interrupt Controller";
> dc->unrealize = spapr_xive_kvm_unrealize;
> +
> + sxc->synchronize_state = spapr_xive_kvm_synchronize_state;
> + sxc->pre_save = spapr_xive_kvm_pre_save;
> + sxc->post_load = spapr_xive_kvm_post_load;
> }
>
> static const TypeInfo spapr_xive_kvm_info = {
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 9bb37553c9ec..c9aedecc8216 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -438,9 +438,14 @@ static const struct {
>
> void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
> {
> + XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
> int i;
>
> + if (xtc->synchronize_state) {
> + xtc->synchronize_state(tctx);
> + }
> +
> monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE
> PIPR"
> " W2\n", cpu_index);
>
> @@ -552,10 +557,23 @@ static void xive_tctx_base_unrealize(DeviceState *dev,
> Error **errp)
> qemu_unregister_reset(xive_tctx_base_reset, dev);
> }
>
> +static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
> +{
> + XiveTCTX *tctx = XIVE_TCTX_BASE(opaque);
> + XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> +
> + if (xtc->post_load) {
> + return xtc->post_load(tctx, version_id);
> + }
> +
> + return 0;
> +}
> +
> static const VMStateDescription vmstate_xive_tctx_base = {
> .name = TYPE_XIVE_TCTX,
> .version_id = 1,
> .minimum_version_id = 1,
> + .post_load = vmstate_xive_tctx_post_load,
> .fields = (VMStateField[]) {
> VMSTATE_BUFFER(regs, XiveTCTX),
> VMSTATE_END_OF_LIST()
> @@ -581,9 +599,37 @@ static const TypeInfo xive_tctx_base_info = {
> .class_size = sizeof(XiveTCTXClass),
> };
>
> +static int xive_tctx_post_load(XiveTCTX *tctx, int version_id)
> +{
> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
> +
> + /*
> + * When we collect the states from KVM XIVE irqchip, we set word2
> + * of the thread context to print out the OS CAM line under the
> + * QEMU monitor.
> + *
> + * This breaks migration on a guest using TCG or not using a KVM
> + * irqchip. Fix with an extra reset of the thread contexts.
> + */
> + if (xrc->reset_tctx) {
> + xrc->reset_tctx(tctx->xrtr, tctx);
> + }
> + return 0;
> +}
> +
> +static void xive_tctx_class_init(ObjectClass *klass, void *data)
> +{
> + XiveTCTXClass *xtc = XIVE_TCTX_BASE_CLASS(klass);
> +
> + xtc->post_load = xive_tctx_post_load;
> +}
> +
> static const TypeInfo xive_tctx_info = {
> .name = TYPE_XIVE_TCTX,
> .parent = TYPE_XIVE_TCTX_BASE,
> + .instance_size = sizeof(XiveTCTX),
> + .class_init = xive_tctx_class_init,
> + .class_size = sizeof(XiveTCTXClass),
> };
>
> Object *xive_tctx_create(Object *cpu, const char *type, XiveRouter *xrtr,
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 92ef53743b64..6fac6ca70595 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -359,7 +359,7 @@ static Object
> *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
>
> static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
> {
> - return 0;
> + return spapr_xive_post_load(spapr->xive, version_id);
> }
>
> /*
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature
- Re: [Qemu-ppc] [PATCH v5 19/36] spapr: add a 'pseries-3.1-xive' machine type, (continued)
[Qemu-ppc] [PATCH v5 21/36] spapr: extend the sPAPR IRQ backend for XICS migration, Cédric Le Goater, 2018/11/16
[Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM, Cédric Le Goater, 2018/11/16
- Re: [Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM,
David Gibson <=
[Qemu-ppc] [PATCH v5 24/36] spapr: add a 'reset' method to the sPAPR IRQ backend, Cédric Le Goater, 2018/11/16
[Qemu-ppc] [PATCH v5 22/36] spapr/xive: add models for KVM support, Cédric Le Goater, 2018/11/16