From: David Gibson
Subject: Re: [Qemu-devel] [PATCH v5 23/36] spapr/xive: add migration support for KVM
Date: Thu, 29 Nov 2018 14:43:58 +1100
User-agent: Mutt/1.10.1 (2018-07-13)

On Fri, Nov 16, 2018 at 11:57:16AM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE models to handle the state synchronization
> with KVM, for the monitor usage and for the migration.
> 
> The migration priority of the XIVE interrupt controller sPAPRXive is
> raised for KVM. It operates first and orchestrates the capture
> sequence of the states of all the XIVE models. The XIVE sources are
> masked to quiesce the interrupt flow and a XIVE sync is performed to
> stabilize the OS Event Queues. The states of the ENDs are then captured
> by the XIVE interrupt controller model, sPAPRXive, and the state of
> the thread contexts by the thread interrupt presenter model,
> XiveTCTX. When done, a rollback is performed to restore the sources to
> their initial state.
> 
> The sPAPRXive 'post_load' method is called from the sPAPR machine,
> after all XIVE device states have been transferred and loaded. First,
> sPAPRXive restores the XIVE routing tables: ENDT and EAT. Next, the
> thread interrupt context registers and the source PQ bits are
> restored.
> 
> The get/set operations rely on their KVM counterpart in the host
> kernel which acts as a proxy for OPAL, the host firmware.
> 
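
For readers following the flow: the "KVM counterpart" here is the KVM
device attribute interface. A minimal sketch of one such get, using
QEMU's kvm_device_access() helper and the group/attribute values this
patch introduces (error handling trimmed; 'lisn' is a hypothetical
source number, not from the patch):

    uint64_t kvm_eas;
    Error *local_err = NULL;

    /* Read the 64-bit EAS attribute of source 'lisn' back from the
     * in-kernel XIVE device; 'false' selects a get rather than a set. */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, lisn,
                      &kvm_eas, false, &local_err);
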
> Signed-off-by: Cédric Le Goater <address@hidden>
> ---
> 
>  WIP:
>  
>     If migration occurs when a VCPU is 'ceded', some of the OS event
>     notification queues are mapped to the ZERO_PAGE on the receiving
>     side, as if the HW had triggered a page fault before the dirty
>     page was transferred from the source, or as if we were not using
>     the correct page table.
> 
>  include/hw/ppc/spapr_xive.h     |   5 +
>  include/hw/ppc/xive.h           |   3 +
>  include/migration/vmstate.h     |   1 +
>  linux-headers/asm-powerpc/kvm.h |  33 +++
>  hw/intc/spapr_xive.c            |  32 +++
>  hw/intc/spapr_xive_kvm.c        | 494 ++++++++++++++++++++++++++++++++
>  hw/intc/xive.c                  |  46 +++
>  hw/ppc/spapr_irq.c              |   2 +-
>  8 files changed, 615 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 9c817bb7ae74..d2517c040958 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -55,12 +55,17 @@ typedef struct sPAPRXiveClass {
>      XiveRouterClass parent_class;
>  
>      DeviceRealize   parent_realize;
> +
> +    void (*synchronize_state)(sPAPRXive *xive);
> +    int  (*pre_save)(sPAPRXive *xsrc);
> +    int  (*post_load)(sPAPRXive *xsrc, int version_id);

This should go away if the KVM and non-KVM versions are in the same
object.

>  } sPAPRXiveClass;
>  
>  bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
>  
>  /*
>   * sPAPR NVT and END indexing helpers
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 7aaf5a182cb3..c8201462d698 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -309,6 +309,9 @@ typedef struct XiveTCTXClass {
>      DeviceClass       parent_class;
>  
>      DeviceRealize     parent_realize;
> +
> +    void (*synchronize_state)(XiveTCTX *tctx);
> +    int  (*post_load)(XiveTCTX *tctx, int version_id);

.. and this too.

>  } XiveTCTXClass;
>  
>  /*
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 2b501d04669a..ee2e836cc1c1 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -154,6 +154,7 @@ typedef enum {
>      MIG_PRI_PCI_BUS,            /* Must happen before IOMMU */
>      MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
>      MIG_PRI_GICV3,              /* Must happen before the ITS */
> +    MIG_PRI_XIVE_IC,            /* Must happen before all XIVE models */

Ugh.. explicit priority / order levels are a pretty bad code smell.
Usually migration ordering can be handled by getting the object
hierarchy right.  What exactly is the problem you're addressing with
this?
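
For reference, the only thing this level would feed is the .priority
field of a VMStateDescription; savevm sorts section saves by descending
priority. A sketch of a section that must be saved before the default
(MIG_PRI_DEFAULT) ones:

    static const VMStateDescription vmstate_xive_ic_sketch = {
        .name = "xive-ic-sketch",
        .version_id = 1,
        .minimum_version_id = 1,
        .priority = MIG_PRI_XIVE_IC,  /* saved before MIG_PRI_DEFAULT */
        .fields = (VMStateField[]) {
            VMSTATE_END_OF_LIST()
        },
    };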


>      MIG_PRI_MAX,
>  } MigrationPriority;
>  
> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> index f34c971491dd..9d55ade23634 100644
> --- a/linux-headers/asm-powerpc/kvm.h
> +++ b/linux-headers/asm-powerpc/kvm.h

Again, linux-headers need to be split out.

> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
>  #define  KVM_REG_PPC_ICP_PPRI_SHIFT  16      /* pending irq priority */
>  #define  KVM_REG_PPC_ICP_PPRI_MASK   0xff
>  
> +#define KVM_REG_PPC_NVT_STATE        (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
> +
>  /* Device control API: PPC-specific devices */
>  #define KVM_DEV_MPIC_GRP_MISC                1
>  #define   KVM_DEV_MPIC_BASE_ADDR     0       /* 64-bit */
> @@ -681,10 +683,41 @@ struct kvm_ppc_cpu_char {
>  #define   KVM_DEV_XIVE_GET_TIMA_FD   2
>  #define   KVM_DEV_XIVE_VC_BASE               3
>  #define KVM_DEV_XIVE_GRP_SOURCES     2       /* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_SYNC                3       /* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_EAS         4       /* 64-bit eas attributes */
> +#define KVM_DEV_XIVE_GRP_EQ          5       /* 64-bit eq attributes */
>  
>  /* Layout of 64-bit XIVE source attribute values */
>  #define KVM_XIVE_LEVEL_SENSITIVE     (1ULL << 0)
>  #define KVM_XIVE_LEVEL_ASSERTED              (1ULL << 1)
>  
> +/* Layout of 64-bit eas attribute values */
> +#define KVM_XIVE_EAS_PRIORITY_SHIFT  0
> +#define KVM_XIVE_EAS_PRIORITY_MASK   0x7
> +#define KVM_XIVE_EAS_SERVER_SHIFT    3
> +#define KVM_XIVE_EAS_SERVER_MASK     0xfffffff8ULL
> +#define KVM_XIVE_EAS_MASK_SHIFT              32
> +#define KVM_XIVE_EAS_MASK_MASK               0x100000000ULL
> +#define KVM_XIVE_EAS_EISN_SHIFT              33
> +#define KVM_XIVE_EAS_EISN_MASK               0xfffffffe00000000ULL
> +
> +/* Layout of 64-bit eq attribute */
> +#define KVM_XIVE_EQ_PRIORITY_SHIFT   0
> +#define KVM_XIVE_EQ_PRIORITY_MASK    0x7
> +#define KVM_XIVE_EQ_SERVER_SHIFT     3
> +#define KVM_XIVE_EQ_SERVER_MASK              0xfffffff8ULL
> +
> +/* Layout of 64-bit eq attribute values */
> +struct kvm_ppc_xive_eq {
> +     __u32 flags;
> +     __u32 qsize;
> +     __u64 qpage;
> +     __u32 qtoggle;
> +     __u32 qindex;
> +};
> +
> +#define KVM_XIVE_EQ_FLAG_ENABLED     0x00000001
> +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY       0x00000002
> +#define KVM_XIVE_EQ_FLAG_ESCALATE    0x00000004
>  
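As an aside, the (server, priority) packing described by these masks
ends up open-coded twice in the patch; a hypothetical helper (not part
of the patch) would capture the encoding in one place:

    /* Encode a (server, priority) tuple into the 64-bit attribute
     * index expected by KVM_DEV_XIVE_GRP_EQ. */
    static uint64_t kvm_xive_eq_idx(uint32_t server, uint8_t priority)
    {
        return ((uint64_t)priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
                KVM_XIVE_EQ_PRIORITY_MASK) |
               ((uint64_t)server << KVM_XIVE_EQ_SERVER_SHIFT &
                KVM_XIVE_EQ_SERVER_MASK);
    }
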
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index ec85f7e4f88d..c5c0e063dc33 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -27,9 +27,14 @@
>  
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>  {
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
>      int i;
>      uint32_t offset = 0;
>  
> +    if (sxc->synchronize_state) {
> +        sxc->synchronize_state(xive);
> +    }
> +
>      monitor_printf(mon, "XIVE Source %08x .. %08x\n", offset,
>                     offset + xive->source.nr_irqs - 1);
>      xive_source_pic_print_info(&xive->source, offset, mon);
> @@ -354,10 +359,37 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
>      },
>  };
>  
> +static int vmstate_spapr_xive_pre_save(void *opaque)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE_BASE(opaque);
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> +    if (sxc->pre_save) {
> +        return sxc->pre_save(xive);
> +    }
> +
> +    return 0;
> +}
> +
> +/* handled at the machine level */
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
> +{
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> +    if (sxc->post_load) {
> +        return sxc->post_load(xive, version_id);
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_spapr_xive_base = {
>      .name = TYPE_SPAPR_XIVE,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .pre_save = vmstate_spapr_xive_pre_save,
> +    .post_load = NULL, /* handled at the machine level */
> +    .priority = MIG_PRI_XIVE_IC,
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 767f90826e43..176083c37d61 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -58,6 +58,58 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> +static void xive_tctx_kvm_set_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4];
> +    int ret;
> +
> +    /* word0 and word1 of the OS ring. */
> +    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
> +
> +    /* VP identifier. Only for KVM pr_debug() */
> +    state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
> +
> +    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno, "Could not restore KVM XIVE CPU %ld state",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +    }
> +}
> +
> +static void xive_tctx_kvm_get_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4] = { 0 };
> +    int ret;
> +
> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno, "Could not capture KVM XIVE CPU %ld state",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +        return;
> +    }
> +
> +    /* word0 and word1 of the OS ring. */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +
> +    /*
> +     * KVM also returns word2 containing the VP CAM line value which
> +     * is interesting to print out the VP identifier in the QEMU
> +     * monitor. No need to restore it.
> +     */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> +}
> +
> +static void xive_tctx_kvm_do_synchronize_state(CPUState *cpu,
> +                                              run_on_cpu_data arg)
> +{
> +    xive_tctx_kvm_get_state(arg.host_ptr, &error_fatal);
> +}
> +
> +static void xive_tctx_kvm_synchronize_state(XiveTCTX *tctx)
> +{
> +    run_on_cpu(tctx->cs, xive_tctx_kvm_do_synchronize_state,
> +               RUN_ON_CPU_HOST_PTR(tctx));
> +}
>  
>  static void xive_tctx_kvm_init(XiveTCTX *tctx, Error **errp)
>  {
> @@ -112,6 +164,8 @@ static void xive_tctx_kvm_class_init(ObjectClass *klass, void *data)
>  
>      device_class_set_parent_realize(dc, xive_tctx_kvm_realize,
>                                      &xtc->parent_realize);
> +
> +    xtc->synchronize_state = xive_tctx_kvm_synchronize_state;
>  }
>  
>  static const TypeInfo xive_tctx_kvm_info = {
> @@ -166,6 +220,34 @@ static void xive_source_kvm_reset(DeviceState *dev)
>      xive_source_kvm_init(xsrc, &error_fatal);
>  }
>  
> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
> +
> +    /* Prevent the compiler from optimizing away the load */
> +    volatile uint64_t value = *((uint64_t *) addr);
> +
> +    return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void xive_source_kvm_get_state(XiveSource *xsrc)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /* Perform a load without side effect to retrieve the PQ bits */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> +        /* and save PQ locally */
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +}
> +
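
For reference, the two-bit PQ values being latched here follow the
usual ESB encoding (a sketch, assuming the XIVE_ESB_* definitions from
xive.h):

    #define XIVE_ESB_RESET    0x0  /* PQ=00: idle, next event notifies   */
    #define XIVE_ESB_OFF      0x1  /* PQ=01: masked                      */
    #define XIVE_ESB_PENDING  0x2  /* PQ=10: event forwarded, EOI needed */
    #define XIVE_ESB_QUEUED   0x3  /* PQ=11: pending plus coalesced event */
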
>  static void xive_source_kvm_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -295,6 +377,414 @@ static const TypeInfo xive_source_kvm_info = {
>  /*
>   * sPAPR XIVE Router (KVM)
>   */
> +static int spapr_xive_kvm_set_eq_state(sPAPRXive *xive, CPUState *cs,
> +                                       Error **errp)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        XiveEND end;
> +        uint8_t end_blk;
> +        uint32_t end_idx;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +
> +        if (!spapr_xive_priority_is_valid(i)) {
> +            continue;
> +        }
> +
> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        ret = xive_router_get_end(xrtr, end_blk, end_idx, &end);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> +                       vcpu_id, i);
> +            return ret;
> +        }
> +
> +        if (!(end.w0 & END_W0_VALID)) {
> +            continue;
> +        }
> +
> +        /* Build the KVM state from the local END structure */
> +        kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> +        kvm_eq.qsize   = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> +        kvm_eq.qpage   = (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
> +        kvm_eq.qtoggle = GETFIELD(END_W1_GENERATION, end.w1);
> +        kvm_eq.qindex  = GETFIELD(END_W1_PAGE_OFF, end.w1);
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, true, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
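
A note on the qsize conversion above: END_W0_QSIZE stores
log2(queue size) - 12 (EQs start at 4KB), while kvm_ppc_xive_eq.qsize
carries the plain log2 value, hence the +12 here and the matching -12
in the get path below. A sketch of the relation (hypothetical variable
names):

    /* END_W0_QSIZE = 0 means a 4KB queue, 1 means 8KB, ... */
    uint32_t qsize_log2  = GETFIELD(END_W0_QSIZE, end.w0) + 12;
    uint64_t qsize_bytes = 1ull << qsize_log2;
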
> +static int spapr_xive_kvm_get_eq_state(sPAPRXive *xive, CPUState *cs,
> +                                       Error **errp)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +        XiveEND end = { 0 };
> +        uint8_t end_blk, nvt_blk;
> +        uint32_t end_idx, nvt_idx;
> +
> +        /* Skip priorities reserved for the hypervisor */
> +        if (!spapr_xive_priority_is_valid(i)) {
> +            continue;
> +        }
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, false, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +
> +        if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
> +            continue;
> +        }
> +
> +        /* Update the local END structure with the KVM input */
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
> +                end.w0 |= END_W0_VALID | END_W0_ENQUEUE;
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
> +                end.w0 |= END_W0_UCOND_NOTIFY;
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
> +                end.w0 |= END_W0_ESCALATE_CTL;
> +        }
> +        end.w0 |= SETFIELD(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
> +
> +        end.w1 = SETFIELD(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> +            SETFIELD(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> +        end.w2 = (kvm_eq.qpage >> 32) & 0x0fffffff;
> +        end.w3 = kvm_eq.qpage & 0xffffffff;
> +        end.w4 = 0;
> +        end.w5 = 0;
> +
> +        ret = spapr_xive_cpu_to_nvt(xive, POWERPC_CPU(cs), &nvt_blk, &nvt_idx);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No NVT for CPU %ld", vcpu_id);
> +            return ret;
> +        }
> +
> +        end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> +            SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
> +        end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, i);
> +
> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        ret = xive_router_set_end(xrtr, end_blk, end_idx, &end);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> +                       vcpu_id, i);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void spapr_xive_kvm_set_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        uint32_t end_idx;
> +        uint32_t end_blk;
> +        uint32_t eisn;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint64_t kvm_eas;
> +        Error *local_err = NULL;
> +
> +        /* No need to set MASKED EAS, this is the default state after reset */
> +        if (!(eas->w & EAS_VALID) || eas->w & EAS_MASKED) {
> +            continue;
> +        }
> +
> +        end_idx = GETFIELD(EAS_END_INDEX, eas->w);
> +        end_blk = GETFIELD(EAS_END_BLOCK, eas->w);
> +        eisn = GETFIELD(EAS_END_DATA, eas->w);
> +
> +        spapr_xive_end_to_target(xive, end_blk, end_idx, &server, &priority);
> +
> +        kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
> +            KVM_XIVE_EAS_PRIORITY_MASK;
> +        kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
> +            KVM_XIVE_EAS_SERVER_MASK;
> +        kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> +            KVM_XIVE_EAS_EISN_MASK;
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, true,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
> +static void spapr_xive_kvm_get_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        XiveEAS new_eas;
> +        uint64_t kvm_eas;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint32_t end_idx;
> +        uint8_t end_blk;
> +        uint32_t eisn;
> +        Error *local_err = NULL;
> +
> +        if (!(eas->w & EAS_VALID)) {
> +            continue;
> +        }
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, false,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +
> +        priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> +            KVM_XIVE_EAS_PRIORITY_SHIFT;
> +        server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> +            KVM_XIVE_EAS_SERVER_SHIFT;
> +        eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
> +
> +        if (spapr_xive_target_to_end(xive, server, priority, &end_blk,
> +                                     &end_idx)) {
> +            error_setg(errp, "XIVE: invalid tuple CPU %d priority %d",
> +                       server, priority);
> +            return;
> +        }
> +
> +        new_eas.w = EAS_VALID;
> +        if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
> +            new_eas.w |= EAS_MASKED;
> +        }
> +
> +        new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
> +        new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
> +
> +        *eas = new_eas;
> +    }
> +}
> +
> +static void spapr_xive_kvm_sync_all(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    int i;
> +
> +    /* Sync the KVM source. This reaches the XIVE HW through OPAL */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +
> +        if (!(eas->w & EAS_VALID)) {
> +            continue;
> +        }
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SYNC, i, NULL, true,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
> +/*
> + * The sPAPRXive KVM model migration priority is higher to make sure

Higher than what?

> + * its 'pre_save' method runs before all the other XIVE models. It

If the other XIVE components are children of sPAPRXive (which I think
they are or could be), then I believe the parent object's pre_save
will automatically be called first.
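
i.e. something along these lines (a sketch only, assuming the source
state can be embedded as a substructure and that a vmstate_xive_source
vmsd exists for it) -- the parent's pre_save then runs before the
embedded fields are saved, with no explicit priority needed:

    static const VMStateDescription vmstate_spapr_xive_sketch = {
        .name = "spapr-xive-sketch",
        .version_id = 1,
        .minimum_version_id = 1,
        .pre_save = vmstate_spapr_xive_pre_save,  /* runs first */
        .fields = (VMStateField[]) {
            VMSTATE_STRUCT(source, sPAPRXive, 1, vmstate_xive_source,
                           XiveSource),
            VMSTATE_END_OF_LIST()
        },
    };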

> + * orchestrates the capture sequence of the XIVE states in the
> + * following order:
> + *
> + *   1. mask all the sources by setting PQ=01, and save the
> + *      previous PQ value this load returns
> + *   2. sync the sources in KVM to stabilize all the queues
> + *      sync the ENDs to make sure END -> VP is fully completed
> + *   3. dump the EAS table
> + *   4. dump the END table
> + *   5. dump the thread context (IPB)
> + *
> + *  Roll back to restore the sources to their pre-capture configuration



> + */
> +static int spapr_xive_kvm_pre_save(sPAPRXive *xive)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +    int i;
> +    int ret = 0;
> +
> +    /* Quiesce the sources, to stop the flow of event notifications */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /*
> +         * Mask and save the ESB PQs locally in the XiveSource object.
> +         */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01);
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +
> +    /* Sync the sources in KVM */
> +    spapr_xive_kvm_sync_all(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        goto out;
> +    }
> +
> +    /* Grab the EAT (could be done earlier ?) */
> +    spapr_xive_kvm_get_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        goto out;
> +    }
> +
> +    /*
> +     * Grab the ENDs. The EQ index and the toggle bit are what we want
> +     * to capture
> +     */
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_get_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            goto out;
> +        }
> +    }
> +
> +    /* Capture the thread interrupt contexts */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        /* TODO: check whether this needs to run under run_on_cpu() */
> +        xive_tctx_kvm_get_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            goto out;
> +        }
> +    }
> +
> +    /* All done. */
> +
> +out:
> +    /* Restore the sources to their initial state */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> +        if (xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)) != 0x1) {
> +            error_report("XIVE: IRQ %d has an invalid state", i);
> +        }
> +    }
> +
> +    /*
> +     * The XiveSource and the XiveTCTX states will be collected by
> +     * their respective vmstate handlers afterwards.
> +     */
> +    return ret;
> +}
> +
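
One subtlety in the rollback loop above that could use a comment: the
XIVE_ESB_SET_PQ_00 + (pq << 8) arithmetic relies on the four PQ-setting
ESB load offsets being 0x100 apart (assuming the usual values from
xive.h), so the saved two-bit value selects the matching load:

    /* XIVE_ESB_SET_PQ_00 0xc00, _01 0xd00, _10 0xe00, _11 0xf00:
     * adding pq << 8 restores exactly the PQ state saved earlier. */
    uint32_t offset = XIVE_ESB_SET_PQ_00 + ((uint32_t)pq << 8);
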
> +/*
> + * The sPAPRXive 'post_load' method is called by the sPAPR machine,
> + * after all XIVE device states have been transferred and loaded.
> + *
> + * All should be in place when the VCPUs resume execution.
> + */
> +static int spapr_xive_kvm_post_load(sPAPRXive *xive, int version_id)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +    int i;
> +
> +    /* Set the ENDs first. The targeting depends on it. */
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_set_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /* Restore the targeting, if any */
> +    spapr_xive_kvm_set_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -1;
> +    }
> +
> +    /* Restore the thread interrupt contexts */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        xive_tctx_kvm_set_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /*
> +     * Get the saved state from the XiveSource model and restore the
> +     * PQ bits
> +     */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> +        xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
> +    }
> +    return 0;
> +}
> +
> +static void spapr_xive_kvm_synchronize_state(sPAPRXive *xive)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    CPUState *cs;
> +
> +    xive_source_kvm_get_state(xsrc);
> +
> +    spapr_xive_kvm_get_eas_state(xive, &error_fatal);
> +
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_get_eq_state(xive, cs, &error_fatal);
> +    }
> +}
>  
>  static void spapr_xive_kvm_instance_init(Object *obj)
>  {
> @@ -409,6 +899,10 @@ static void spapr_xive_kvm_class_init(ObjectClass *klass, void *data)
>  
>      dc->desc = "sPAPR XIVE KVM Interrupt Controller";
>      dc->unrealize = spapr_xive_kvm_unrealize;
> +
> +    sxc->synchronize_state = spapr_xive_kvm_synchronize_state;
> +    sxc->pre_save = spapr_xive_kvm_pre_save;
> +    sxc->post_load = spapr_xive_kvm_post_load;
>  }
>  
>  static const TypeInfo spapr_xive_kvm_info = {
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 9bb37553c9ec..c9aedecc8216 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -438,9 +438,14 @@ static const struct {
>  
>  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>  {
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>      int i;
>  
> +    if (xtc->synchronize_state) {
> +        xtc->synchronize_state(tctx);
> +    }
> +
> +    monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
> +                   "  W2\n", cpu_index);
>  
> @@ -552,10 +557,23 @@ static void xive_tctx_base_unrealize(DeviceState *dev, Error **errp)
>      qemu_unregister_reset(xive_tctx_base_reset, dev);
>  }
>  
> +static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
> +{
> +    XiveTCTX *tctx = XIVE_TCTX_BASE(opaque);
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> +
> +    if (xtc->post_load) {
> +        return xtc->post_load(tctx, version_id);
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_xive_tctx_base = {
>      .name = TYPE_XIVE_TCTX,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .post_load = vmstate_xive_tctx_post_load,
>      .fields = (VMStateField[]) {
>          VMSTATE_BUFFER(regs, XiveTCTX),
>          VMSTATE_END_OF_LIST()
> @@ -581,9 +599,37 @@ static const TypeInfo xive_tctx_base_info = {
>      .class_size    = sizeof(XiveTCTXClass),
>  };
>  
> +static int xive_tctx_post_load(XiveTCTX *tctx, int version_id)
> +{
> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
> +
> +    /*
> +     * When we collect the states from KVM XIVE irqchip, we set word2
> +     * of the thread context to print out the OS CAM line under the
> +     * QEMU monitor.
> +     *
> +     * This breaks migration on a guest using TCG or not using a KVM
> +     * irqchip. Fix with an extra reset of the thread contexts.
> +     */
> +    if (xrc->reset_tctx) {
> +        xrc->reset_tctx(tctx->xrtr, tctx);
> +    }
> +    return 0;
> +}
> +
> +static void xive_tctx_class_init(ObjectClass *klass, void *data)
> +{
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_CLASS(klass);
> +
> +    xtc->post_load = xive_tctx_post_load;
> +}
> +
>  static const TypeInfo xive_tctx_info = {
>      .name          = TYPE_XIVE_TCTX,
>      .parent        = TYPE_XIVE_TCTX_BASE,
> +    .instance_size = sizeof(XiveTCTX),
> +    .class_init    = xive_tctx_class_init,
> +    .class_size    = sizeof(XiveTCTXClass),
>  };
>  
>  Object *xive_tctx_create(Object *cpu, const char *type, XiveRouter *xrtr,
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 92ef53743b64..6fac6ca70595 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -359,7 +359,7 @@ static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
>  
>  static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
>  {
> -    return 0;
> +    return spapr_xive_post_load(spapr->xive, version_id);
>  }
>  
>  /*

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson
