qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM


From: David Gibson
Subject: Re: [Qemu-ppc] [PATCH v5 23/36] spapr/xive: add migration support for KVM
Date: Fri, 30 Nov 2018 12:24:35 +1100
User-agent: Mutt/1.10.1 (2018-07-13)

On Thu, Nov 29, 2018 at 05:19:51PM +0100, Cédric Le Goater wrote:
> David,
> 
> Could you tell me what you think about the KVM interfaces for migration,
> the ones capturing and restoring the states?
> 
> On 11/29/18 4:43 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:57:16AM +0100, Cédric Le Goater wrote:
> >> This extends the KVM XIVE models to handle the state synchronization
> >> with KVM, for the monitor usage and for the migration.
> >>
> >> The migration priority of the XIVE interrupt controller sPAPRXive is
> >> raised for KVM. It operates first and orchestrates the capture
> >> sequence of the states of all the XIVE models. The XIVE sources are
> >> masked to quiesce the interrupt flow and a XIVE xync is performed to
> >> stabilize the OS Event Queues. The state of the ENDs is then captured
> >> by the XIVE interrupt controller model, sPAPRXive, and the state of
> >> the thread contexts by the thread interrupt presenter model,
> >> XiveTCTX. When done, a rollback is performed to restore the sources to
> >> their initial state.
> >>
> >> The sPAPRXive 'post_load' method is called from the sPAPR machine,
> >> after all XIVE device states have been transferred and loaded. First,
> >> sPAPRXive restores the XIVE routing tables: ENDT and EAT. Next, the
> >> thread interrupt context registers and the source PQ bits are
> >> restored.
> >>
> >> The get/set operations rely on their KVM counterpart in the host
> >> kernel which acts as a proxy for OPAL, the host firmware.
> >>
> >> Signed-off-by: Cédric Le Goater <address@hidden>
> >> ---
> >>
> >>  WIP:
> >>  
> >>     If migration occurs when a VCPU is 'ceded', some of the OS event
> >>     notification queues are mapped to the ZERO_PAGE on the receiving
> >>     side. As if the HW had triggered a page fault before the dirty
> >>     page was transferred from the source or as if we were not using
> >>     the correct page table.
> 
> 
> v6 adds a VM change state handler to make XIVE reach a quiescent state. 
> The sequence is a little more sophisticated and an extra KVM call 
> marks the EQ page dirty.

Ok.

> 
> >>
> >>  include/hw/ppc/spapr_xive.h     |   5 +
> >>  include/hw/ppc/xive.h           |   3 +
> >>  include/migration/vmstate.h     |   1 +
> >>  linux-headers/asm-powerpc/kvm.h |  33 +++
> >>  hw/intc/spapr_xive.c            |  32 +++
> >>  hw/intc/spapr_xive_kvm.c        | 494 ++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c                  |  46 +++
> >>  hw/ppc/spapr_irq.c              |   2 +-
> >>  8 files changed, 615 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> index 9c817bb7ae74..d2517c040958 100644
> >> --- a/include/hw/ppc/spapr_xive.h
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -55,12 +55,17 @@ typedef struct sPAPRXiveClass {
> >>      XiveRouterClass parent_class;
> >>  
> >>      DeviceRealize   parent_realize;
> >> +
> >> +    void (*synchronize_state)(sPAPRXive *xive);
> >> +    int  (*pre_save)(sPAPRXive *xsrc);
> >> +    int  (*post_load)(sPAPRXive *xsrc, int version_id);
> > 
> > This should go away if the KVM and non-KVM versions are in the same
> > object.
> 
> yes.
> 
> >>  } sPAPRXiveClass;
> >>  
> >>  bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
> >>  bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
> >>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> >>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
> >> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
> >>  
> >>  /*
> >>   * sPAPR NVT and END indexing helpers
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index 7aaf5a182cb3..c8201462d698 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -309,6 +309,9 @@ typedef struct XiveTCTXClass {
> >>      DeviceClass       parent_class;
> >>  
> >>      DeviceRealize     parent_realize;
> >> +
> >> +    void (*synchronize_state)(XiveTCTX *tctx);
> >> +    int  (*post_load)(XiveTCTX *tctx, int version_id);
> > 
> > .. and this too.
> > 
> >>  } XiveTCTXClass;
> >>  
> >>  /*
> >> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> >> index 2b501d04669a..ee2e836cc1c1 100644
> >> --- a/include/migration/vmstate.h
> >> +++ b/include/migration/vmstate.h
> >> @@ -154,6 +154,7 @@ typedef enum {
> >>      MIG_PRI_PCI_BUS,            /* Must happen before IOMMU */
> >>      MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
> >>      MIG_PRI_GICV3,              /* Must happen before the ITS */
> >> +    MIG_PRI_XIVE_IC,            /* Must happen before all XIVE models */
> > 
> > Ugh.. explicit priority / order levels are a pretty bad code smell.
> > Usually migration ordering can be handled by getting the object
> > hierarchy right.  What exactly is the problem you're addressing with
> > this?
> 
> I wanted sPAPRXive to capture the state on behalf of all XIVE models. 
> But with the addition of the VMState change handler I think I can 
> remove this priority. I will check. 
> 
> > 
> >>      MIG_PRI_MAX,
> >>  } MigrationPriority;
> >>  
> >> diff --git a/linux-headers/asm-powerpc/kvm.h 
> >> b/linux-headers/asm-powerpc/kvm.h
> >> index f34c971491dd..9d55ade23634 100644
> >> --- a/linux-headers/asm-powerpc/kvm.h
> >> +++ b/linux-headers/asm-powerpc/kvm.h
> > 
> > Again, linux-headers need to be split out.
> > 
> >> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
> >>  #define  KVM_REG_PPC_ICP_PPRI_SHIFT       16      /* pending irq priority 
> >> */
> >>  #define  KVM_REG_PPC_ICP_PPRI_MASK        0xff
> >>  
> >> +#define KVM_REG_PPC_NVT_STATE     (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
> >> +
> >>  /* Device control API: PPC-specific devices */
> >>  #define KVM_DEV_MPIC_GRP_MISC             1
> >>  #define   KVM_DEV_MPIC_BASE_ADDR  0       /* 64-bit */
> >> @@ -681,10 +683,41 @@ struct kvm_ppc_cpu_char {
> >>  #define   KVM_DEV_XIVE_GET_TIMA_FD        2
> >>  #define   KVM_DEV_XIVE_VC_BASE            3
> >>  #define KVM_DEV_XIVE_GRP_SOURCES  2       /* 64-bit source attributes */
> >> +#define KVM_DEV_XIVE_GRP_SYNC             3       /* 64-bit source 
> >> attributes */
> >> +#define KVM_DEV_XIVE_GRP_EAS              4       /* 64-bit eas 
> >> attributes */
> >> +#define KVM_DEV_XIVE_GRP_EQ               5       /* 64-bit eq attributes 
> >> */
> >>  
> >>  /* Layout of 64-bit XIVE source attribute values */
> >>  #define KVM_XIVE_LEVEL_SENSITIVE  (1ULL << 0)
> >>  #define KVM_XIVE_LEVEL_ASSERTED           (1ULL << 1)
> >>  
> >> +/* Layout of 64-bit eas attribute values */
> >> +#define KVM_XIVE_EAS_PRIORITY_SHIFT       0
> >> +#define KVM_XIVE_EAS_PRIORITY_MASK        0x7
> >> +#define KVM_XIVE_EAS_SERVER_SHIFT 3
> >> +#define KVM_XIVE_EAS_SERVER_MASK  0xfffffff8ULL
> >> +#define KVM_XIVE_EAS_MASK_SHIFT           32
> >> +#define KVM_XIVE_EAS_MASK_MASK            0x100000000ULL
> >> +#define KVM_XIVE_EAS_EISN_SHIFT           33
> >> +#define KVM_XIVE_EAS_EISN_MASK            0xfffffffe00000000ULL
> >> +
> >> +/* Layout of 64-bit eq attribute */
> >> +#define KVM_XIVE_EQ_PRIORITY_SHIFT        0
> >> +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
> >> +#define KVM_XIVE_EQ_SERVER_SHIFT  3
> >> +#define KVM_XIVE_EQ_SERVER_MASK           0xfffffff8ULL
> >> +
> >> +/* Layout of 64-bit eq attribute values */
> >> +struct kvm_ppc_xive_eq {
> >> +  __u32 flags;
> >> +  __u32 qsize;
> >> +  __u64 qpage;
> >> +  __u32 qtoggle;
> >> +  __u32 qindex;
> >> +};
> >> +
> >> +#define KVM_XIVE_EQ_FLAG_ENABLED  0x00000001
> >> +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY    0x00000002
> >> +#define KVM_XIVE_EQ_FLAG_ESCALATE 0x00000004
> >>  
> >>  #endif /* __LINUX_KVM_POWERPC_H */
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> index ec85f7e4f88d..c5c0e063dc33 100644
> >> --- a/hw/intc/spapr_xive.c
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -27,9 +27,14 @@
> >>  
> >>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> >>  {
> >> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> >>      int i;
> >>      uint32_t offset = 0;
> >>  
> >> +    if (sxc->synchronize_state) {
> >> +        sxc->synchronize_state(xive);
> >> +    }
> >> +
> >>      monitor_printf(mon, "XIVE Source %08x .. %08x\n", offset,
> >>                     offset + xive->source.nr_irqs - 1);
> >>      xive_source_pic_print_info(&xive->source, offset, mon);
> >> @@ -354,10 +359,37 @@ static const VMStateDescription 
> >> vmstate_spapr_xive_eas = {
> >>      },
> >>  };
> >>  
> >> +static int vmstate_spapr_xive_pre_save(void *opaque)
> >> +{
> >> +    sPAPRXive *xive = SPAPR_XIVE_BASE(opaque);
> >> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> >> +
> >> +    if (sxc->pre_save) {
> >> +        return sxc->pre_save(xive);
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +/* handled at the machine level */
> >> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
> >> +{
> >> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> >> +
> >> +    if (sxc->post_load) {
> >> +        return sxc->post_load(xive, version_id);
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>  static const VMStateDescription vmstate_spapr_xive_base = {
> >>      .name = TYPE_SPAPR_XIVE,
> >>      .version_id = 1,
> >>      .minimum_version_id = 1,
> >> +    .pre_save = vmstate_spapr_xive_pre_save,
> >> +    .post_load = NULL, /* handled at the machine level */
> >> +    .priority = MIG_PRI_XIVE_IC,
> >>      .fields = (VMStateField[]) {
> >>          VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
> >>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
> >> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> >> index 767f90826e43..176083c37d61 100644
> >> --- a/hw/intc/spapr_xive_kvm.c
> >> +++ b/hw/intc/spapr_xive_kvm.c
> >> @@ -58,6 +58,58 @@ static void kvm_cpu_enable(CPUState *cs)
> >>  /*
> >>   * XIVE Thread Interrupt Management context (KVM)
> >>   */
> >> +static void xive_tctx_kvm_set_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    uint64_t state[4];
> >> +    int ret;
> >> +
> >> +    /* word0 and word1 of the OS ring. */
> >> +    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
> >> +
> >> +    /* VP identifier. Only for KVM pr_debug() */
> >> +    state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
> >> +
> >> +    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> >> +    if (ret != 0) {
> >> +        error_setg_errno(errp, errno, "Could restore KVM XIVE CPU %ld 
> >> state",
> >> +                         kvm_arch_vcpu_id(tctx->cs));
> >> +    }
> >> +}
> >> +
> >> +static void xive_tctx_kvm_get_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    uint64_t state[4] = { 0 };
> >> +    int ret;
> >> +
> >> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> >> +    if (ret != 0) {
> >> +        error_setg_errno(errp, errno, "Could capture KVM XIVE CPU %ld 
> >> state",
> >> +                         kvm_arch_vcpu_id(tctx->cs));
> >> +        return;
> >> +    }
> >> +
> >> +    /* word0 and word1 of the OS ring. */
> >> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> >> +
> >> +    /*
> >> +     * KVM also returns word2 containing the VP CAM line value which
> >> +     * is interesting to print out the VP identifier in the QEMU
> >> +     * monitor. No need to restore it.
> >> +     */
> >> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> >> +}
> >> +
> >> +static void xive_tctx_kvm_do_synchronize_state(CPUState *cpu,
> >> +                                              run_on_cpu_data arg)
> >> +{
> >> +    xive_tctx_kvm_get_state(arg.host_ptr, &error_fatal);
> >> +}
> >> +
> >> +static void xive_tctx_kvm_synchronize_state(XiveTCTX *tctx)
> >> +{
> >> +    run_on_cpu(tctx->cs, xive_tctx_kvm_do_synchronize_state,
> >> +               RUN_ON_CPU_HOST_PTR(tctx));
> >> +}
> >>  
> >>  static void xive_tctx_kvm_init(XiveTCTX *tctx, Error **errp)
> >>  {
> >> @@ -112,6 +164,8 @@ static void xive_tctx_kvm_class_init(ObjectClass 
> >> *klass, void *data)
> >>  
> >>      device_class_set_parent_realize(dc, xive_tctx_kvm_realize,
> >>                                      &xtc->parent_realize);
> >> +
> >> +    xtc->synchronize_state = xive_tctx_kvm_synchronize_state;
> >>  }
> >>  
> >>  static const TypeInfo xive_tctx_kvm_info = {
> >> @@ -166,6 +220,34 @@ static void xive_source_kvm_reset(DeviceState *dev)
> >>      xive_source_kvm_init(xsrc, &error_fatal);
> >>  }
> >>  
> >> +/*
> >> + * This is used to perform the magic loads on the ESB pages, described
> >> + * in xive.h.
> >> + */
> >> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> >> +{
> >> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
> >> +
> >> +    /* Prevent the compiler from optimizing away the load */
> >> +    volatile uint64_t value = *((uint64_t *) addr);
> >> +
> >> +    return be64_to_cpu(value) & 0x3;
> >> +}
> >> +
> >> +static void xive_source_kvm_get_state(XiveSource *xsrc)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        /* Perform a load without side effect to retrieve the PQ bits */
> >> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> >> +
> >> +        /* and save PQ locally */
> >> +        xive_source_esb_set(xsrc, i, pq);
> >> +    }
> >> +}
> >> +
> >>  static void xive_source_kvm_set_irq(void *opaque, int srcno, int val)
> >>  {
> >>      XiveSource *xsrc = opaque;
> >> @@ -295,6 +377,414 @@ static const TypeInfo xive_source_kvm_info = {
> >>  /*
> >>   * sPAPR XIVE Router (KVM)
> >>   */
> >> +static int spapr_xive_kvm_set_eq_state(sPAPRXive *xive, CPUState *cs,
> >> +                                       Error **errp)
> >> +{
> >> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> >> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> >> +    int ret;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> >> +        Error *local_err = NULL;
> >> +        XiveEND end;
> >> +        uint8_t end_blk;
> >> +        uint32_t end_idx;
> >> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> >> +        uint64_t kvm_eq_idx;
> >> +
> >> +        if (!spapr_xive_priority_is_valid(i)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, 
> >> &end_idx);
> >> +
> >> +        ret = xive_router_get_end(xrtr, end_blk, end_idx, &end);
> >> +        if (ret) {
> >> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> >> +                       vcpu_id, i);
> >> +            return ret;
> >> +        }
> >> +
> >> +        if (!(end.w0 & END_W0_VALID)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        /* Build the KVM state from the local END structure */
> >> +        kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> >> +        kvm_eq.qsize   = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> >> +        kvm_eq.qpage   = (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | 
> >> end.w3;
> >> +        kvm_eq.qtoggle = GETFIELD(END_W1_GENERATION, end.w1);
> >> +        kvm_eq.qindex  = GETFIELD(END_W1_PAGE_OFF, end.w1);
> >> +
> >> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> >> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> >> +            KVM_XIVE_EQ_PRIORITY_MASK;
> >> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> >> +            KVM_XIVE_EQ_SERVER_MASK;
> >> +
> >> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> >> +                                &kvm_eq, true, &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return ret;
> >> +        }
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int spapr_xive_kvm_get_eq_state(sPAPRXive *xive, CPUState *cs,
> >> +                                       Error **errp)
> >> +{
> >> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> >> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> >> +    int ret;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> >> +        Error *local_err = NULL;
> >> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> >> +        uint64_t kvm_eq_idx;
> >> +        XiveEND end = { 0 };
> >> +        uint8_t end_blk, nvt_blk;
> >> +        uint32_t end_idx, nvt_idx;
> >> +
> >> +        /* Skip priorities reserved for the hypervisor */
> >> +        if (!spapr_xive_priority_is_valid(i)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> >> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> >> +            KVM_XIVE_EQ_PRIORITY_MASK;
> >> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> >> +            KVM_XIVE_EQ_SERVER_MASK;
> >> +
> >> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> >> +                                &kvm_eq, false, &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return ret;
> >> +        }
> >> +
> >> +        if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        /* Update the local END structure with the KVM input */
> >> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
> >> +                end.w0 |= END_W0_VALID | END_W0_ENQUEUE;
> >> +        }
> >> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
> >> +                end.w0 |= END_W0_UCOND_NOTIFY;
> >> +        }
> >> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
> >> +                end.w0 |= END_W0_ESCALATE_CTL;
> >> +        }
> >> +        end.w0 |= SETFIELD(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
> >> +
> >> +        end.w1 = SETFIELD(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> >> +            SETFIELD(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> >> +        end.w2 = (kvm_eq.qpage >> 32) & 0x0fffffff;
> >> +        end.w3 = kvm_eq.qpage & 0xffffffff;
> >> +        end.w4 = 0;
> >> +        end.w5 = 0;
> >> +
> >> +        ret = spapr_xive_cpu_to_nvt(xive, POWERPC_CPU(cs), &nvt_blk, 
> >> &nvt_idx);
> >> +        if (ret) {
> >> +            error_setg(errp, "XIVE: No NVT for CPU %ld", vcpu_id);
> >> +            return ret;
> >> +        }
> >> +
> >> +        end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> >> +            SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
> >> +        end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, i);
> >> +
> >> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, 
> >> &end_idx);
> >> +
> >> +        ret = xive_router_set_end(xrtr, end_blk, end_idx, &end);
> >> +        if (ret) {
> >> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> >> +                       vcpu_id, i);
> >> +            return ret;
> >> +        }
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static void spapr_xive_kvm_set_eas_state(sPAPRXive *xive, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        XiveEAS *eas = &xive->eat[i];
> >> +        uint32_t end_idx;
> >> +        uint32_t end_blk;
> >> +        uint32_t eisn;
> >> +        uint8_t priority;
> >> +        uint32_t server;
> >> +        uint64_t kvm_eas;
> >> +        Error *local_err = NULL;
> >> +
> >> +        /* No need to set MASKED EAS, this is the default state after 
> >> reset */
> >> +        if (!(eas->w & EAS_VALID) || eas->w & EAS_MASKED) {
> >> +            continue;
> >> +        }
> >> +
> >> +        end_idx = GETFIELD(EAS_END_INDEX, eas->w);
> >> +        end_blk = GETFIELD(EAS_END_BLOCK, eas->w);
> >> +        eisn = GETFIELD(EAS_END_DATA, eas->w);
> >> +
> >> +        spapr_xive_end_to_target(xive, end_blk, end_idx, &server, 
> >> &priority);
> >> +
> >> +        kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
> >> +            KVM_XIVE_EAS_PRIORITY_MASK;
> >> +        kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
> >> +            KVM_XIVE_EAS_SERVER_MASK;
> >> +        kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> >> +            KVM_XIVE_EAS_EISN_MASK;
> >> +
> >> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, 
> >> true,
> >> +                          &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +static void spapr_xive_kvm_get_eas_state(sPAPRXive *xive, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        XiveEAS *eas = &xive->eat[i];
> >> +        XiveEAS new_eas;
> >> +        uint64_t kvm_eas;
> >> +        uint8_t priority;
> >> +        uint32_t server;
> >> +        uint32_t end_idx;
> >> +        uint8_t end_blk;
> >> +        uint32_t eisn;
> >> +        Error *local_err = NULL;
> >> +
> >> +        if (!(eas->w & EAS_VALID)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, 
> >> false,
> >> +                          &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +
> >> +        priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> >> +            KVM_XIVE_EAS_PRIORITY_SHIFT;
> >> +        server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> >> +            KVM_XIVE_EAS_SERVER_SHIFT;
> >> +        eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> 
> >> KVM_XIVE_EAS_EISN_SHIFT;
> >> +
> >> +        if (spapr_xive_target_to_end(xive, server, priority, &end_blk,
> >> +                                     &end_idx)) {
> >> +            error_setg(errp, "XIVE: invalid tuple CPU %d priority %d", 
> >> server,
> >> +                       priority);
> >> +            return;
> >> +        }
> >> +
> >> +        new_eas.w = EAS_VALID;
> >> +        if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
> >> +            new_eas.w |= EAS_MASKED;
> >> +        }
> >> +
> >> +        new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
> >> +        new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
> >> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
> >> +
> >> +        *eas = new_eas;
> >> +    }
> >> +}
> >> +
> >> +static void spapr_xive_kvm_sync_all(sPAPRXive *xive, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    Error *local_err = NULL;
> >> +    int i;
> >> +
> >> +    /* Sync the KVM source. This reaches the XIVE HW through OPAL */
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        XiveEAS *eas = &xive->eat[i];
> >> +
> >> +        if (!(eas->w & EAS_VALID)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SYNC, i, NULL, true,
> >> +                          &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +/*
> >> + * The sPAPRXive KVM model migration priority is higher to make sure
> > 
> > Higher than what?
> 
> Than the XiveTCTX and XiveSource models.
> 
> >> + * its 'pre_save' method runs before all the other XIVE models. It
> > 
> > If the other XIVE components are children of sPAPRXive (which I think
> > they are or could be), then I believe the parent object's pre_save
> > will automatically be called first.
> 
> ok. XiveTCTX are not children of sPAPRXive but that might not be 
> a problem anymore with the VMState change handler.

Ah, right.  You might need the handler in the machine itself then - we
already have something like that for XICS, IIRC.

> 
> Thanks
> 
> C.
> 
> >> + * orchestrates the capture sequence of the XIVE states in the
> >> + * following order:
> >> + *
> >> + *   1. mask all the sources by setting PQ=01, which returns the
> >> + *      previous value and save it.
> >> + *   2. sync the sources in KVM to stabilize all the queues
> >> + *      sync the ENDs to make sure END -> VP is fully completed
> >> + *   3. dump the EAS table
> >> + *   4. dump the END table
> >> + *   5. dump the thread context (IPB)
> >> + *
> >> + *  Rollback to restore the current configuration of the sources
> > 
> > 
> > 
> >> + */
> >> +static int spapr_xive_kvm_pre_save(sPAPRXive *xive)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    Error *local_err = NULL;
> >> +    CPUState *cs;
> >> +    int i;
> >> +    int ret = 0;
> >> +
> >> +    /* Quiesce the sources, to stop the flow of event notifications */
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        /*
> >> +         * Mask and save the ESB PQs locally in the XiveSource object.
> >> +         */
> >> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01);
> >> +        xive_source_esb_set(xsrc, i, pq);
> >> +    }
> >> +
> >> +    /* Sync the sources in KVM */
> >> +    spapr_xive_kvm_sync_all(xive, &local_err);
> >> +    if (local_err) {
> >> +        error_report_err(local_err);
> >> +        goto out;
> >> +    }
> >> +
> >> +    /* Grab the EAT (could be done earlier ?) */
> >> +    spapr_xive_kvm_get_eas_state(xive, &local_err);
> >> +    if (local_err) {
> >> +        error_report_err(local_err);
> >> +        goto out;
> >> +    }
> >> +
> >> +    /*
> >> +     * Grab the ENDs. The EQ index and the toggle bit are what we want
> >> +     * to capture
> >> +     */
> >> +    CPU_FOREACH(cs) {
> >> +        spapr_xive_kvm_get_eq_state(xive, cs, &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            goto out;
> >> +        }
> >> +    }
> >> +
> >> +    /* Capture the thread interrupt contexts */
> >> +    CPU_FOREACH(cs) {
> >> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +
> >> +        /* TODO: Check if we need to use under run_on_cpu() ? */
> >> +        xive_tctx_kvm_get_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            goto out;
> >> +        }
> >> +    }
> >> +
> >> +    /* All done. */
> >> +
> >> +out:
> >> +    /* Restore the sources to their initial state */
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> >> +        if (xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)) != 
> >> 0x1) {
> >> +            error_report("XIVE: IRQ %d has an invalid state", i);
> >> +        }
> >> +    }
> >> +
> >> +    /*
> >> +     * The XiveSource and the XiveTCTX states will be collected by
> >> +     * their respective vmstate handlers afterwards.
> >> +     */
> >> +    return ret;
> >> +}
> >> +
> >> +/*
> >> + * The sPAPRXive 'post_load' method is called by the sPAPR machine,
> >> + * after all XIVE device states have been transfered and loaded.
> >> + *
> >> + * All should be in place when the VCPUs resume execution.
> >> + */
> >> +static int spapr_xive_kvm_post_load(sPAPRXive *xive, int version_id)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    Error *local_err = NULL;
> >> +    CPUState *cs;
> >> +    int i;
> >> +
> >> +    /* Set the ENDs first. The targetting depends on it. */
> >> +    CPU_FOREACH(cs) {
> >> +        spapr_xive_kvm_set_eq_state(xive, cs, &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    /* Restore the targetting, if any */
> >> +    spapr_xive_kvm_set_eas_state(xive, &local_err);
> >> +    if (local_err) {
> >> +        error_report_err(local_err);
> >> +        return -1;
> >> +    }
> >> +
> >> +    /* Restore the thread interrupt contexts */
> >> +    CPU_FOREACH(cs) {
> >> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +
> >> +        xive_tctx_kvm_set_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    /*
> >> +     * Get the saved state from the XiveSource model and restore the
> >> +     * PQ bits
> >> +     */
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> >> +        xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +static void spapr_xive_kvm_synchronize_state(sPAPRXive *xive)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    CPUState *cs;
> >> +
> >> +    xive_source_kvm_get_state(xsrc);
> >> +
> >> +    spapr_xive_kvm_get_eas_state(xive, &error_fatal);
> >> +
> >> +    CPU_FOREACH(cs) {
> >> +        spapr_xive_kvm_get_eq_state(xive, cs, &error_fatal);
> >> +    }
> >> +}
> >>  
> >>  static void spapr_xive_kvm_instance_init(Object *obj)
> >>  {
> >> @@ -409,6 +899,10 @@ static void spapr_xive_kvm_class_init(ObjectClass 
> >> *klass, void *data)
> >>  
> >>      dc->desc = "sPAPR XIVE KVM Interrupt Controller";
> >>      dc->unrealize = spapr_xive_kvm_unrealize;
> >> +
> >> +    sxc->synchronize_state = spapr_xive_kvm_synchronize_state;
> >> +    sxc->pre_save = spapr_xive_kvm_pre_save;
> >> +    sxc->post_load = spapr_xive_kvm_post_load;
> >>  }
> >>  
> >>  static const TypeInfo spapr_xive_kvm_info = {
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index 9bb37553c9ec..c9aedecc8216 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -438,9 +438,14 @@ static const struct {
> >>  
> >>  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
> >>  {
> >> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> >>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
> >>      int i;
> >>  
> >> +    if (xtc->synchronize_state) {
> >> +        xtc->synchronize_state(tctx);
> >> +    }
> >> +
> >>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC 
> >> AGE PIPR"
> >>                     "  W2\n", cpu_index);
> >>  
> >> @@ -552,10 +557,23 @@ static void xive_tctx_base_unrealize(DeviceState 
> >> *dev, Error **errp)
> >>      qemu_unregister_reset(xive_tctx_base_reset, dev);
> >>  }
> >>  
> >> +static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
> >> +{
> >> +    XiveTCTX *tctx = XIVE_TCTX_BASE(opaque);
> >> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> >> +
> >> +    if (xtc->post_load) {
> >> +        return xtc->post_load(tctx, version_id);
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>  static const VMStateDescription vmstate_xive_tctx_base = {
> >>      .name = TYPE_XIVE_TCTX,
> >>      .version_id = 1,
> >>      .minimum_version_id = 1,
> >> +    .post_load = vmstate_xive_tctx_post_load,
> >>      .fields = (VMStateField[]) {
> >>          VMSTATE_BUFFER(regs, XiveTCTX),
> >>          VMSTATE_END_OF_LIST()
> >> @@ -581,9 +599,37 @@ static const TypeInfo xive_tctx_base_info = {
> >>      .class_size    = sizeof(XiveTCTXClass),
> >>  };
> >>  
> >> +static int xive_tctx_post_load(XiveTCTX *tctx, int version_id)
> >> +{
> >> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
> >> +
> >> +    /*
> >> +     * When we collect the states from KVM XIVE irqchip, we set word2
> >> +     * of the thread context to print out the OS CAM line under the
> >> +     * QEMU monitor.
> >> +     *
> >> +     * This breaks migration on a guest using TCG or not using a KVM
> >> +     * irqchip. Fix with an extra reset of the thread contexts.
> >> +     */
> >> +    if (xrc->reset_tctx) {
> >> +        xrc->reset_tctx(tctx->xrtr, tctx);
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +static void xive_tctx_class_init(ObjectClass *klass, void *data)
> >> +{
> >> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_CLASS(klass);
> >> +
> >> +    xtc->post_load = xive_tctx_post_load;
> >> +}
> >> +
> >>  static const TypeInfo xive_tctx_info = {
> >>      .name          = TYPE_XIVE_TCTX,
> >>      .parent        = TYPE_XIVE_TCTX_BASE,
> >> +    .instance_size = sizeof(XiveTCTX),
> >> +    .class_init    = xive_tctx_class_init,
> >> +    .class_size    = sizeof(XiveTCTXClass),
> >>  };
> >>  
> >>  Object *xive_tctx_create(Object *cpu, const char *type, XiveRouter *xrtr,
> >> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> >> index 92ef53743b64..6fac6ca70595 100644
> >> --- a/hw/ppc/spapr_irq.c
> >> +++ b/hw/ppc/spapr_irq.c
> >> @@ -359,7 +359,7 @@ static Object 
> >> *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
> >>  
> >>  static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int 
> >> version_id)
> >>  {
> >> -    return 0;
> >> +    return spapr_xive_post_load(spapr->xive, version_id);
> >>  }
> >>  
> >>  /*
> > 
> 

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]