From: Cédric Le Goater
Subject: Re: [Qemu-devel] [PATCH v5 23/36] spapr/xive: add migration support for KVM
Date: Thu, 29 Nov 2018 17:19:51 +0100

David,

Could you tell me what you think about the KVM interfaces for migration,
the ones capturing and restoring the state?
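
For context, the patch drives two kernel interfaces: a one_reg vCPU
register for the thread context, and KVM device attributes for the
EAS/EQ tables. A minimal sketch of the access pattern, using the names
from the patch below (sketch only; error handling trimmed):

    /* types and constants come from the patch below */
    static void xive_kvm_state_access_sketch(XiveTCTX *tctx,
                                             sPAPRXive *xive,
                                             uint32_t irq)
    {
        uint64_t state[4] = { 0 };
        uint64_t kvm_eas = 0;

        /* per-vCPU thread context: a 256-bit one_reg register */
        kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
        kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);

        /* per-IRQ routing entry: a KVM device attribute */
        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, irq,
                          &kvm_eas, false, &error_abort); /* get */
        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, irq,
                          &kvm_eas, true, &error_abort);  /* set */
    }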

On 11/29/18 4:43 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:57:16AM +0100, Cédric Le Goater wrote:
>> This extends the KVM XIVE models to handle the state synchronization
>> with KVM, for the monitor usage and for the migration.
>>
>> The migration priority of the XIVE interrupt controller sPAPRXive is
>> raised for KVM. It operates first and orchestrates the capture
>> sequence of the states of all the XIVE models. The XIVE sources are
>> masked to quiesce the interrupt flow and a XIVE sync is performed to
>> stabilize the OS Event Queues. The state of the ENDs is then captured
>> by the XIVE interrupt controller model, sPAPRXive, and the state of
>> the thread contexts by the thread interrupt presenter model,
>> XiveTCTX. When done, a rollback is performed to restore the sources to
>> their initial state.
>>
>> The sPAPRXive 'post_load' method is called from the sPAPR machine,
>> after all XIVE device states have been transferred and loaded. First,
>> sPAPRXive restores the XIVE routing tables: ENDT and EAT. Next, the
>> thread interrupt context registers and the source PQ bits are
>> restored.
>>
>> The get/set operations rely on their KVM counterpart in the host
>> kernel which acts as a proxy for OPAL, the host firmware.
>>
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>
>>  WIP:
>>  
>>     If migration occurs when a VCPU is 'ceded', some of the OS event
>>     notification queues are mapped to the ZERO_PAGE on the receiving
>>     side. As if the HW had triggered a page fault before the dirty
>>     page was transferred from the source or as if we were not using
>>     the correct page table.


v6 adds a VM change state handler to make XIVE reach a quiescent state. 
The sequence is a little more sophisticated and an extra KVM call 
marks the EQ page dirty.
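
Roughly (a sketch only; the handler name is illustrative and the
actual quiescing steps are more involved):

    static void spapr_xive_vm_change_state_handler(void *opaque, int running,
                                                   RunState state)
    {
        sPAPRXive *xive = opaque;

        if (!running) {
            /* VM is stopping, e.g. before migration: mask the sources,
             * sync the queues in KVM, capture the state and mark the
             * EQ pages dirty */
        }
    }

    static void spapr_xive_kvm_connect_sketch(sPAPRXive *xive)
    {
        /* registered at realize time */
        qemu_add_vm_change_state_handler(spapr_xive_vm_change_state_handler,
                                         xive);
    }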

>>
>>  include/hw/ppc/spapr_xive.h     |   5 +
>>  include/hw/ppc/xive.h           |   3 +
>>  include/migration/vmstate.h     |   1 +
>>  linux-headers/asm-powerpc/kvm.h |  33 +++
>>  hw/intc/spapr_xive.c            |  32 +++
>>  hw/intc/spapr_xive_kvm.c        | 494 ++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c                  |  46 +++
>>  hw/ppc/spapr_irq.c              |   2 +-
>>  8 files changed, 615 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index 9c817bb7ae74..d2517c040958 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -55,12 +55,17 @@ typedef struct sPAPRXiveClass {
>>      XiveRouterClass parent_class;
>>  
>>      DeviceRealize   parent_realize;
>> +
>> +    void (*synchronize_state)(sPAPRXive *xive);
>> +    int  (*pre_save)(sPAPRXive *xsrc);
>> +    int  (*post_load)(sPAPRXive *xsrc, int version_id);
> 
> This should go away if the KVM and non-KVM versions are in the same
> object.

yes.

>>  } sPAPRXiveClass;
>>  
>>  bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>  bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
>> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
>>  
>>  /*
>>   * sPAPR NVT and END indexing helpers
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 7aaf5a182cb3..c8201462d698 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -309,6 +309,9 @@ typedef struct XiveTCTXClass {
>>      DeviceClass       parent_class;
>>  
>>      DeviceRealize     parent_realize;
>> +
>> +    void (*synchronize_state)(XiveTCTX *tctx);
>> +    int  (*post_load)(XiveTCTX *tctx, int version_id);
> 
> .. and this too.
> 
>>  } XiveTCTXClass;
>>  
>>  /*
>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>> index 2b501d04669a..ee2e836cc1c1 100644
>> --- a/include/migration/vmstate.h
>> +++ b/include/migration/vmstate.h
>> @@ -154,6 +154,7 @@ typedef enum {
>>      MIG_PRI_PCI_BUS,            /* Must happen before IOMMU */
>>      MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
>>      MIG_PRI_GICV3,              /* Must happen before the ITS */
>> +    MIG_PRI_XIVE_IC,            /* Must happen before all XIVE models */
> 
> Ugh.. explicit priority / order levels are a pretty bad code smell.
> Usually migration ordering can be handled by getting the object
> hierarchy right.  What exactly is the problem you're addressing with
> this?

I wanted sPAPRXive to capture the state on behalf of all XIVE models. 
But with the addition of the VMState change handler I think I can 
remove this priority. I will check. 
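
If the priority can go, the ordering would come from the object
hierarchy instead, something like (hypothetical: assumes a
vmstate_xive_source description for the embedded source):

    static const VMStateDescription vmstate_spapr_xive = {
        .name = TYPE_SPAPR_XIVE,
        .version_id = 1,
        .minimum_version_id = 1,
        .pre_save = vmstate_spapr_xive_pre_save,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
            /* the parent's pre_save runs before the embedded child's
             * fields are saved, so no MIG_PRI_* level is needed */
            VMSTATE_STRUCT(source, sPAPRXive, 1, vmstate_xive_source,
                           XiveSource),
            VMSTATE_END_OF_LIST()
        },
    };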

> 
>>      MIG_PRI_MAX,
>>  } MigrationPriority;
>>  
>> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
>> index f34c971491dd..9d55ade23634 100644
>> --- a/linux-headers/asm-powerpc/kvm.h
>> +++ b/linux-headers/asm-powerpc/kvm.h
> 
> Again, linux-headers need to be split out.
> 
>> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
>>  #define  KVM_REG_PPC_ICP_PPRI_SHIFT 16      /* pending irq priority */
>>  #define  KVM_REG_PPC_ICP_PPRI_MASK  0xff
>>  
>> +#define KVM_REG_PPC_NVT_STATE       (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
>> +
>>  /* Device control API: PPC-specific devices */
>>  #define KVM_DEV_MPIC_GRP_MISC               1
>>  #define   KVM_DEV_MPIC_BASE_ADDR    0       /* 64-bit */
>> @@ -681,10 +683,41 @@ struct kvm_ppc_cpu_char {
>>  #define   KVM_DEV_XIVE_GET_TIMA_FD  2
>>  #define   KVM_DEV_XIVE_VC_BASE              3
>>  #define KVM_DEV_XIVE_GRP_SOURCES    2       /* 64-bit source attributes */
>> +#define KVM_DEV_XIVE_GRP_SYNC               3       /* 64-bit source attributes */
>> +#define KVM_DEV_XIVE_GRP_EAS                4       /* 64-bit eas attributes */
>> +#define KVM_DEV_XIVE_GRP_EQ         5       /* 64-bit eq attributes */
>>  
>>  /* Layout of 64-bit XIVE source attribute values */
>>  #define KVM_XIVE_LEVEL_SENSITIVE    (1ULL << 0)
>>  #define KVM_XIVE_LEVEL_ASSERTED             (1ULL << 1)
>>  
>> +/* Layout of 64-bit eas attribute values */
>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT 0
>> +#define KVM_XIVE_EAS_PRIORITY_MASK  0x7
>> +#define KVM_XIVE_EAS_SERVER_SHIFT   3
>> +#define KVM_XIVE_EAS_SERVER_MASK    0xfffffff8ULL
>> +#define KVM_XIVE_EAS_MASK_SHIFT             32
>> +#define KVM_XIVE_EAS_MASK_MASK              0x100000000ULL
>> +#define KVM_XIVE_EAS_EISN_SHIFT             33
>> +#define KVM_XIVE_EAS_EISN_MASK              0xfffffffe00000000ULL
>> +
>> +/* Layout of 64-bit eq attribute */
>> +#define KVM_XIVE_EQ_PRIORITY_SHIFT  0
>> +#define KVM_XIVE_EQ_PRIORITY_MASK   0x7
>> +#define KVM_XIVE_EQ_SERVER_SHIFT    3
>> +#define KVM_XIVE_EQ_SERVER_MASK             0xfffffff8ULL
>> +
>> +/* Layout of 64-bit eq attribute values */
>> +struct kvm_ppc_xive_eq {
>> +    __u32 flags;
>> +    __u32 qsize;
>> +    __u64 qpage;
>> +    __u32 qtoggle;
>> +    __u32 qindex;
>> +};
>> +
>> +#define KVM_XIVE_EQ_FLAG_ENABLED    0x00000001
>> +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY      0x00000002
>> +#define KVM_XIVE_EQ_FLAG_ESCALATE   0x00000004
>>  
>>  #endif /* __LINUX_KVM_POWERPC_H */
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index ec85f7e4f88d..c5c0e063dc33 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -27,9 +27,14 @@
>>  
>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>  {
>> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
>>      int i;
>>      uint32_t offset = 0;
>>  
>> +    if (sxc->synchronize_state) {
>> +        sxc->synchronize_state(xive);
>> +    }
>> +
>>      monitor_printf(mon, "XIVE Source %08x .. %08x\n", offset,
>>                     offset + xive->source.nr_irqs - 1);
>>      xive_source_pic_print_info(&xive->source, offset, mon);
>> @@ -354,10 +359,37 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
>>      },
>>  };
>>  
>> +static int vmstate_spapr_xive_pre_save(void *opaque)
>> +{
>> +    sPAPRXive *xive = SPAPR_XIVE_BASE(opaque);
>> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
>> +
>> +    if (sxc->pre_save) {
>> +        return sxc->pre_save(xive);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* handled at the machine level */
>> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
>> +{
>> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
>> +
>> +    if (sxc->post_load) {
>> +        return sxc->post_load(xive, version_id);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static const VMStateDescription vmstate_spapr_xive_base = {
>>      .name = TYPE_SPAPR_XIVE,
>>      .version_id = 1,
>>      .minimum_version_id = 1,
>> +    .pre_save = vmstate_spapr_xive_pre_save,
>> +    .post_load = NULL, /* handled at the machine level */
>> +    .priority = MIG_PRI_XIVE_IC,
>>      .fields = (VMStateField[]) {
>>          VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>> index 767f90826e43..176083c37d61 100644
>> --- a/hw/intc/spapr_xive_kvm.c
>> +++ b/hw/intc/spapr_xive_kvm.c
>> @@ -58,6 +58,58 @@ static void kvm_cpu_enable(CPUState *cs)
>>  /*
>>   * XIVE Thread Interrupt Management context (KVM)
>>   */
>> +static void xive_tctx_kvm_set_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    uint64_t state[4];
>> +    int ret;
>> +
>> +    /* word0 and word1 of the OS ring. */
>> +    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
>> +
>> +    /* VP identifier. Only for KVM pr_debug() */
>> +    state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
>> +
>> +    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>> +    if (ret != 0) {
>> +        error_setg_errno(errp, errno, "Could restore KVM XIVE CPU %ld 
>> state",
>> +                         kvm_arch_vcpu_id(tctx->cs));
>> +    }
>> +}
>> +
>> +static void xive_tctx_kvm_get_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    uint64_t state[4] = { 0 };
>> +    int ret;
>> +
>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>> +    if (ret != 0) {
>> +        error_setg_errno(errp, errno, "Could capture KVM XIVE CPU %ld 
>> state",
>> +                         kvm_arch_vcpu_id(tctx->cs));
>> +        return;
>> +    }
>> +
>> +    /* word0 and word1 of the OS ring. */
>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>> +
>> +    /*
>> +     * KVM also returns word2 containing the VP CAM line value which
>> +     * is interesting to print out the VP identifier in the QEMU
>> +     * monitor. No need to restore it.
>> +     */
>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
>> +}
>> +
>> +static void xive_tctx_kvm_do_synchronize_state(CPUState *cpu,
>> +                                              run_on_cpu_data arg)
>> +{
>> +    xive_tctx_kvm_get_state(arg.host_ptr, &error_fatal);
>> +}
>> +
>> +static void xive_tctx_kvm_synchronize_state(XiveTCTX *tctx)
>> +{
>> +    run_on_cpu(tctx->cs, xive_tctx_kvm_do_synchronize_state,
>> +               RUN_ON_CPU_HOST_PTR(tctx));
>> +}
>>  
>>  static void xive_tctx_kvm_init(XiveTCTX *tctx, Error **errp)
>>  {
>> @@ -112,6 +164,8 @@ static void xive_tctx_kvm_class_init(ObjectClass *klass, void *data)
>>  
>>      device_class_set_parent_realize(dc, xive_tctx_kvm_realize,
>>                                      &xtc->parent_realize);
>> +
>> +    xtc->synchronize_state = xive_tctx_kvm_synchronize_state;
>>  }
>>  
>>  static const TypeInfo xive_tctx_kvm_info = {
>> @@ -166,6 +220,34 @@ static void xive_source_kvm_reset(DeviceState *dev)
>>      xive_source_kvm_init(xsrc, &error_fatal);
>>  }
>>  
>> +/*
>> + * This is used to perform the magic loads on the ESB pages, described
>> + * in xive.h.
>> + */
>> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
>> +{
>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
>> +
>> +    /* Prevent the compiler from optimizing away the load */
>> +    volatile uint64_t value = *((uint64_t *) addr);
>> +
>> +    return be64_to_cpu(value) & 0x3;
>> +}
>> +
>> +static void xive_source_kvm_get_state(XiveSource *xsrc)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        /* Perform a load without side effect to retrieve the PQ bits */
>> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>> +
>> +        /* and save PQ locally */
>> +        xive_source_esb_set(xsrc, i, pq);
>> +    }
>> +}
>> +
>>  static void xive_source_kvm_set_irq(void *opaque, int srcno, int val)
>>  {
>>      XiveSource *xsrc = opaque;
>> @@ -295,6 +377,414 @@ static const TypeInfo xive_source_kvm_info = {
>>  /*
>>   * sPAPR XIVE Router (KVM)
>>   */
>> +static int spapr_xive_kvm_set_eq_state(sPAPRXive *xive, CPUState *cs,
>> +                                       Error **errp)
>> +{
>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
>> +    int ret;
>> +    int i;
>> +
>> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
>> +        Error *local_err = NULL;
>> +        XiveEND end;
>> +        uint8_t end_blk;
>> +        uint32_t end_idx;
>> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
>> +        uint64_t kvm_eq_idx;
>> +
>> +        if (!spapr_xive_priority_is_valid(i)) {
>> +            continue;
>> +        }
>> +
>> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
>> +
>> +        ret = xive_router_get_end(xrtr, end_blk, end_idx, &end);
>> +        if (ret) {
>> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
>> +                       vcpu_id, i);
>> +            return ret;
>> +        }
>> +
>> +        if (!(end.w0 & END_W0_VALID)) {
>> +            continue;
>> +        }
>> +
>> +        /* Build the KVM state from the local END structure */
>> +        kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
>> +        kvm_eq.qsize   = GETFIELD(END_W0_QSIZE, end.w0) + 12;
>> +        kvm_eq.qpage   = (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
>> +        kvm_eq.qtoggle = GETFIELD(END_W1_GENERATION, end.w1);
>> +        kvm_eq.qindex  = GETFIELD(END_W1_PAGE_OFF, end.w1);
>> +
>> +        /* Encode the tuple (server, prio) as a KVM EQ index */
>> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
>> +            KVM_XIVE_EQ_PRIORITY_MASK;
>> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
>> +            KVM_XIVE_EQ_SERVER_MASK;
>> +
>> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
>> +                                &kvm_eq, true, &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int spapr_xive_kvm_get_eq_state(sPAPRXive *xive, CPUState *cs,
>> +                                       Error **errp)
>> +{
>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
>> +    int ret;
>> +    int i;
>> +
>> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
>> +        Error *local_err = NULL;
>> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
>> +        uint64_t kvm_eq_idx;
>> +        XiveEND end = { 0 };
>> +        uint8_t end_blk, nvt_blk;
>> +        uint32_t end_idx, nvt_idx;
>> +
>> +        /* Skip priorities reserved for the hypervisor */
>> +        if (!spapr_xive_priority_is_valid(i)) {
>> +            continue;
>> +        }
>> +
>> +        /* Encode the tuple (server, prio) as a KVM EQ index */
>> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
>> +            KVM_XIVE_EQ_PRIORITY_MASK;
>> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
>> +            KVM_XIVE_EQ_SERVER_MASK;
>> +
>> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
>> +                                &kvm_eq, false, &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return ret;
>> +        }
>> +
>> +        if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
>> +            continue;
>> +        }
>> +
>> +        /* Update the local END structure with the KVM input */
>> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
>> +                end.w0 |= END_W0_VALID | END_W0_ENQUEUE;
>> +        }
>> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
>> +                end.w0 |= END_W0_UCOND_NOTIFY;
>> +        }
>> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
>> +                end.w0 |= END_W0_ESCALATE_CTL;
>> +        }
>> +        end.w0 |= SETFIELD(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
>> +
>> +        end.w1 = SETFIELD(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
>> +            SETFIELD(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
>> +        end.w2 = (kvm_eq.qpage >> 32) & 0x0fffffff;
>> +        end.w3 = kvm_eq.qpage & 0xffffffff;
>> +        end.w4 = 0;
>> +        end.w5 = 0;
>> +
>> +        ret = spapr_xive_cpu_to_nvt(xive, POWERPC_CPU(cs), &nvt_blk, &nvt_idx);
>> +        if (ret) {
>> +            error_setg(errp, "XIVE: No NVT for CPU %ld", vcpu_id);
>> +            return ret;
>> +        }
>> +
>> +        end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
>> +            SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
>> +        end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, i);
>> +
>> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
>> +
>> +        ret = xive_router_set_end(xrtr, end_blk, end_idx, &end);
>> +        if (ret) {
>> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
>> +                       vcpu_id, i);
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void spapr_xive_kvm_set_eas_state(sPAPRXive *xive, Error **errp)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        XiveEAS *eas = &xive->eat[i];
>> +        uint32_t end_idx;
>> +        uint32_t end_blk;
>> +        uint32_t eisn;
>> +        uint8_t priority;
>> +        uint32_t server;
>> +        uint64_t kvm_eas;
>> +        Error *local_err = NULL;
>> +
>> +        /* No need to set MASKED EAS, this is the default state after reset */
>> +        if (!(eas->w & EAS_VALID) || eas->w & EAS_MASKED) {
>> +            continue;
>> +        }
>> +
>> +        end_idx = GETFIELD(EAS_END_INDEX, eas->w);
>> +        end_blk = GETFIELD(EAS_END_BLOCK, eas->w);
>> +        eisn = GETFIELD(EAS_END_DATA, eas->w);
>> +
>> +        spapr_xive_end_to_target(xive, end_blk, end_idx, &server, &priority);
>> +
>> +        kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
>> +            KVM_XIVE_EAS_PRIORITY_MASK;
>> +        kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
>> +            KVM_XIVE_EAS_SERVER_MASK;
>> +        kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
>> +            KVM_XIVE_EAS_EISN_MASK;
>> +
>> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, true,
>> +                          &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +static void spapr_xive_kvm_get_eas_state(sPAPRXive *xive, Error **errp)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        XiveEAS *eas = &xive->eat[i];
>> +        XiveEAS new_eas;
>> +        uint64_t kvm_eas;
>> +        uint8_t priority;
>> +        uint32_t server;
>> +        uint32_t end_idx;
>> +        uint8_t end_blk;
>> +        uint32_t eisn;
>> +        Error *local_err = NULL;
>> +
>> +        if (!(eas->w & EAS_VALID)) {
>> +            continue;
>> +        }
>> +
>> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, false,
>> +                          &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +
>> +        priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
>> +            KVM_XIVE_EAS_PRIORITY_SHIFT;
>> +        server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
>> +            KVM_XIVE_EAS_SERVER_SHIFT;
>> +        eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
>> +
>> +        if (spapr_xive_target_to_end(xive, server, priority, &end_blk,
>> +                                     &end_idx)) {
>> +            error_setg(errp, "XIVE: invalid tuple CPU %d priority %d", 
>> server,
>> +                       priority);
>> +            return;
>> +        }
>> +
>> +        new_eas.w = EAS_VALID;
>> +        if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
>> +            new_eas.w |= EAS_MASKED;
>> +        }
>> +
>> +        new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
>> +        new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
>> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
>> +
>> +        *eas = new_eas;
>> +    }
>> +}
>> +
>> +static void spapr_xive_kvm_sync_all(sPAPRXive *xive, Error **errp)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    Error *local_err = NULL;
>> +    int i;
>> +
>> +    /* Sync the KVM source. This reaches the XIVE HW through OPAL */
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        XiveEAS *eas = &xive->eat[i];
>> +
>> +        if (!(eas->w & EAS_VALID)) {
>> +            continue;
>> +        }
>> +
>> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SYNC, i, NULL, true,
>> +                          &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +/*
>> + * The sPAPRXive KVM model migration priority is higher to make sure
> 
> Higher than what?

Than the XiveTCTX and XiveSource models.
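
Savevm visits higher-priority vmstates first, so the intent is
roughly (illustrative skeleton, hypothetical name, real fields
omitted):

    static const VMStateDescription vmstate_xive_ic_sketch = {
        .name = "spapr-xive-ic",        /* hypothetical name */
        .version_id = 1,
        .minimum_version_id = 1,
        .priority = MIG_PRI_XIVE_IC,    /* saved, and pre_save'd, first */
        .fields = (VMStateField[]) {
            VMSTATE_END_OF_LIST()
        },
    };
    /* the XiveTCTX and XiveSource vmstates keep the default priority
     * and are therefore saved after the interrupt controller */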

>> + * its 'pre_save' method runs before all the other XIVE models. It
> 
> If the other XIVE components are children of sPAPRXive (which I think
> they are or could be), then I believe the parent object's pre_save
> will automatically be called first.

ok. XiveTCTX are not children of sPAPRXive but that might not be 
a problem anymore with the VMState change handler.

Thanks

C.

>> + * orchestrates the capture sequence of the XIVE states in the
>> + * following order:
>> + *
>> + *   1. mask all the sources by setting PQ=01, which returns the
>> + *      previous value and save it.
>> + *   2. sync the sources in KVM to stabilize all the queues
>> + *      sync the ENDs to make sure END -> VP is fully completed
>> + *   3. dump the EAS table
>> + *   4. dump the END table
>> + *   5. dump the thread context (IPB)
>> + *
>> + *  Rollback to restore the current configuration of the sources
> 
> 
> 
>> + */
>> +static int spapr_xive_kvm_pre_save(sPAPRXive *xive)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    Error *local_err = NULL;
>> +    CPUState *cs;
>> +    int i;
>> +    int ret = 0;
>> +
>> +    /* Quiesce the sources, to stop the flow of event notifications */
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        /*
>> +         * Mask and save the ESB PQs locally in the XiveSource object.
>> +         */
>> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01);
>> +        xive_source_esb_set(xsrc, i, pq);
>> +    }
>> +
>> +    /* Sync the sources in KVM */
>> +    spapr_xive_kvm_sync_all(xive, &local_err);
>> +    if (local_err) {
>> +        error_report_err(local_err);
>> +        goto out;
>> +    }
>> +
>> +    /* Grab the EAT (could be done earlier ?) */
>> +    spapr_xive_kvm_get_eas_state(xive, &local_err);
>> +    if (local_err) {
>> +        error_report_err(local_err);
>> +        goto out;
>> +    }
>> +
>> +    /*
>> +     * Grab the ENDs. The EQ index and the toggle bit are what we want
>> +     * to capture
>> +     */
>> +    CPU_FOREACH(cs) {
>> +        spapr_xive_kvm_get_eq_state(xive, cs, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /* Capture the thread interrupt contexts */
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +
>> +        /* TODO: check whether this needs to run under run_on_cpu() */
>> +        xive_tctx_kvm_get_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /* All done. */
>> +
>> +out:
>> +    /* Restore the sources to their initial state */
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        uint8_t pq = xive_source_esb_get(xsrc, i);
>> +        if (xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)) != 0x1) {
>> +            error_report("XIVE: IRQ %d has an invalid state", i);
>> +        }
>> +    }
>> +
>> +    /*
>> +     * The XiveSource and the XiveTCTX states will be collected by
>> +     * their respective vmstate handlers afterwards.
>> +     */
>> +    return ret;
>> +}
>> +
>> +/*
>> + * The sPAPRXive 'post_load' method is called by the sPAPR machine,
>> + * after all XIVE device states have been transfered and loaded.
>> + *
>> + * All should be in place when the VCPUs resume execution.
>> + */
>> +static int spapr_xive_kvm_post_load(sPAPRXive *xive, int version_id)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    Error *local_err = NULL;
>> +    CPUState *cs;
>> +    int i;
>> +
>> +    /* Set the ENDs first. The targeting depends on it. */
>> +    CPU_FOREACH(cs) {
>> +        spapr_xive_kvm_set_eq_state(xive, cs, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    /* Restore the targeting, if any */
>> +    spapr_xive_kvm_set_eas_state(xive, &local_err);
>> +    if (local_err) {
>> +        error_report_err(local_err);
>> +        return -1;
>> +    }
>> +
>> +    /* Restore the thread interrupt contexts */
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +
>> +        xive_tctx_kvm_set_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Get the saved state from the XiveSource model and restore the
>> +     * PQ bits
>> +     */
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        uint8_t pq = xive_source_esb_get(xsrc, i);
>> +        xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void spapr_xive_kvm_synchronize_state(sPAPRXive *xive)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    CPUState *cs;
>> +
>> +    xive_source_kvm_get_state(xsrc);
>> +
>> +    spapr_xive_kvm_get_eas_state(xive, &error_fatal);
>> +
>> +    CPU_FOREACH(cs) {
>> +        spapr_xive_kvm_get_eq_state(xive, cs, &error_fatal);
>> +    }
>> +}
>>  
>>  static void spapr_xive_kvm_instance_init(Object *obj)
>>  {
>> @@ -409,6 +899,10 @@ static void spapr_xive_kvm_class_init(ObjectClass *klass, void *data)
>>  
>>      dc->desc = "sPAPR XIVE KVM Interrupt Controller";
>>      dc->unrealize = spapr_xive_kvm_unrealize;
>> +
>> +    sxc->synchronize_state = spapr_xive_kvm_synchronize_state;
>> +    sxc->pre_save = spapr_xive_kvm_pre_save;
>> +    sxc->post_load = spapr_xive_kvm_post_load;
>>  }
>>  
>>  static const TypeInfo spapr_xive_kvm_info = {
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 9bb37553c9ec..c9aedecc8216 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -438,9 +438,14 @@ static const struct {
>>  
>>  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>>  {
>> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
>>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>>      int i;
>>  
>> +    if (xtc->synchronize_state) {
>> +        xtc->synchronize_state(tctx);
>> +    }
>> +
>>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE 
>> PIPR"
>>                     "  W2\n", cpu_index);
>>  
>> @@ -552,10 +557,23 @@ static void xive_tctx_base_unrealize(DeviceState *dev, Error **errp)
>>      qemu_unregister_reset(xive_tctx_base_reset, dev);
>>  }
>>  
>> +static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
>> +{
>> +    XiveTCTX *tctx = XIVE_TCTX_BASE(opaque);
>> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
>> +
>> +    if (xtc->post_load) {
>> +        return xtc->post_load(tctx, version_id);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static const VMStateDescription vmstate_xive_tctx_base = {
>>      .name = TYPE_XIVE_TCTX,
>>      .version_id = 1,
>>      .minimum_version_id = 1,
>> +    .post_load = vmstate_xive_tctx_post_load,
>>      .fields = (VMStateField[]) {
>>          VMSTATE_BUFFER(regs, XiveTCTX),
>>          VMSTATE_END_OF_LIST()
>> @@ -581,9 +599,37 @@ static const TypeInfo xive_tctx_base_info = {
>>      .class_size    = sizeof(XiveTCTXClass),
>>  };
>>  
>> +static int xive_tctx_post_load(XiveTCTX *tctx, int version_id)
>> +{
>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
>> +
>> +    /*
>> +     * When we collect the states from KVM XIVE irqchip, we set word2
>> +     * of the thread context to print out the OS CAM line under the
>> +     * QEMU monitor.
>> +     *
>> +     * This breaks migration on a guest using TCG or not using a KVM
>> +     * irqchip. Fix with an extra reset of the thread contexts.
>> +     */
>> +    if (xrc->reset_tctx) {
>> +        xrc->reset_tctx(tctx->xrtr, tctx);
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void xive_tctx_class_init(ObjectClass *klass, void *data)
>> +{
>> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_CLASS(klass);
>> +
>> +    xtc->post_load = xive_tctx_post_load;
>> +}
>> +
>>  static const TypeInfo xive_tctx_info = {
>>      .name          = TYPE_XIVE_TCTX,
>>      .parent        = TYPE_XIVE_TCTX_BASE,
>> +    .instance_size = sizeof(XiveTCTX),
>> +    .class_init    = xive_tctx_class_init,
>> +    .class_size    = sizeof(XiveTCTXClass),
>>  };
>>  
>>  Object *xive_tctx_create(Object *cpu, const char *type, XiveRouter *xrtr,
>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>> index 92ef53743b64..6fac6ca70595 100644
>> --- a/hw/ppc/spapr_irq.c
>> +++ b/hw/ppc/spapr_irq.c
>> @@ -359,7 +359,7 @@ static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
>>  
>>  static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
>>  {
>> -    return 0;
>> +    return spapr_xive_post_load(spapr->xive, version_id);
>>  }
>>  
>>  /*
> 



