[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand
From: |
Greg Kurz |
Subject: |
Re: [PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand |
Date: |
Mon, 16 Nov 2020 15:04:15 +0100 |
On Mon, 16 Nov 2020 14:43:12 +0100
Cédric Le Goater <clg@kaod.org> wrote:
> On 11/13/20 4:14 PM, Greg Kurz wrote:
> > Recent commit acbdb9956fe9 introduced a dedicated path to create
> > IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with
> > the assumption that the IPI number is equal to the vCPU id. The
> > latter is wrong: the guest chooses an arbitrary LISN from the
> > "ibm,xive-lisn-ranges" and assigns it to a target vCPU with the
> > H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because
> > IPI numbers and vCPU ids happen to match by default. This is no
> > longer the case though when setting the VSMT machine property to
> > a value that is different from (ie. bigger than) the number of
> > vCPUs per core (ie. -smp threads). Wrong IPIs end up being created
> > in KVM but the guest still uses the ones it has allocated and gets
> > very confused (see BugLink below).
> >
> > Fix this by creating the IPI at the only place where we have
> > its appropriate number: when the guest allocates it with the
> > H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because
> > it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall
> > arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus
> > list. It is now used instead of vcpu_id to filter unallocated IPIs
> > out in xive_source_is_valid(). It also makes it possible to reset
> > the IPI only on the first call to H_INT_SET_SOURCE_CONFIG.
> >
> > Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load().
> > Masked ones will be created when the guest eventually unmasks them
> > with H_INT_SET_SOURCE_CONFIG.
> >
> > Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
> > Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the
> > other sources")
> > BugLink: https://bugs.launchpad.net/qemu/+bug/1900241
> > Cc: clg@kaod.org
> > Signed-off-by: Greg Kurz <groug@kaod.org>
>
>
> I am quite certain this is correct but the complexity looks like a
> potential source of bugs. So I think it is simpler to revert the
> optimization introduced by acbdb9956fe9 and find a better solution
> covering SMT also.
>
I tend to agree. Even if I could successfully test various scenarios
around hotplug and migration, I'm really not super confident to merge
this in an RC. I'll post a series that reverts acbdb9956fe9 ASAP.
> Thanks,
>
> C.
>
>
>
> > ---
> > hw/intc/spapr_xive_kvm.c | 141
> > +++++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 127 insertions(+), 14 deletions(-)
> >
> > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > index 66bf4c06fe55..4e5871c3aac2 100644
> > --- a/hw/intc/spapr_xive_kvm.c
> > +++ b/hw/intc/spapr_xive_kvm.c
> > @@ -30,6 +30,7 @@
> > */
> > typedef struct KVMEnabledCPU {
> > unsigned long vcpu_id;
> > + XiveEAS *ipi_eas;
> > QLIST_ENTRY(KVMEnabledCPU) node;
> > } KVMEnabledCPU;
> >
> > @@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs)
> >
> > enabled_cpu = g_malloc(sizeof(*enabled_cpu));
> > enabled_cpu->vcpu_id = vcpu_id;
> > + enabled_cpu->ipi_eas = NULL;
> > QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
> > }
> >
> > @@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx,
> > Error **errp)
> > */
> > typedef struct {
> > SpaprXive *xive;
> > + uint32_t lisn;
> > Error *err;
> > int rc;
> > } XiveInitIPI;
> >
> > static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
> > {
> > - unsigned long ipi = kvm_arch_vcpu_id(cs);
> > XiveInitIPI *s = arg.host_ptr;
> > + unsigned long ipi = s->lisn;
> > uint64_t state = 0;
> >
> > s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
> > &state, true, &s->err);
> > }
> >
> > -static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error
> > **errp)
> > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t
> > lisn,
> > + Error **errp)
> > {
> > XiveInitIPI s = {
> > .xive = xive,
> > .err = NULL,
> > .rc = 0,
> > + .lisn = lisn,
> > };
> >
> > run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
> > @@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error
> > **errp)
> > return ret;
> > }
> >
> > - /* Create/reset the vCPU IPI */
> > - ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
> > - if (ret < 0) {
> > - return ret;
> > - }
> > -
> > kvm_cpu_enable(tctx->cs);
> > return 0;
> > }
> > @@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error
> > **errp)
> > * XIVE Interrupt Source (KVM)
> > */
> >
> > +static bool spapr_xive_is_ipi(uint32_t lisn)
> > +{
> > + return lisn < SPAPR_XIRQ_BASE;
> > +}
> > +
> > +static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn)
> > +{
> > + KVMEnabledCPU *enabled_cpu;
> > +
> > + g_assert(spapr_xive_is_ipi(lisn));
> > +
> > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> > + if (enabled_cpu->ipi_eas == &xive->eat[lisn]) {
> > + return true;
> > + }
> > + }
> > + return false;
> > +}
> > +
> > +static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id,
> > + Error **errp)
> > +{
> > + KVMEnabledCPU *enabled_cpu;
> > +
> > + g_assert(spapr_xive_is_ipi(lisn));
> > +
> > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> > + if (enabled_cpu->vcpu_id == vcpu_id) {
> > + CPUState *cs = CPU(spapr_find_cpu(vcpu_id));
> > + XiveEAS *eas = &xive->eat[lisn];
> > +
> > + /* No change ? */
> > + if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) {
> > + return 0;
> > + }
> > +
> > + /* XXX: abort ? */
> > + if (!cs) {
> > + break;
> > + }
> > +
> > + /* Create/reset the vCPU IPI */
> > + int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp);
> > + if (ret < 0) {
> > + return ret;
> > + }
> > +
> > + enabled_cpu->ipi_eas = eas;
> > + return 0;
> > + }
> > + }
> > +
> > + error_setg(errp, "vCPU #%d not found", vcpu_id);
> > + return -ESRCH;
> > +}
> > +
> > int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS
> > *eas,
> > Error **errp)
> > {
> > @@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive,
> > uint32_t lisn, XiveEAS *eas,
> >
> > spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> >
> > + if (spapr_xive_is_ipi(lisn)) {
> > + /* Create the vCPU IPI */
> > + int ret = kvm_ipi_enable(xive, lisn, server, errp);
> > + if (ret < 0) {
> > + return ret;
> > + }
> > + }
> > +
> > kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
> > KVM_XIVE_SOURCE_PRIORITY_MASK;
> > kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
> > @@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int
> > srcno, Error **errp)
> > assert(xive->fd != -1);
> >
> > /*
> > - * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
> > + * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config()
> > * and not with all sources in kvmppc_xive_source_reset()
> > */
> > assert(srcno >= SPAPR_XIRQ_BASE);
> > @@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc,
> > int srcno, Error **errp)
> > * To be valid, a source must have been claimed by the machine (valid
> > * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
> > * have been enabled, which means the IPI has been allocated in
> > - * kvmppc_xive_cpu_connect().
> > + * kvmppc_xive_set_source_config().
> > */
> > static bool xive_source_is_valid(SpaprXive *xive, int i)
> > {
> > return xive_eas_is_valid(&xive->eat[i]) &&
> > - (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
> > + (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i));
> > }
> >
> > static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> > @@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc,
> > Error **errp)
> > int i;
> >
> > /*
> > - * Skip the vCPU IPIs. These are created/reset when the vCPUs are
> > - * connected in kvmppc_xive_cpu_connect()
> > + * Skip the vCPU IPIs. These are created/reset on-demand in
> > + * kvmppc_xive_set_source_config().
> > */
> > for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
> > int ret;
> > @@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int
> > version_id)
> > }
> >
> > /* Restore the EAT */
> > - for (i = 0; i < xive->nr_irqs; i++) {
> > +
> > + /* IPIs are restored from the appropriate vCPU context */
> > + CPU_FOREACH(cs) {
> > + /*
> > + * The EAT has valid entries to accommodate all possible vCPUs,
> > + * but we only want to allocate in KVM the IPIs that were
> > + * actually allocated before migration. Let's consider the full
> > + * list of IPIs to find an EAS that matches the vCPU id.
> > + *
> > + * If an IPI appears unmasked in the EAT, it is a proof that the
> > + * guest did successfully call H_INT_SET_SOURCE_CONFIG and we
> > + * should thus create the IPI at the KVM level if the END index
> > + * matches the vCPU id.
> > + *
> > + * If an IPI appears masked in the EAT, then we don't know exactly
> > + * what happened before migration but we don't care. The IPI will
> > + * be created when the guest eventually unmasks it with a
> > subsequent
> > + * call to H_INT_SET_SOURCE_CONFIG.
> > + */
> > + for (i = 0; i < SPAPR_XIRQ_BASE; i++) {
> > + XiveEAS *eas = &xive->eat[i];
> > + uint32_t end_idx;
> > + uint32_t end_blk;
> > + uint8_t priority;
> > + uint32_t server;
> > +
> > + if (!xive_eas_is_valid(eas)) {
> > + continue;
> > + }
> > +
> > + if (xive_eas_is_masked(eas)) {
> > + continue;
> > + }
> > +
> > + end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> > + end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
> > + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> > + if (server != kvm_arch_vcpu_id(cs)) {
> > + continue;
> > + }
> > +
> > + ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err);
> > + if (ret < 0) {
> > + goto fail;
> > + }
> > + break;
> > + }
> > + }
> > +
> > + /* Now restore non-IPIs */
> > + for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) {
> > if (!xive_source_is_valid(xive, i)) {
> > continue;
> > }
> >
> >
>