qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v9 6/6] migration: Include migration support for machine check handling


From: Greg Kurz
Subject: Re: [Qemu-devel] [PATCH v9 6/6] migration: Include migration support for machine check handling
Date: Thu, 6 Jun 2019 14:24:46 +0200

On Thu, 6 Jun 2019 16:55:18 +0530
Aravinda Prasad <address@hidden> wrote:

> On Thursday 06 June 2019 08:36 AM, David Gibson wrote:
> > On Wed, May 29, 2019 at 11:10:57AM +0530, Aravinda Prasad wrote:  
> >> This patch includes migration support for machine check
> >> handling. Especially this patch blocks VM migration
> >> requests until the machine check error handling is
> >> complete as (i) these errors are specific to the source
> > >> hardware and are irrelevant on the target hardware,
> >> (ii) these errors cause data corruption and should
> >> be handled before migration.
> >>
> >> Signed-off-by: Aravinda Prasad <address@hidden>
> >> ---
> >>  hw/ppc/spapr.c         |   20 ++++++++++++++++++++
> >>  hw/ppc/spapr_events.c  |   17 +++++++++++++++++
> >>  hw/ppc/spapr_rtas.c    |    4 ++++
> >>  include/hw/ppc/spapr.h |    2 ++
> >>  4 files changed, 43 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index e8a77636..31c4850 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = 
> >> {
> >>      },
> >>  };
> >>  
> >> +static bool spapr_fwnmi_needed(void *opaque)
> >> +{
> >> +    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> >> +
> >> +    return (spapr->guest_machine_check_addr == -1) ? 0 : 1;  
> > 
> > Since we're introducing a PAPR capability to enable this, it would
> > actually be better to check that here, rather than the runtime state.
> > That leads to fewer cases and easier-to-understand semantics for the
> > migration stream.  
> 
> I am fine with this approach as well.
> 
> >   
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_spapr_machine_check = {
> >> +    .name = "spapr_machine_check",
> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .needed = spapr_fwnmi_needed,
> >> +    .fields = (VMStateField[]) {
> >> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> >> +        VMSTATE_INT32(mc_status, SpaprMachineState),
> >> +        VMSTATE_END_OF_LIST()
> >> +    },
> >> +};
> >> +
> >>  static const VMStateDescription vmstate_spapr = {
> >>      .name = "spapr",
> >>      .version_id = 3,
> >> @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
> >>          &vmstate_spapr_dtb,
> >>          &vmstate_spapr_cap_large_decr,
> >>          &vmstate_spapr_cap_ccf_assist,
> >> +        &vmstate_spapr_machine_check,
> >>          NULL
> >>      }
> >>  };
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index 573c0b7..35e21e4 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -41,6 +41,7 @@
> >>  #include "qemu/bcd.h"
> >>  #include "hw/ppc/spapr_ovec.h"
> >>  #include <libfdt.h>
> >> +#include "migration/blocker.h"
> >>  
> >>  #define RTAS_LOG_VERSION_MASK                   0xff000000
> >>  #define   RTAS_LOG_VERSION_6                    0x06000000
> >> @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, 
> >> bool recovered)
> >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >>  {
> >>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> +    int ret;
> >> +    Error *local_err = NULL;
> >> +
> >> +    error_setg(&spapr->fwnmi_migration_blocker,
> >> +            "Live migration not supported during machine check handling");
> >> +    ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
> >> +    if (ret < 0) {
> >> +        /*
> >> +         * We don't want to abort and let the migration to continue. In a
> >> +         * rare case, the machine check handler will run on the target
> >> +         * hardware. Though this is not preferable, it is better than 
> >> aborting
> >> +         * the migration or killing the VM.
> >> +         */
> >> +        error_free(spapr->fwnmi_migration_blocker);  
> > 
> > You should set fwnmi_migration_blocker to NULL here as well.  
> 
> ok.
> 
> > 
> > As mentioned on an earlier iteration, the migration blocker is the
> > same every time.  Couldn't you just create it once and free at final
> > teardown, rather than recreating it for every NMI?  
> 
> Does that mean we create the error string when ibm,nmi-register
> is invoked and tear it down during machine reset?
> 

No, I think David is asking to create the error string during machine init
and keep it for the whole lifetime of the machine. In that case, we don't
even need to call error_free() at all.

> Regards,
> Aravinda
> 
> >   
> >> +        warn_report_err(local_err);
> >> +    }
> >>  
> >>      while (spapr->mc_status != -1) {
> >>          /*
> >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >> index 91a7ab9..c849223 100644
> >> --- a/hw/ppc/spapr_rtas.c
> >> +++ b/hw/ppc/spapr_rtas.c
> >> @@ -50,6 +50,7 @@
> >>  #include "target/ppc/mmu-hash64.h"
> >>  #include "target/ppc/mmu-book3s-v3.h"
> >>  #include "kvm_ppc.h"
> >> +#include "migration/blocker.h"
> >>  
> >>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState 
> >> *spapr,
> >>                                     uint32_t token, uint32_t nargs,
> >> @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >>          spapr->mc_status = -1;
> >>          qemu_cond_signal(&spapr->mc_delivery_cond);
> >>          rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >> +        error_free(spapr->fwnmi_migration_blocker);
> >> +        spapr->fwnmi_migration_blocker = NULL;
> >>      }
> >>  }
> >>  
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index bd75d4b..6c0cfd8 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -214,6 +214,8 @@ struct SpaprMachineState {
> >>      SpaprCapabilities def, eff, mig;
> >>  
> >>      unsigned gpu_numa_id;
> >> +
> >> +    Error *fwnmi_migration_blocker;
> >>  };
> >>  
> >>  #define H_SUCCESS         0
> >>  
> >   
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]