qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v14 6/7] migration: Include migration support for machine check handling


From: Aravinda Prasad
Subject: Re: [PATCH v14 6/7] migration: Include migration support for machine check handling
Date: Wed, 25 Sep 2019 11:42:02 +0530
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.6.0


On Wednesday 25 September 2019 07:09 AM, David Gibson wrote:
> On Wed, Sep 18, 2019 at 01:42:51PM +0530, Aravinda Prasad wrote:
>> This patch includes migration support for machine check
>> handling. In particular, this patch blocks VM migration
>> requests until the machine check error handling is
>> complete, as these errors are specific to the source
>> hardware and are irrelevant on the target hardware.
>>
>> Signed-off-by: Aravinda Prasad <address@hidden>
>> ---
>>  hw/ppc/spapr.c         |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  hw/ppc/spapr_events.c  |   16 +++++++++++-
>>  hw/ppc/spapr_rtas.c    |    2 ++
>>  include/hw/ppc/spapr.h |    2 ++
>>  4 files changed, 82 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 6992b32..a72a4b1 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -46,6 +46,7 @@
>>  #include "migration/qemu-file-types.h"
>>  #include "migration/global_state.h"
>>  #include "migration/register.h"
>> +#include "migration/blocker.h"
>>  #include "mmu-hash64.h"
>>  #include "mmu-book3s-v3.h"
>>  #include "cpu-models.h"
>> @@ -1829,6 +1830,8 @@ static void spapr_machine_reset(MachineState *machine)
>>  
>>      /* Signal all vCPUs waiting on this condition */
>>      qemu_cond_broadcast(&spapr->mc_delivery_cond);
>> +
>> +    migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>  }
>>  
>>  static void spapr_create_nvram(SpaprMachineState *spapr)
>> @@ -2119,6 +2122,60 @@ static const VMStateDescription vmstate_spapr_dtb = {
>>      },
>>  };
>>  
>> +static bool spapr_fwnmi_needed(void *opaque)
>> +{
>> +    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
>> +
>> +    return spapr->guest_machine_check_addr != -1;
>> +}
>> +
>> +static int spapr_fwnmi_post_load(void *opaque, int version_id)
>> +{
>> +    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
>> +
>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
>> +
>> +        if (kvmppc_has_cap_ppc_fwnmi()) {
>> +            return 0;
>> +        }
>> +
>> +        return kvmppc_set_fwnmi();
>> +    }
> 
> I don't see that you need this.  The spapr caps need to be set the
> same on source and destination (the caps infrastructure handles that),
> so setup should already be handled by the caps .apply hooks.

Will check.

> 
>> +
>> +    return 0;
>> +}
>> +
>> +static int spapr_fwnmi_pre_save(void *opaque)
>> +{
>> +    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
>> +
>> +    /*
>> +     * With -only-migratable QEMU option, we cannot block migration.
>> +     * Hence check if machine check handling is in progress and print
>> +     * a warning message.
>> +     */
>> +    if (spapr->mc_status != -1) {
>> +        warn_report("A machine check is being handled during migration. The"
>> +                "handler may run and log hardware error on the 
>> destination");
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_spapr_machine_check = {
>> +    .name = "spapr_machine_check",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .needed = spapr_fwnmi_needed,
>> +    .post_load = spapr_fwnmi_post_load,
>> +    .pre_save = spapr_fwnmi_pre_save,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
>> +        VMSTATE_INT32(mc_status, SpaprMachineState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
> 
> So, I know I suggested earlier that you postpone the migration support
> to a later patch in the series.  But at least for the actual vmstate
> stuff, I think that was due to considerations that have since gone.  I
> don't see any reason you can't put this in as soon as you add the
> machine_check_addr and mc_status fields.
> 
> The migration blocker stuff might have to come in a later patch, but
> that's ok.

I feel I can retain this. Let me know if you want me to change it.

Regards,
Aravinda

> 
>> +
>>  static const VMStateDescription vmstate_spapr = {
>>      .name = "spapr",
>>      .version_id = 3,
>> @@ -2152,6 +2209,7 @@ static const VMStateDescription vmstate_spapr = {
>>          &vmstate_spapr_dtb,
>>          &vmstate_spapr_cap_large_decr,
>>          &vmstate_spapr_cap_ccf_assist,
>> +        &vmstate_spapr_machine_check,
>>          NULL
>>      }
>>  };
>> @@ -2948,6 +3006,11 @@ static void spapr_machine_init(MachineState *machine)
>>              exit(1);
>>          }
>>  
>> +        /* Create the error string for live migration blocker */
>> +        error_setg(&spapr->fwnmi_migration_blocker,
>> +            "A machine check is being handled during migration. The handler"
>> +            "may run and log hardware error on the destination");
>> +
>>          /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>          spapr_fwnmi_register();
>>      }
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index ecc3d68..71caa03 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -43,6 +43,7 @@
>>  #include "qemu/main-loop.h"
>>  #include "hw/ppc/spapr_ovec.h"
>>  #include <libfdt.h>
>> +#include "migration/blocker.h"
>>  
>>  #define RTAS_LOG_VERSION_MASK                   0xff000000
>>  #define   RTAS_LOG_VERSION_6                    0x06000000
>> @@ -844,6 +845,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>  {
>>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>      CPUState *cs = CPU(cpu);
>> +    int ret;
>> +    Error *local_err = NULL;
>>  
>>      if (spapr->guest_machine_check_addr == -1) {
>>          /*
>> @@ -873,8 +876,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>              return;
>>          }
>>      }
>> -    spapr->mc_status = cpu->vcpu_id;
>>  
>> +    ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
>> +    if (ret == -EBUSY) {
>> +        /*
>> +         * We don't want to abort so we let the migration to continue.
>> +         * In a rare case, the machine check handler will run on the target.
>> +         * Though this is not preferable, it is better than aborting
>> +         * the migration or killing the VM.
>> +         */
>> +        warn_report_err(local_err);
>> +    }
>> +
>> +    spapr->mc_status = cpu->vcpu_id;
>>      spapr_mce_dispatch_elog(cpu, recovered);
>>  }
>>  
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index b569538..c652ec3 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -50,6 +50,7 @@
>>  #include "hw/ppc/fdt.h"
>>  #include "target/ppc/mmu-hash64.h"
>>  #include "target/ppc/mmu-book3s-v3.h"
>> +#include "migration/blocker.h"
>>  
>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState 
>> *spapr,
>>                                     uint32_t token, uint32_t nargs,
>> @@ -446,6 +447,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>       */
>>      spapr->mc_status = -1;
>>      qemu_cond_signal(&spapr->mc_delivery_cond);
>> +    migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>      rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>  }
>>  
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index dada821..ea7625e 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -217,6 +217,8 @@ struct SpaprMachineState {
>>  
>>      unsigned gpu_numa_id;
>>      SpaprTpmProxy *tpm_proxy;
>> +
>> +    Error *fwnmi_migration_blocker;
>>  };
>>  
>>  #define H_SUCCESS         0
>>
> 

-- 
Regards,
Aravinda



reply via email to

[Prev in Thread] Current Thread [Next in Thread]