qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [RFC PATCH v1 08/10] spapr: CPU hotplug support


From: Igor Mammedov
Subject: Re: [Qemu-ppc] [RFC PATCH v1 08/10] spapr: CPU hotplug support
Date: Mon, 7 Mar 2016 12:01:13 +0100

On Mon, 7 Mar 2016 11:59:42 +0530
Bharata B Rao <address@hidden> wrote:

> On Mon, Mar 07, 2016 at 02:49:06PM +1100, David Gibson wrote:
> > On Fri, Mar 04, 2016 at 12:24:19PM +0530, Bharata B Rao wrote:  
> > > Set up device tree entries for the hotplugged CPU core and use the
> > > exising EPOW event infrastructure to send CPU hotplug notification to
> > > the guest.
> > > 
> > > Signed-off-by: Bharata B Rao <address@hidden>
> > > ---
> > >  hw/ppc/spapr.c                  | 73 
> > > ++++++++++++++++++++++++++++++++++++++++-
> > >  hw/ppc/spapr_cpu_core.c         | 60 +++++++++++++++++++++++++++++++++
> > >  hw/ppc/spapr_events.c           |  3 ++
> > >  hw/ppc/spapr_rtas.c             | 24 ++++++++++++++
> > >  include/hw/ppc/spapr.h          |  4 +++
> > >  include/hw/ppc/spapr_cpu_core.h |  2 ++
> > >  6 files changed, 165 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 5acb612..6c4ac50 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> > > *fdt, int offset,
> > >      size_t page_sizes_prop_size;
> > >      uint32_t vcpus_per_socket = smp_threads * smp_cores;
> > >      uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> > > +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> > > +    sPAPRDRConnector *drc;
> > > +    sPAPRDRConnectorClass *drck;
> > > +    int drc_index;
> > > +
> > > +    if (smc->dr_cpu_enabled) {
> > > +        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, 
> > > index);
> > > +        g_assert(drc);
> > > +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > > +        drc_index = drck->get_index(drc);
> > > +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", 
> > > drc_index)));
> > > +    }
> > >  
> > >      /* Note: we keep CI large pages off for now because a 64K capable 
> > > guest
> > >       * provisioned with large pages might otherwise try to map a qemu
> > > @@ -987,6 +999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState 
> > > *spapr,
> > >          _FDT(spapr_drc_populate_dt(fdt, 0, NULL, 
> > > SPAPR_DR_CONNECTOR_TYPE_LMB));
> > >      }
> > >  
> > > +    if (smc->dr_cpu_enabled) {
> > > +        int offset = fdt_path_offset(fdt, "/cpus");
> > > +        ret = spapr_drc_populate_dt(fdt, offset, NULL,
> > > +                                    SPAPR_DR_CONNECTOR_TYPE_CPU);
> > > +        if (ret < 0) {
> > > +            error_report("Couldn't set up CPU DR device tree 
> > > properties");
> > > +            exit(1);
> > > +        }
> > > +    }
> > > +
> > >      _FDT((fdt_pack(fdt)));
> > >  
> > >      if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
> > > @@ -1181,7 +1203,7 @@ static void ppc_spapr_reset(void)
> > >  
> > >  }
> > >  
> > > -static void spapr_cpu_reset(void *opaque)
> > > +void spapr_cpu_reset(void *opaque)
> > >  {
> > >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >      PowerPCCPU *cpu = opaque;
> > > @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const char 
> > > *boot_device,
> > >  void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error 
> > > **errp)
> > >  {
> > >      CPUPPCState *env = &cpu->env;
> > > +    CPUState *cs = CPU(cpu);
> > > +    int i;
> > >  
> > >      /* Set time-base frequency to 512 MHz */
> > >      cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> > > @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> > > PowerPCCPU *cpu, Error **errp)
> > >          }
> > >      }
> > >  
> > > +    /* Set NUMA node for the added CPUs  */
> > > +    for (i = 0; i < nb_numa_nodes; i++) {
> > > +        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> > > +            cs->numa_node = i;
> > > +            break;
> > > +        }
> > > +    }
> > > +  
> > 
> > This hunk seems like it belongs in a different patch.  
> 
> It appears that this would be needed by other archs also to set the
> NUMA node for the hot-plugged CPU. How about make an API out of this
> and use this something like below ? Igor ?
I don't think we should try to make it generic (at leas for now),
each target might have its own notion of cpu to node mapping.

As I see -numa CLI interface is somewhat broken and allow(s|ed)
to create nonsense configurations. But it's a separate topic,
which I'd leave out this discussion.

--
I have a not completely thought out idea to obsolete -numa CLI
in favor of "-device some-type,node=XX" where 'node' property
could replace current -numa interface.
For example on x86 for memory -numa is used only for initial
memory and we already use -device pc-dimm,node=XX for hotpluggable memory.
However if initial memory is converted to pc-dimm devices, then
it could be possible to replace -numa mem CLI with -device pc-dimm
as well.
Applying the same idea to CPUs could obsolete -numa cpus CLI.

> 
> -------------------------------------------------------------------
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 0aeefd2..8347234 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1112,6 +1112,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
>          error_propagate(errp, local_err);
>          return;
>      }
> +    numa_set_cpu(CPU(cpu));
>      object_unref(OBJECT(cpu));
>  }
>  
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index a42f8c0..f2b3b67 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1645,7 +1645,6 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> PowerPCCPU *cpu, Error **errp)
>  {
>      CPUPPCState *env = &cpu->env;
>      CPUState *cs = CPU(cpu);
> -    int i;
>  
>      /* Set time-base frequency to 512 MHz */
>      cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> @@ -1671,12 +1670,7 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> PowerPCCPU *cpu, Error **errp)
>      }
>  
>      /* Set NUMA node for the added CPUs  */
> -    for (i = 0; i < nb_numa_nodes; i++) {
> -        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> -            cs->numa_node = i;
> -            break;
> -        }
> -    }
> +    numa_set_cpu(cs);
>  
>      xics_cpu_setup(spapr->icp, cpu);
>      qemu_register_reset(spapr_cpu_reset, cpu);
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index bb184c9..648d68b 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -31,5 +31,6 @@ extern QemuOptsList qemu_numa_opts;
>  void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
>  void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
>  uint32_t numa_get_node(ram_addr_t addr, Error **errp);
> +void numa_set_cpu(CPUState *cpu);
>  
>  #endif
> diff --git a/numa.c b/numa.c
> index 4c4f7f5..1b47c15 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -396,20 +396,32 @@ void parse_numa_opts(MachineClass *mc)
>      }
>  }
>  
> +static void numa_set_cpu_numa_node(CPUState *cpu)
> +{
> +    int i;
> +
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
> +            cpu->numa_node = i;
> +            break;
> +        }
> +    }
> +}
> +
>  void numa_post_machine_init(void)
>  {
>      CPUState *cpu;
> -    int i;
>  
>      CPU_FOREACH(cpu) {
> -        for (i = 0; i < nb_numa_nodes; i++) {
> -            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
> -                cpu->numa_node = i;
> -            }
> -        }
> +        numa_set_cpu_numa_node(cpu);
>      }
>  }
>  
> +void numa_set_cpu(CPUState *cpu)
> +{
> +    numa_set_cpu_numa_node(cpu);
> +}
> +
>  static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
>                                             const char *name,
>                                             uint64_t ram_size)
> 
> -------------------------------------------------------------------
> >   
> > >      xics_cpu_setup(spapr->icp, cpu);
> > >      qemu_register_reset(spapr_cpu_reset, cpu);
> > >  }
> > > @@ -1768,6 +1800,7 @@ static void ppc_spapr_init(MachineState *machine)
> > >      char *filename;
> > >      int spapr_cores = smp_cpus / smp_threads;
> > >      int spapr_max_cores = max_cpus / smp_threads;
> > > +    int smt = kvmppc_smt_threads();
> > >  
> > >      if (smp_cpus % smp_threads) {
> > >          error_report("smp_cpus (%u) must be multiple of threads (%u)",
> > > @@ -1834,6 +1867,15 @@ static void ppc_spapr_init(MachineState *machine)
> > >          spapr_validate_node_memory(machine, &error_fatal);
> > >      }
> > >  
> > > +    if (smc->dr_cpu_enabled) {
> > > +        for (i = 0; i < spapr_max_cores; i++) {
> > > +            sPAPRDRConnector *drc =
> > > +                spapr_dr_connector_new(OBJECT(spapr),
> > > +                                       SPAPR_DR_CONNECTOR_TYPE_CPU, i * 
> > > smt);
> > > +            qemu_register_reset(spapr_drc_reset, drc);
> > > +        }
> > > +    }
> > > +  
> > 
> > Nit: would this be cleaner to include in the same loop that constructs
> > the (empty) links and boot-time cpu cores?  
> 
> Seems possible, will change.
> 
> >   
> > >      /* init CPUs */
> > >      if (machine->cpu_model == NULL) {
> > >          machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
> > > @@ -2267,6 +2309,27 @@ out:
> > >      error_propagate(errp, local_err);
> > >  }
> > >  
> > > +void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> > > +                                    int *fdt_offset, sPAPRMachineState 
> > > *spapr)
> > > +{
> > > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > > +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> > > +    int id = ppc_get_vcpu_dt_id(cpu);
> > > +    void *fdt;
> > > +    int offset, fdt_size;
> > > +    char *nodename;
> > > +
> > > +    fdt = create_device_tree(&fdt_size);
> > > +    nodename = g_strdup_printf("address@hidden", dc->fw_name, id);
> > > +    offset = fdt_add_subnode(fdt, 0, nodename);
> > > +
> > > +    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
> > > +    g_free(nodename);
> > > +
> > > +    *fdt_offset = offset;
> > > +    return fdt;
> > > +}
> > > +
> > >  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> > >                                        DeviceState *dev, Error **errp)
> > >  {
> > > @@ -2307,6 +2370,12 @@ static void 
> > > spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> > >          }
> > >  
> > >          spapr_memory_plug(hotplug_dev, dev, node, errp);
> > > +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> > > +        if (!smc->dr_cpu_enabled && dev->hotplugged) {
> > > +            error_setg(errp, "CPU hotplug not supported for this 
> > > machine");
> > > +            return;
> > > +        }
> > > +        spapr_core_plug(hotplug_dev, dev, errp);
> > >      }
> > >  }
> > >  
> > > @@ -2366,6 +2435,7 @@ static void spapr_machine_class_init(ObjectClass 
> > > *oc, void *data)
> > >      mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
> > >  
> > >      smc->dr_lmb_enabled = true;
> > > +    smc->dr_cpu_enabled = true;
> > >      fwc->get_dev_path = spapr_get_fw_dev_path;
> > >      nc->nmi_monitor_handler = spapr_nmi;
> > >  }
> > > @@ -2445,6 +2515,7 @@ static void 
> > > spapr_machine_2_5_class_options(MachineClass *mc)
> > >  
> > >      spapr_machine_2_6_class_options(mc);
> > >      smc->use_ohci_by_default = true;
> > > +    smc->dr_cpu_enabled = false;
> > >      SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5);
> > >  }
> > >  
> > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > index 9ddf3ce..4c233d7 100644
> > > --- a/hw/ppc/spapr_cpu_core.c
> > > +++ b/hw/ppc/spapr_cpu_core.c
> > > @@ -14,6 +14,65 @@
> > >  #include "qapi/visitor.h"
> > >  #include <sysemu/cpus.h>
> > >  
> > > +void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > +                     Error **errp)
> > > +{
> > > +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> > > +    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
> > > +    sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> > > +    PowerPCCPU *cpu = &core->threads[0];
> > > +    CPUState *cs = CPU(cpu);
> > > +    int id = ppc_get_vcpu_dt_id(cpu);
> > > +    sPAPRDRConnector *drc =
> > > +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> > > +    sPAPRDRConnectorClass *drck;
> > > +    Error *local_err = NULL;
> > > +    void *fdt = NULL;
> > > +    int fdt_offset = 0;
> > > +
> > > +    if (!smc->dr_cpu_enabled) {
> > > +        /*
> > > +         * This is a cold plugged CPU core but the machine doesn't 
> > > support
> > > +         * DR. So skip the hotplug path ensuring that the core is brought
> > > +         * up online with out an associated DR connector.
> > > +         */
> > > +        return;
> > > +    }
> > > +
> > > +    g_assert(drc);
> > > +
> > > +    /*
> > > +     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
> > > +     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
> > > +     */
> > > +    if (dev->hotplugged) {
> > > +        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
> > > +        dev->hotplugged = true;
> > > +    }
> > > +
> > > +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > > +    drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, 
> > > &local_err);
> > > +    if (local_err) {
> > > +        g_free(fdt);
> > > +        error_propagate(errp, local_err);
> > > +        return;
> > > +    }
> > > +
> > > +    if (dev->hotplugged) {
> > > +        /*
> > > +         * Send hotplug notification interrupt to the guest only in case
> > > +         * of hotplugged CPUs.
> > > +         */
> > > +        spapr_hotplug_req_add_by_index(drc);
> > > +    } else {
> > > +        /*
> > > +         * Set the right DRC states for cold plugged CPU.
> > > +         */
> > > +        drck->set_allocation_state(drc, 
> > > SPAPR_DR_ALLOCATION_STATE_USABLE);
> > > +        drck->set_isolation_state(drc, 
> > > SPAPR_DR_ISOLATION_STATE_UNISOLATED);
> > > +    }
> > > +}
> > > +
> > >  static int spapr_cpu_core_realize_child(Object *child, void *opaque)
> > >  {
> > >      Error **errp = opaque;
> > > @@ -30,6 +89,7 @@ static int spapr_cpu_core_realize_child(Object *child, 
> > > void *opaque)
> > >      if (*errp) {
> > >          return 1;
> > >      }
> > > +    spapr_cpu_reset(cpu);  
> > 
> > This also looks like it belongs in a different patch.  
> 
> You mean a separate patch for this or push this around to an existing
> patch of the series ?
> 
> Regards,
> Bharata.
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]