qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH v5 15/36] spapr: introdude a new machine IRQ backe


From: Cédric Le Goater
Subject: Re: [Qemu-ppc] [PATCH v5 15/36] spapr: introdude a new machine IRQ backend for XIVE
Date: Wed, 28 Nov 2018 18:16:58 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.0

On 11/28/18 4:28 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
>> The XIVE IRQ backend uses the same layout as the new XICS backend but
>> covers the full range of the IRQ number space. The IRQ numbers for the
>> CPU IPIs are allocated at the bottom of this space, below 4K, to
>> preserve compatibility with XICS which does not use that range.
>>
>> This should be enough given that the maximum number of CPUs is 1024
>> for the sPAPR machine under QEMU. For the record, the biggest POWER8
>> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
>> cores, SMT8).
>>
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>  include/hw/ppc/spapr.h     |   2 +
>>  include/hw/ppc/spapr_irq.h |   7 ++-
>>  hw/ppc/spapr.c             |   2 +-
>>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
>>  4 files changed, 124 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 6279711fe8f7..1fbc2663e06c 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
>>  typedef struct sPAPREventSource sPAPREventSource;
>>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
>>  typedef struct ICSState ICSState;
>> +typedef struct sPAPRXive sPAPRXive;
>>  
>>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
>>  #define SPAPR_ENTRY_POINT       0x100
>> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
>>      const char *icp_type;
>>      int32_t irq_map_nr;
>>      unsigned long *irq_map;
>> +    sPAPRXive  *xive;
>>  
>>      bool cmd_line_caps[SPAPR_CAP_NUM];
>>      sPAPRCapabilities def, eff, mig;
>> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
>> index 0e9229bf219e..c854ae527808 100644
>> --- a/include/hw/ppc/spapr_irq.h
>> +++ b/include/hw/ppc/spapr_irq.h
>> @@ -13,6 +13,7 @@
>>  /*
>>   * IRQ range offsets per device type
>>   */
>> +#define SPAPR_IRQ_IPI        0x0
>>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
>>  #define SPAPR_IRQ_HOTPLUG    0x1001
>>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
>> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
>>      uint32_t    nr_irqs;
>>      uint32_t    nr_msis;
>>  
>> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
>> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
>> +                 Error **errp);
>>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
>>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
>> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
>>  
>>  extern sPAPRIrq spapr_irq_xics;
>>  extern sPAPRIrq spapr_irq_xics_legacy;
>> +extern sPAPRIrq spapr_irq_xive;
>>  
>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
> 
> I don't see why nr_servers needs to become a parameter, since it can
> be derived from spapr within this routine.

ok. This is true. We can use directly xics_max_server_number(spapr).

>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error 
>> **errp);
>>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
>>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index e470efe7993c..9f8c19e56e7a 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
>>      spapr_set_vsmt_mode(spapr, &error_fatal);
>>  
>>      /* Set up Interrupt Controller before we create the VCPUs */
>> -    spapr_irq_init(spapr, &error_fatal);
>> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
> 
> We should rename xics_max_server_number() since it's no longer xics
> specific.

yes.

>>      /* Set up containers for ibm,client-architecture-support negotiated 
>> options
>>       */
>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>> index bac450ffff23..2569ae1bc7f8 100644
>> --- a/hw/ppc/spapr_irq.c
>> +++ b/hw/ppc/spapr_irq.c
>> @@ -12,6 +12,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qapi/error.h"
>>  #include "hw/ppc/spapr.h"
>> +#include "hw/ppc/spapr_xive.h"
>>  #include "hw/ppc/xics.h"
>>  #include "sysemu/kvm.h"
>>  
>> @@ -91,7 +92,7 @@ error:
>>  }
>>  
>>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
>> -                                Error **errp)
>> +                                int nr_servers, Error **errp)
>>  {
>>      MachineState *machine = MACHINE(spapr);
>>      Error *local_err = NULL;
>> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
>>      .print_info  = spapr_irq_print_info_xics,
>>  };
>>  
>> + /*
>> + * XIVE IRQ backend.
>> + */
>> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
>> +                                    const char *type_xive, int nr_irqs,
>> +                                    int nr_servers, Error **errp)
>> +{
>> +    sPAPRXive *xive;
>> +    Error *local_err = NULL;
>> +    Object *obj;
>> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
>> +    int i;
>> +
>> +    obj = object_new(type_xive);
> 
> What's the reason for making the type a parameter, rather than just
> using the #define here.

KVM.

>> +    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
>> +    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);
> 
> This is still within the sPAPR code, and you have a pointer to the
> MachineState, so I don't see why you could't just derive nr_irqs and
> nr_servers from that, rather than having them passed in.

for nr_servers I agree. nr_irqs comes from the machine class and it will
not make any sense using the machine class in the init routine of the
'dual' sPAPR IRQ backend supporting both modes. See patch 34 which
initializes both backend for the 'dual' machine.
 
>> +    object_property_set_bool(obj, true, "realized", &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return NULL;
>> +    }
>> +    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
> 
> Whereas the XiveSource and XiveRouter I think make more sense as
> "device components" rather than SysBusDevice subclasses, 

Yes. I changed that.

> I think it
> *does* make sense for the PAPR-XIVE object to be a full fledged
> SysBusDevice.

Ah. That I didn't do but thinking of it, it makes sense as it is the
object managing the TIMA and ESB memory region mapping for the machine. 

> And for that reason, I think it makes more sense to create it with
> qdev_create(), which should avoid having to manually fiddle with the
> parent bus.

OK. I will give it a try. 

>> +    xive = SPAPR_XIVE(obj);
>> +
>> +    /* Enable the CPU IPIs */
>> +    for (i = 0; i < nr_servers; ++i) {
>> +        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);
> 
> This comment possibly belonged on an earlier patch.  I don't love the
> "..._enable" name - to me that suggests something runtime rather than
> configuration time.  A better option isn't quickly occurring to me
> though :/.

Instead, I could call the sPAPR IRQ claim method  : 

    for (i = 0; i < nr_servers; ++i) {
        spapr_irq_xive.claim(spapr, SPAPR_IRQ_IPI + i, false, &local_err);
    }


What it does is to set the EAS_VALID bit in the EAT (it also sets the 
LSI bit). what about :
        
        spapr_xive_irq_validate() 
        spapr_xive_irq_invalidate() 

or to map the sPAPR IRQ backend names :

        spapr_xive_irq_claim() 
        spapr_xive_irq_free() 


> 
>> +    }
>> +
>> +    return xive;
>> +}
>> +
>> +static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
>> +                                int nr_servers, Error **errp)
>> +{
>> +    MachineState *machine = MACHINE(spapr);
>> +    Error *local_err = NULL;
>> +
>> +    /* KVM XIVE support */
>> +    if (kvm_enabled()) {
>> +        if (machine_kernel_irqchip_required(machine)) {
>> +            error_setg(errp, "kernel_irqchip requested. no XIVE support");
>> +            return;
>> +        }
>> +    }
>> +
>> +    /* QEMU XIVE support */
>> +    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, 
>> nr_servers,
>> +                                    &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +}
>> +
>> +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
>> +                                Error **errp)
>> +{
>> +    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
>> +        error_setg(errp, "IRQ %d is invalid", irq);
>> +        return -1;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
>> +{
>> +    int i;
>> +
>> +    for (i = irq; i < irq + num; ++i) {
>> +        spapr_xive_irq_disable(spapr->xive, i);
>> +    }
>> +}
>> +
>> +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
>> +{
>> +    return spapr_xive_qirq(spapr->xive, irq);
>> +}
>> +
>> +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
>> +                                      Monitor *mon)
>> +{
>> +    CPUState *cs;
>> +
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +
>> +        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
>> +    }
>> +
>> +    spapr_xive_pic_print_info(spapr->xive, mon);
> 
> Any reason the info dumping routines are split into two?

Not the same objects. Are you suggesting that we could print all the info 
from the sPAPR XIVE model ? including the XiveTCTX. I thought of doing 
that also. Fine for me if it's ok for you.

Thanks,

C.

> 
>> +}
>> +
>> +/*
>> + * XIVE uses the full IRQ number space. Set it to 8K to be compatible
>> + * with XICS.
>> + */
>> +
>> +#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
>> +#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
>> +
>> +sPAPRIrq spapr_irq_xive = {
>> +    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
>> +    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
>> +
>> +    .init        = spapr_irq_init_xive,
>> +    .claim       = spapr_irq_claim_xive,
>> +    .free        = spapr_irq_free_xive,
>> +    .qirq        = spapr_qirq_xive,
>> +    .print_info  = spapr_irq_print_info_xive,
>> +};
>> +
>>  /*
>>   * sPAPR IRQ frontend routines for devices
>>   */
>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
>>  {
>>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>>  
>> @@ -216,7 +329,7 @@ void spapr_irq_init(sPAPRMachineState *spapr, Error 
>> **errp)
>>          spapr_irq_msi_init(spapr, smc->irq->nr_msis);
>>      }
>>  
>> -    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
>> +    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
>>  }
>>  
>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error 
>> **errp)
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]