qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH] spapr/irq: Only claim VALID interrupts at the KVM


From: David Gibson
Subject: Re: [Qemu-ppc] [PATCH] spapr/irq: Only claim VALID interrupts at the KVM level
Date: Wed, 11 Sep 2019 11:58:09 +1000
User-agent: Mutt/1.12.1 (2019-06-15)

On Tue, Sep 10, 2019 at 08:13:26AM +0200, Cédric Le Goater wrote:
> A typical pseries VM with 16 vCPUs, one disk, one network adapater
> uses less than 100 interrupts but the whole IRQ number space of the
> QEMU machine is allocated at reset time and it is 8K wide. This is
> wasting considerably the global IRQ space of the overall system which
> has 1M interrupts per socket on a POWER9.
> 
> To optimise the HW resources, only request at the KVM level interrupts
> which have been claimed. This will help up increase the maximum number
> of VMs per system.
> 
> To keep migration compatibility, we introduce a machine class
> attribute to adapt the reset behavior on older pseries machines.
> 
> Signed-off-by: Cédric Le Goater <address@hidden>

The overall idea looks good to me.

I don't really see what the point of the reset-all property is.  How
this is handled shouldn't be guest visible at all AFAICT, so there's
no need to grandfather it for older machine types.

Since ics_irq_free() has uses in the existing code unrelated to this,
it would be nice to pull introducing that out into a separate patch.

> ---
>  include/hw/ppc/spapr.h      |  1 +
>  include/hw/ppc/spapr_xive.h |  1 +
>  include/hw/ppc/xics.h       |  6 ++++++
>  hw/intc/spapr_xive.c        |  1 +
>  hw/intc/spapr_xive_kvm.c    | 34 +++++++++++++++++++++++++++++++---
>  hw/intc/xics.c              |  1 +
>  hw/intc/xics_kvm.c          | 14 ++++++++++++++
>  hw/ppc/spapr.c              |  1 +
>  hw/ppc/spapr_irq.c          | 13 +++++++------
>  9 files changed, 63 insertions(+), 9 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 03111fd55bc8..6c622954a60c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -131,6 +131,7 @@ struct SpaprMachineClass {
>      SpaprResizeHpt resize_hpt_default;
>      SpaprCapabilities default_caps;
>      SpaprIrq *irq;
> +    bool irq_reset_all;
>  };
>  
>  /**
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index bfd40f01d882..b33913eb0f28 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -45,6 +45,7 @@ typedef struct SpaprXive {
>      void          *tm_mmap;
>      MemoryRegion  tm_mmio_kvm;
>      VMChangeStateEntry *change;
> +    bool          reset_all;
>  } SpaprXive;
>  
>  /*
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index f2a8d6a4b4f9..856815362406 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -117,6 +117,7 @@ struct ICSState {
>      DeviceState parent_obj;
>      /*< public >*/
>      uint32_t nr_irqs;
> +    bool reset_all;
>      uint32_t offset;
>      ICSIRQState *irqs;
>      XICSFabric *xics;
> @@ -179,6 +180,11 @@ void ics_simple_write_xive(ICSState *ics, int nr, int 
> server,
>                             uint8_t priority, uint8_t saved_priority);
>  void ics_simple_set_irq(void *opaque, int srcno, int val);
>  
> +static inline bool ics_irq_free(ICSState *ics, uint32_t srcno)
> +{
> +    return !(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK);
> +}
> +
>  void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
>  void icp_pic_print_info(ICPState *icp, Monitor *mon);
>  void ics_pic_print_info(ICSState *ics, Monitor *mon);
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index c1c97192a7d2..b717d9e09314 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -492,6 +492,7 @@ static Property spapr_xive_properties[] = {
>      DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends, 0),
>      DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
>      DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
> +    DEFINE_PROP_BOOL("reset-all", ICSState, reset_all, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 17af4d19f54e..225abce36270 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -253,13 +253,23 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int 
> srcno, Error **errp)
>                        true, errp);
>  }
>  
> +static bool xive_source_skip_reset(SpaprXive *xive, int srcno)
> +{
> +    return !xive->reset_all && !xive_eas_is_valid(&xive->eat[srcno]);
> +}
> +
>  static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>  {
> +    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
>      int i;
>  
>      for (i = 0; i < xsrc->nr_irqs; i++) {
>          Error *local_err = NULL;
>  
> +        if (xive_source_skip_reset(xive, i)) {
> +            continue;
> +        }
> +
>          kvmppc_xive_source_reset_one(xsrc, i, &local_err);
>          if (local_err) {
>              error_propagate(errp, local_err);
> @@ -328,11 +338,18 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int 
> srcno, uint32_t offset,
>  
>  static void kvmppc_xive_source_get_state(XiveSource *xsrc)
>  {
> +    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
>      int i;
>  
>      for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq;
> +
> +        if (xive_source_skip_reset(xive, i)) {
> +            continue;
> +        }
> +
>          /* Perform a load without side effect to retrieve the PQ bits */
> -        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>  
>          /* and save PQ locally */
>          xive_source_esb_set(xsrc, i, pq);
> @@ -521,9 +538,14 @@ static void kvmppc_xive_change_state_handler(void 
> *opaque, int running,
>       */
>      if (running) {
>          for (i = 0; i < xsrc->nr_irqs; i++) {
> -            uint8_t pq = xive_source_esb_get(xsrc, i);
> +            uint8_t pq;
>              uint8_t old_pq;
>  
> +            if (xive_source_skip_reset(xive, i)) {
> +                continue;
> +            }
> +
> +            pq = xive_source_esb_get(xsrc, i);
>              old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
>  
>              /*
> @@ -545,7 +567,13 @@ static void kvmppc_xive_change_state_handler(void 
> *opaque, int running,
>       * migration is in progress.
>       */
>      for (i = 0; i < xsrc->nr_irqs; i++) {
> -        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +        uint8_t pq;
> +
> +        if (xive_source_skip_reset(xive, i)) {
> +            continue;
> +        }
> +
> +        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>  
>          /*
>           * PQ is set to PENDING to possibly catch a triggered
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index b2fca2975cc4..ae3f83cf0aad 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -746,6 +746,7 @@ static const VMStateDescription vmstate_ics_base = {
>  
>  static Property ics_base_properties[] = {
>      DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
> +    DEFINE_PROP_BOOL("reset-all", ICSState, reset_all, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> index a4d2e876cc5f..aa017b99801c 100644
> --- a/hw/intc/xics_kvm.c
> +++ b/hw/intc/xics_kvm.c
> @@ -177,6 +177,12 @@ void icp_kvm_realize(DeviceState *dev, Error **errp)
>  /*
>   * ICS-KVM
>   */
> +
> +static bool ics_irq_skip_reset(ICSState *ics, int srcno)
> +{
> +    return !ics->reset_all && ics_irq_free(ics, srcno);
> +}
> +
>  void ics_get_kvm_state(ICSState *ics)
>  {
>      uint64_t state;
> @@ -190,6 +196,10 @@ void ics_get_kvm_state(ICSState *ics)
>      for (i = 0; i < ics->nr_irqs; i++) {
>          ICSIRQState *irq = &ics->irqs[i];
>  
> +        if (ics_irq_skip_reset(ics, i)) {
> +            continue;
> +        }
> +
>          kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
>                            i + ics->offset, &state, false, &error_fatal);
>  
> @@ -301,6 +311,10 @@ int ics_set_kvm_state(ICSState *ics, Error **errp)
>          Error *local_err = NULL;
>          int ret;
>  
> +        if (ics_irq_skip_reset(ics, i)) {
> +            continue;
> +        }
> +
>          ret = ics_set_kvm_state_one(ics, i, &local_err);
>          if (ret < 0) {
>              error_propagate(errp, local_err);
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 7124053b43b7..af89f3b6c698 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4594,6 +4594,7 @@ static void 
> spapr_machine_4_1_class_options(MachineClass *mc)
>      smc->linux_pci_probe = false;
>      compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
>      compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> +    smc->irq_reset_all = true;
>  }
>  
>  DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 06fe2432bae5..8c3682613404 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -97,11 +97,13 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr, 
> int nr_irqs,
>  {
>      Object *obj;
>      Error *local_err = NULL;
> +    bool reset_all = SPAPR_MACHINE_GET_CLASS(spapr)->irq_reset_all;
>  
>      obj = object_new(TYPE_ICS_SIMPLE);
>      object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
>      object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
>                                     &error_fatal);
> +    object_property_set_bool(obj, reset_all, "reset-all",  &error_fatal);
>      object_property_set_int(obj, nr_irqs, "nr-irqs",  &error_fatal);
>      object_property_set_bool(obj, true, "realized", &local_err);
>      if (local_err) {
> @@ -114,9 +116,6 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr, 
> int nr_irqs,
>      xics_spapr_init(spapr);
>  }
>  
> -#define ICS_IRQ_FREE(ics, srcno)   \
> -    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> -
>  static int spapr_irq_claim_xics(SpaprMachineState *spapr, int irq, bool lsi,
>                                  Error **errp)
>  {
> @@ -129,7 +128,7 @@ static int spapr_irq_claim_xics(SpaprMachineState *spapr, 
> int irq, bool lsi,
>          return -1;
>      }
>  
> -    if (!ICS_IRQ_FREE(ics, irq - ics->offset)) {
> +    if (!ics_irq_free(ics, irq - ics->offset)) {
>          error_setg(errp, "IRQ %d is not free", irq);
>          return -1;
>      }
> @@ -147,7 +146,7 @@ static void spapr_irq_free_xics(SpaprMachineState *spapr, 
> int irq, int num)
>      if (ics_valid_irq(ics, irq)) {
>          trace_spapr_irq_free(0, irq, num);
>          for (i = srcno; i < srcno + num; ++i) {
> -            if (ICS_IRQ_FREE(ics, i)) {
> +            if (ics_irq_free(ics, i)) {
>                  trace_spapr_irq_free_warn(0, i);
>              }
>              memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> @@ -270,9 +269,11 @@ static void spapr_irq_init_xive(SpaprMachineState 
> *spapr, int nr_irqs,
>  {
>      uint32_t nr_servers = spapr_max_server_number(spapr);
>      DeviceState *dev;
> +    bool reset_all = SPAPR_MACHINE_GET_CLASS(spapr)->irq_reset_all;
>      int i;
>  
>      dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
> +    object_property_set_bool(OBJECT(dev), reset_all, "reset-all", 
> &error_fatal);
>      qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs);
>      /*
>       * 8 XIVE END structures per CPU. One for each available priority
> @@ -767,7 +768,7 @@ static int ics_find_free_block(ICSState *ics, int num, 
> int alignnum)
>              return -1;
>          }
>          for (i = first; i < first + num; ++i) {
> -            if (!ICS_IRQ_FREE(ics, i)) {
> +            if (!ics_irq_free(ics, i)) {
>                  break;
>              }
>          }

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]