[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 1/8] spapr: introduce an IRQ allocator at the ma
From: |
Greg Kurz |
Subject: |
Re: [Qemu-devel] [PATCH 1/8] spapr: introduce an IRQ allocator at the machine level |
Date: |
Wed, 8 Nov 2017 10:54:04 +0100 |
On Sun, 29 Oct 2017 19:12:10 +0100
Cédric Le Goater <address@hidden> wrote:
> Currently, the ICSState 'ics' object of the sPAPR machine acts as the
> global interrupt source handler and also as the IRQ number allocator
> for the machine. Some IRQ numbers are allocated very early in the
> machine initialization sequence to populate the device tree, and this
> is a problem to introduce the new POWER XIVE interrupt model, as it
> needs to share the IRQ numbers with the older model.
>
> To prepare ground for XIVE, here is a proposal adding a set of new
> XICSFabric operations to let the machine handle directly the IRQ
> number allocation and to decorrelate the allocation from the interrupt
> source object :
>
> bool (*irq_test)(XICSFabric *xi, int irq);
> int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
> void (*irq_free_block)(XICSFabric *xi, int irq, int num);
>
> In these prototypes, the 'irq' parameter refers to a number in the
> global IRQ number space. Indexes for arrays storing different state
> informations on the interrupts, like the ICSIRQState, are named
> 'srcno'.
>
> On the sPAPR platform, these IRQ operations are simply backed by a
> bitmap 'irq_map' in the machine.
>
> 'irq_base' is a base number in sync with the ICSState 'offset'. It
> lets us allocate only the subset of the IRQ numbers used on the sPAPR
> platform but we could also choose to waste some extra bytes (512) and
> allocate the whole number space. 'nr_irqs' is the total number of
> IRQs, required to manipulate the bitmap.
>
> Signed-off-by: Cédric Le Goater <address@hidden>
> ---
Hi,
The idea makes sense but this patch brings too many changes IMHO.
> hw/intc/xics.c | 3 ++-
> hw/intc/xics_spapr.c | 57 ++++++++++++----------------------------------
> hw/ppc/spapr.c | 62
> +++++++++++++++++++++++++++++++++++++++++++++++++-
> include/hw/ppc/spapr.h | 4 ++++
> include/hw/ppc/xics.h | 4 ++++
> 5 files changed, 85 insertions(+), 45 deletions(-)
>
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index cc9816e7f204..2c4899f278e2 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -53,6 +53,7 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon)
> void ics_pic_print_info(ICSState *ics, Monitor *mon)
> {
> uint32_t i;
> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>
> monitor_printf(mon, "ICS %4x..%4x %p\n",
> ics->offset, ics->offset + ics->nr_irqs - 1, ics);
> @@ -64,7 +65,7 @@ void ics_pic_print_info(ICSState *ics, Monitor *mon)
> for (i = 0; i < ics->nr_irqs; i++) {
> ICSIRQState *irq = ics->irqs + i;
>
> - if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
> + if (!xic->irq_test(ics->xics, i + ics->offset)) {
> continue;
> }
> monitor_printf(mon, " %4x %s %02x %02x\n",
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index d98ea8b13068..f2e20bca5b2e 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -245,50 +245,23 @@ void xics_spapr_init(sPAPRMachineState *spapr)
> spapr_register_hypercall(H_IPOLL, h_ipoll);
> }
>
> -#define ICS_IRQ_FREE(ics, srcno) \
> - (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> -
> -static int ics_find_free_block(ICSState *ics, int num, int alignnum)
> -{
> - int first, i;
> -
> - for (first = 0; first < ics->nr_irqs; first += alignnum) {
> - if (num > (ics->nr_irqs - first)) {
> - return -1;
> - }
> - for (i = first; i < first + num; ++i) {
> - if (!ICS_IRQ_FREE(ics, i)) {
> - break;
> - }
> - }
> - if (i == (first + num)) {
> - return first;
> - }
> - }
> -
> - return -1;
> -}
> -
> int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
> {
> int irq;
> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>
> - if (!ics) {
> - return -1;
> - }
If spapr_ics_alloc() is never called with ics == NULL, then you
should assert. Also, this looks like this deserves a separate
patch.
> if (irq_hint) {
> - if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
> + if (xic->irq_test(ics->xics, irq_hint)) {
> error_setg(errp, "can't allocate IRQ %d: already in use",
> irq_hint);
> return -1;
> }
> irq = irq_hint;
> } else {
> - irq = ics_find_free_block(ics, 1, 1);
> + irq = xic->irq_alloc_block(ics->xics, 1, 0);
> if (irq < 0) {
> error_setg(errp, "can't allocate IRQ: no IRQ left");
> return -1;
> }
> - irq += ics->offset;
> }
>
> ics_set_irq_type(ics, irq - ics->offset, lsi);
> @@ -305,10 +278,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool
> lsi,
> bool align, Error **errp)
> {
> int i, first = -1;
> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>
> - if (!ics) {
> - return -1;
> - }
>
Ditto.
> /*
> * MSIMesage::data is used for storing VIRQ so
> @@ -320,9 +291,9 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool
> lsi,
> if (align) {
> assert((num == 1) || (num == 2) || (num == 4) ||
> (num == 8) || (num == 16) || (num == 32));
> - first = ics_find_free_block(ics, num, num);
> + first = xic->irq_alloc_block(ics->xics, num, num);
> } else {
> - first = ics_find_free_block(ics, num, 1);
> + first = xic->irq_alloc_block(ics->xics, num, 0);
> }
> if (first < 0) {
> error_setg(errp, "can't find a free %d-IRQ block", num);
> @@ -331,25 +302,25 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool
> lsi,
>
> if (first >= 0) {
It looks like this check isn't needed since we return in the first < 0 case.
Maybe you can fix this in a preliminary patch ?
> for (i = first; i < first + num; ++i) {
> - ics_set_irq_type(ics, i, lsi);
> + ics_set_irq_type(ics, i - ics->offset, lsi);
> }
> }
> - first += ics->offset;
>
> trace_xics_alloc_block(first, num, lsi, align);
>
> return first;
> }
>
> -static void ics_free(ICSState *ics, int srcno, int num)
> +static void ics_free(ICSState *ics, int irq, int num)
> {
> int i;
> + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>
> - for (i = srcno; i < srcno + num; ++i) {
> - if (ICS_IRQ_FREE(ics, i)) {
> - trace_xics_ics_free_warn(0, i + ics->offset);
> + for (i = irq; i < irq + num; ++i) {
> + if (xic->irq_test(ics->xics, i)) {
> + trace_xics_ics_free_warn(0, i);
> }
> - memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> + xic->irq_free_block(ics->xics, i, 1);
> }
> }
>
> @@ -357,7 +328,7 @@ void spapr_ics_free(ICSState *ics, int irq, int num)
> {
> if (ics_valid_irq(ics, irq)) {
> trace_xics_ics_free(0, irq, num);
> - ics_free(ics, irq - ics->offset, num);
> + ics_free(ics, irq, num);
> }
> }
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d682f013d422..88da4bad2328 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1681,6 +1681,24 @@ static const VMStateDescription
> vmstate_spapr_patb_entry = {
> },
> };
>
> +static bool spapr_irq_map_needed(void *opaque)
> +{
> + sPAPRMachineState *spapr = opaque;
> +
> + return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs;
> +}
This will allow migration from older QEMU. Maybe you can add a machine property
so that the subsection is only generated for newer pseries, and you'll support
migration to older QEMU (see details at the end of === Subsections === in
docs/devel/migration.txt).
> +
> +static const VMStateDescription vmstate_spapr_irq_map = {
> + .name = "spapr_irq_map",
> + .version_id = 0,
> + .minimum_version_id = 0,
> + .needed = spapr_irq_map_needed,
> + .fields = (VMStateField[]) {
> + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs),
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> static const VMStateDescription vmstate_spapr = {
> .name = "spapr",
> .version_id = 3,
> @@ -1700,6 +1718,7 @@ static const VMStateDescription vmstate_spapr = {
> &vmstate_spapr_ov5_cas,
> &vmstate_spapr_patb_entry,
> &vmstate_spapr_pending_events,
> + &vmstate_spapr_irq_map,
> NULL
> }
> };
> @@ -2337,8 +2356,13 @@ static void ppc_spapr_init(MachineState *machine)
> /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
> load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
>
> + /* Initialize the IRQ allocator */
> + spapr->nr_irqs = XICS_IRQS_SPAPR;
> + spapr->irq_map = bitmap_new(spapr->nr_irqs);
> + spapr->irq_base = XICS_IRQ_BASE;
> +
> /* Set up Interrupt Controller before we create the VCPUs */
> - xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
> + xics_system_init(machine, spapr->nr_irqs, &error_fatal);
>
> /* Set up containers for ibm,client-architecture-support negotiated
> options
> */
> @@ -3536,6 +3560,38 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int
> vcpu_id)
> return cpu ? ICP(cpu->intc) : NULL;
> }
>
> +static bool spapr_irq_test(XICSFabric *xi, int irq)
> +{
> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> + int srcno = irq - spapr->irq_base;
> +
> + return test_bit(srcno, spapr->irq_map);
> +}
> +
> +static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align)
> +{
> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> + int start = 0;
> + int srcno;
> +
> + srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, start,
> + count, align);
> + if (srcno == spapr->nr_irqs) {
> + return -1;
> + }
> +
> + bitmap_set(spapr->irq_map, srcno, count);
> + return srcno + spapr->irq_base;
> +}
> +
> +static void spapr_irq_free_block(XICSFabric *xi, int irq, int num)
> +{
> + sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> + int srcno = irq - spapr->irq_base;
> +
> + bitmap_clear(spapr->irq_map, srcno, num);
> +}
> +
> static void spapr_pic_print_info(InterruptStatsProvider *obj,
> Monitor *mon)
> {
> @@ -3630,6 +3686,10 @@ static void spapr_machine_class_init(ObjectClass *oc,
> void *data)
> xic->ics_get = spapr_ics_get;
> xic->ics_resend = spapr_ics_resend;
> xic->icp_get = spapr_icp_get;
> + xic->irq_test = spapr_irq_test;
> + xic->irq_alloc_block = spapr_irq_alloc_block;
> + xic->irq_free_block = spapr_irq_free_block;
> +
> ispc->print_info = spapr_pic_print_info;
> /* Force NUMA node memory size to be a multiple of
> * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 9d21ca9bde3a..b962bfe09bb5 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -7,6 +7,7 @@
> #include "hw/ppc/spapr_drc.h"
> #include "hw/mem/pc-dimm.h"
> #include "hw/ppc/spapr_ovec.h"
> +#include "qemu/bitmap.h"
>
> struct VIOsPAPRBus;
> struct sPAPRPHBState;
> @@ -78,6 +79,9 @@ struct sPAPRMachineState {
> struct VIOsPAPRBus *vio_bus;
> QLIST_HEAD(, sPAPRPHBState) phbs;
> struct sPAPRNVRAM *nvram;
> + int32_t nr_irqs;
> + unsigned long *irq_map;
> + uint32_t irq_base;
> ICSState *ics;
> sPAPRRTCState rtc;
>
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 28d248abad61..30e7f2e0a7dd 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -175,6 +175,10 @@ typedef struct XICSFabricClass {
> ICSState *(*ics_get)(XICSFabric *xi, int irq);
> void (*ics_resend)(XICSFabric *xi);
> ICPState *(*icp_get)(XICSFabric *xi, int server);
> + /* IRQ allocator helpers */
> + bool (*irq_test)(XICSFabric *xi, int irq);
> + int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
> + void (*irq_free_block)(XICSFabric *xi, int irq, int num);
API looks good to me. I suggest you introduce it in a dedicated patch, and
change the allocator implementation in another patch.
> } XICSFabricClass;
>
> #define XICS_IRQS_SPAPR 1024
- Re: [Qemu-devel] [PATCH 1/8] spapr: introduce an IRQ allocator at the machine level,
Greg Kurz <=