qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH 07/26] ppc/xive: add MMIO handlers to the XI


From: Cédric Le Goater
Subject: Re: [Qemu-devel] [RFC PATCH 07/26] ppc/xive: add MMIO handlers to the XIVE interrupt source
Date: Mon, 24 Jul 2017 17:55:42 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.8.0

On 07/24/2017 06:29 AM, David Gibson wrote:
> On Wed, Jul 05, 2017 at 07:13:20PM +0200, Cédric Le Goater wrote:
>> Each interrupt source is associated with a 2-bit state machine called
>> an Event State Buffer (ESB). It is controlled by MMIO to trigger
>> events.
>>
>> See code for more details on the states.
>>
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>  hw/intc/xive.c        | 230 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/xive.h |   3 +
>>  2 files changed, 233 insertions(+)
>>
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 9ff14c0da595..816031b8ac81 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -32,6 +32,226 @@ static void xive_icp_irq(XiveICSState *xs, int lisn)
>>  }
>>  
>>  /*
>> + * "magic" Event State Buffer (ESB) MMIO offsets.
>> + *
>> + * Each interrupt source has a 2-bit state machine called ESB
>> + * which can be controlled by MMIO. It's made of 2 bits, P and
>> + * Q. P indicates that an interrupt is pending (has been sent
>> + * to a queue and is waiting for an EOI). Q indicates that the
>> + * interrupt has been triggered while pending.
>> + *
>> + * This acts as a coalescing mechanism in order to guarantee
>> + * that a given interrupt only occurs at most once in a queue.
>> + *
>> + * When doing an EOI, the Q bit will indicate if the interrupt
>> + * needs to be re-triggered.
>> + *
>> + * The following offsets into the ESB MMIO allow to read or
>> + * manipulate the PQ bits. They must be used with an 8-bytes
>> + * load instruction. They all return the previous state of the
>> + * interrupt (atomically).
>> + *
>> + * Additionally, some ESB pages support doing an EOI via a
>> + * store at 0 and some ESBs support doing a trigger via a
>> + * separate trigger page.
>> + */
>> +#define XIVE_ESB_GET            0x800
>> +#define XIVE_ESB_SET_PQ_00      0xc00
>> +#define XIVE_ESB_SET_PQ_01      0xd00
>> +#define XIVE_ESB_SET_PQ_10      0xe00
>> +#define XIVE_ESB_SET_PQ_11      0xf00
>> +
>> +#define XIVE_ESB_VAL_P          0x2
>> +#define XIVE_ESB_VAL_Q          0x1
>> +
>> +#define XIVE_ESB_RESET          0x0
>> +#define XIVE_ESB_PENDING        0x2
>> +#define XIVE_ESB_QUEUED         0x3
>> +#define XIVE_ESB_OFF            0x1
>> +
>> +static uint8_t xive_pq_get(XIVE *x, uint32_t lisn)
>> +{
>> +    uint32_t idx = lisn;
>> +    uint32_t byte = idx / 4;
>> +    uint32_t bit  = (idx % 4) * 2;
>> +    uint8_t* pqs = (uint8_t *) x->sbe;
>> +
>> +    return (pqs[byte] >> bit) & 0x3;
>> +}
>> +
>> +static void xive_pq_set(XIVE *x, uint32_t lisn, uint8_t pq)
>> +{
>> +    uint32_t idx = lisn;
>> +    uint32_t byte = idx / 4;
>> +    uint32_t bit  = (idx % 4) * 2;
>> +    uint8_t* pqs = (uint8_t *) x->sbe;
>> +
>> +    pqs[byte] &= ~(0x3 << bit);
>> +    pqs[byte] |= (pq & 0x3) << bit;
> 
> I know it probably amounts to the same thing given the context, but
> I'd be more comfortable with a temporary and an obviously atomic
> update than two writes to the real state variable.

yes. I will look better.

>> +}
>> +
>> +static bool xive_pq_eoi(XIVE *x, uint32_t lisn)
>> +{
>> +    uint8_t old_pq = xive_pq_get(x, lisn);
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +        xive_pq_set(x, lisn, XIVE_ESB_RESET);
>> +        return false;
>> +    case XIVE_ESB_PENDING:
>> +        xive_pq_set(x, lisn, XIVE_ESB_RESET);
>> +        return false;
>> +    case XIVE_ESB_QUEUED:
>> +        xive_pq_set(x, lisn, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_OFF:
>> +        xive_pq_set(x, lisn, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +static bool xive_pq_trigger(XIVE *x, uint32_t lisn)
>> +{
>> +    uint8_t old_pq = xive_pq_get(x, lisn);
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +        xive_pq_set(x, lisn, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_PENDING:
>> +        xive_pq_set(x, lisn, XIVE_ESB_QUEUED);
>> +        return true;
>> +    case XIVE_ESB_QUEUED:
>> +        xive_pq_set(x, lisn, XIVE_ESB_QUEUED);
>> +        return true;
>> +    case XIVE_ESB_OFF:
>> +        xive_pq_set(x, lisn, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +/*
>> + * XIVE Interrupt Source MMIOs
>> + */
>> +static void xive_ics_eoi(XiveICSState *xs, uint32_t srcno)
>> +{
>> +    ICSIRQState *irq = &ICS_BASE(xs)->irqs[srcno];
>> +
>> +    if (irq->flags & XICS_FLAGS_IRQ_LSI) {
>> +        irq->status &= ~XICS_STATUS_SENT;
>> +    }
>> +}
>> +
>> +/* TODO: handle second page */
> 
> Is this comment still relevent?

Some HW have a second page to trigger the event. I am not sure we need 
to model it though. I will make some inquiries. 

>> +static uint64_t xive_esb_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    XiveICSState *xs = ICS_XIVE(opaque);
>> +    XIVE *x = xs->xive;
>> +    uint32_t offset = addr & 0xF00;
>> +    uint32_t srcno = addr >> xs->esb_shift;
>> +    uint32_t lisn = srcno + ICS_BASE(xs)->offset;
>> +    XiveIVE *ive;
>> +    uint64_t ret = -1;
>> +
>> +    ive = xive_get_ive(x, lisn);
>> +    if (!ive || !(ive->w & IVE_VALID))  {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %d\n", lisn);
>> +        goto out;
>> +    }
>> +
>> +    if (srcno >= ICS_BASE(xs)->nr_irqs) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "XIVE: invalid IRQ number: %d/%d lisn: %d\n",
>> +                      srcno, ICS_BASE(xs)->nr_irqs, lisn);
>> +        goto out;
>> +    }
>> +
>> +    switch (offset) {
>> +    case 0:
>> +        xive_ics_eoi(xs, srcno);
>> +
>> +        /* return TRUE or FALSE depending on PQ value */
>> +        ret = xive_pq_eoi(x, lisn);
>> +        break;
>> +
>> +    case XIVE_ESB_GET:
>> +        ret = xive_pq_get(x, lisn);
>> +        break;
>> +
>> +    case XIVE_ESB_SET_PQ_00:
>> +    case XIVE_ESB_SET_PQ_01:
>> +    case XIVE_ESB_SET_PQ_10:
>> +    case XIVE_ESB_SET_PQ_11:
>> +        ret = xive_pq_get(x, lisn);
>> +        xive_pq_set(x, lisn, (offset >> 8) & 0x3);
> 
> Again I'd prefer xive_pq_set() return the old value itself, for more
> obvious atomicity.

yes. ok.

> 
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", 
>> offset);
>> +    }
>> +
>> +out:
>> +    return ret;
>> +}
>> +
>> +static void xive_esb_write(void *opaque, hwaddr addr,
>> +                           uint64_t value, unsigned size)
>> +{
>> +    XiveICSState *xs = ICS_XIVE(opaque);
>> +    XIVE *x = xs->xive;
>> +    uint32_t offset = addr & 0xF00;
>> +    uint32_t srcno = addr >> xs->esb_shift;
>> +    uint32_t lisn = srcno + ICS_BASE(xs)->offset;
>> +    XiveIVE *ive;
>> +    bool notify = false;
>> +
>> +    ive = xive_get_ive(x, lisn);
>> +    if (!ive || !(ive->w & IVE_VALID))  {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %d\n", lisn);
>> +        return;
>> +    }
> 
> Having this code associated with the individual ICS look directly at
> the IVE table in the core xive object seems a bit dubious.

The IVE table holds the validity and mask status of the interrupt 
entries, so we need that lookup. However, (continues below) ...

> This also
> points out another mismatch between the re-used ICS code and the new
> XIVE code: ICS gathers all the per-source-irq flags/state into the
> irqstate structure, whereas xive has per-irq information in the
> centralized ecb and IVE tables.  There can certainly be good reasons
> for that, but using both at once is kind of clunky.

I understand that you would rather put the esbs in the source they 
belong to. That is the case on real HW but it makes the modeling a 
bit more difficult. We would need to choose a MMIO address to give 
to the guest OS. I had some issues with the allocator (I need 
to look at this problem closer).

It might also be an "issue" for KVM. Ben talked about maintaining 
all the esbs of a guest under a single memory region to be able to 
map the pages in the host.

Any how, I agree this is another point to discuss in the sPAPR 
model.

Thanks,

C. 


>> +    if (srcno >= ICS_BASE(xs)->nr_irqs) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "XIVE: invalid IRQ number: %d/%d lisn: %d\n",
>> +                      srcno, ICS_BASE(xs)->nr_irqs, lisn);
>> +        return;
>> +    }
>> +
>> +    switch (offset) {
>> +    case 0:
>> +        /* TODO: should we trigger even if the IVE is masked ? */
>> +        notify = xive_pq_trigger(x, lisn);
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
>> +                      offset);
>> +        return;
>> +    }
>> +
>> +    if (notify && !(ive->w & IVE_MASKED)) {
>> +        qemu_irq_pulse(ICS_BASE(xs)->qirqs[srcno]);
>> +    }
>> +}
>> +
>> +static const MemoryRegionOps xive_esb_ops = {
>> +    .read = xive_esb_read,
>> +    .write = xive_esb_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +};
>> +
>> +/*
>>   * XIVE Interrupt Source
>>   */
>>  static void xive_ics_set_irq_msi(XiveICSState *xs, int srcno, int val)
>> @@ -106,15 +326,25 @@ static void xive_ics_realize(ICSState *ics, Error 
>> **errp)
>>          return;
>>      }
>>  
>> +    if (!xs->esb_shift) {
>> +        error_setg(errp, "ESB page size needs to be greater 0");
>> +        return;
>> +    }
>> +
>>      ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
>>      ics->qirqs = qemu_allocate_irqs(xive_ics_set_irq, xs, ics->nr_irqs);
>>  
>> +    memory_region_init_io(&xs->esb_iomem, OBJECT(xs), &xive_esb_ops, xs,
>> +                          "xive.esb",
>> +                          (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs);
>> +
>>      qemu_register_reset(xive_ics_reset, xs);
>>  }
>>  
>>  static Property xive_ics_properties[] = {
>>      DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
>>      DEFINE_PROP_UINT32("irq-base", ICSState, offset, 0),
>> +    DEFINE_PROP_UINT32("shift", XiveICSState, esb_shift, 0),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 544cc6e0c796..5303d96f5f59 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -33,6 +33,9 @@ typedef struct XiveICSState XiveICSState;
>>  struct XiveICSState {
>>      ICSState parent_obj;
>>  
>> +    uint32_t     esb_shift;
>> +    MemoryRegion esb_iomem;
>> +
>>      XIVE         *xive;
>>  };
>>  
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]