qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH v5 16/36] spapr: add hcalls support for the XIVE e


From: Cédric Le Goater
Subject: Re: [Qemu-ppc] [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation interrupt mode
Date: Thu, 29 Nov 2018 17:04:50 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.0

On 11/29/18 2:23 AM, David Gibson wrote:
> On Wed, Nov 28, 2018 at 11:21:37PM +0100, Cédric Le Goater wrote:
>> On 11/28/18 5:25 AM, David Gibson wrote:
>>> On Fri, Nov 16, 2018 at 11:57:09AM +0100, Cédric Le Goater wrote:
>>>> The different XIVE virtualization structures (sources and event queues)
>>>> are configured with a set of Hypervisor calls :
>>>>
>>>>  - H_INT_GET_SOURCE_INFO
>>>>
>>>>    used to obtain the address of the MMIO page of the Event State
>>>>    Buffer (ESB) entry associated with the source.
>>>>
>>>>  - H_INT_SET_SOURCE_CONFIG
>>>>
>>>>    assigns a source to a "target".
>>>>
>>>>  - H_INT_GET_SOURCE_CONFIG
>>>>
>>>>    determines which "target" and "priority" is assigned to a source
>>>>
>>>>  - H_INT_GET_QUEUE_INFO
>>>>
>>>>    returns the address of the notification management page associated
>>>>    with the specified "target" and "priority".
>>>>
>>>>  - H_INT_SET_QUEUE_CONFIG
>>>>
>>>>    sets or resets the event queue for a given "target" and "priority".
>>>>    It is also used to set the notification configuration associated
>>>>    with the queue, only unconditional notification is supported for
>>>>    the moment. Reset is performed with a queue size of 0 and queueing
>>>>    is disabled in that case.
>>>>
>>>>  - H_INT_GET_QUEUE_CONFIG
>>>>
>>>>    returns the queue settings for a given "target" and "priority".
>>>>
>>>>  - H_INT_RESET
>>>>
>>>>    resets all of the guest's internal interrupt structures to their
>>>>    initial state, losing all configuration set via the hcalls
>>>>    H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
>>>>
>>>>  - H_INT_SYNC
>>>>
>>>>    issue a synchronisation on a source to make sure all notifications
>>>>    have reached their queue.
>>>>
>>>> Calls that still need to be addressed :
>>>>
>>>>    H_INT_SET_OS_REPORTING_LINE
>>>>    H_INT_GET_OS_REPORTING_LINE
>>>>
>>>> See the code for more documentation on each hcall.
>>>>
>>>> Signed-off-by: Cédric Le Goater <address@hidden>
>>>> ---
>>>>  include/hw/ppc/spapr.h      |  15 +-
>>>>  include/hw/ppc/spapr_xive.h |   6 +
>>>>  hw/intc/spapr_xive_hcall.c  | 892 ++++++++++++++++++++++++++++++++++++
>>>>  hw/ppc/spapr_irq.c          |   2 +
>>>>  hw/intc/Makefile.objs       |   2 +-
>>>>  5 files changed, 915 insertions(+), 2 deletions(-)
>>>>  create mode 100644 hw/intc/spapr_xive_hcall.c
>>>>
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 1fbc2663e06c..8415faea7b82 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -452,7 +452,20 @@ struct sPAPRMachineState {
>>>>  #define H_INVALIDATE_PID        0x378
>>>>  #define H_REGISTER_PROC_TBL     0x37C
>>>>  #define H_SIGNAL_SYS_RESET      0x380
>>>> -#define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
>>>> +
>>>> +#define H_INT_GET_SOURCE_INFO   0x3A8
>>>> +#define H_INT_SET_SOURCE_CONFIG 0x3AC
>>>> +#define H_INT_GET_SOURCE_CONFIG 0x3B0
>>>> +#define H_INT_GET_QUEUE_INFO    0x3B4
>>>> +#define H_INT_SET_QUEUE_CONFIG  0x3B8
>>>> +#define H_INT_GET_QUEUE_CONFIG  0x3BC
>>>> +#define H_INT_SET_OS_REPORTING_LINE 0x3C0
>>>> +#define H_INT_GET_OS_REPORTING_LINE 0x3C4
>>>> +#define H_INT_ESB               0x3C8
>>>> +#define H_INT_SYNC              0x3CC
>>>> +#define H_INT_RESET             0x3D0
>>>> +
>>>> +#define MAX_HCALL_OPCODE        H_INT_RESET
>>>>  
>>>>  /* The hcalls above are standardized in PAPR and implemented by pHyp
>>>>   * as well.
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> index 3f65b8f485fd..418511f3dc10 100644
>>>> --- a/include/hw/ppc/spapr_xive.h
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -60,4 +60,10 @@ int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t 
>>>> target, uint8_t prio,
>>>>  int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
>>>>                            uint8_t *out_end_blk, uint32_t *out_end_idx);
>>>>  
>>>> +bool spapr_xive_priority_is_valid(uint8_t priority);
>>>
>>> AFAICT this could be a local function.
>>
>> the KVM model uses it also, when collecting state from the KVM device 
>> to build the QEMU ENDT.
>>
>>>> +
>>>> +typedef struct sPAPRMachineState sPAPRMachineState;
>>>> +
>>>> +void spapr_xive_hcall_init(sPAPRMachineState *spapr);
>>>> +
>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/hw/intc/spapr_xive_hcall.c b/hw/intc/spapr_xive_hcall.c
>>>> new file mode 100644
>>>> index 000000000000..52e4e23995f5
>>>> --- /dev/null
>>>> +++ b/hw/intc/spapr_xive_hcall.c
>>>> @@ -0,0 +1,892 @@
>>>> +/*
>>>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qemu/log.h"
>>>> +#include "qapi/error.h"
>>>> +#include "cpu.h"
>>>> +#include "hw/ppc/fdt.h"
>>>> +#include "hw/ppc/spapr.h"
>>>> +#include "hw/ppc/spapr_xive.h"
>>>> +#include "hw/ppc/xive_regs.h"
>>>> +#include "monitor/monitor.h"
>>>
>>> Fwiw, I don't think it's particularly necessary to split the hcall
>>> handling out into a separate .c file.
>>
>> ok. let's move it to spapr_xive then ? It might help in reducing the 
>> exported funtions. 
> 
> Yes, I think so.
> 
>>>> +/*
>>>> + * OPAL uses the priority 7 EQ to automatically escalate interrupts
>>>> + * for all other queues (DD2.X POWER9). So only priorities [0..6] are
>>>> + * available for the guest.
>>>
>>> Referencing OPAL behaviour doesn't really make sense in the context of
>>> PAPR.  
>>
>> It's an OPAL constraint which pHyp doesn't have. So its a QEMU/KVM 
>> constraint also.
> 
> Right, I realized that a few patches on.  Maybe rephrase this to
> 
>    Linux hosts under OPAL reserve priority 7 for their own escalation
>    interrupts.  So we only allow the guest to use priorities [0..6].

OK.

> The point here is that we're emphasizing that this is a design
> decision to make the host implementation easier, rather than a
> fundamental constraint.
> 
>>> What I think you're getting at is that the PAPR spec only
>>> allows a PAPR guest to use priorities 0..6 (or at least it will if the
>>> XIVE updated spec ever gets published).  
>>
>> It's not in the spec. the XIVE sPAPR spec should be frozen soon btw. 
>>  
>>> The fact that this allows the
>>> host use 7 for escalations is a design rationale 
>>> but not really relevant to the guest device itself. 
>>
>> The guest should be aware of which priorities are reserved for
>> the hypervisor though.
>>
>>>> + */
>>>> +bool spapr_xive_priority_is_valid(uint8_t priority)
>>>> +{
>>>> +    switch (priority) {
>>>> +    case 0 ... 6:
>>>> +        return true;
>>>> +    case 7: /* OPAL escalation queue */
>>>> +    default:
>>>> +        return false;
>>>> +    }
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical
>>>> + * real address of the MMIO page through which the Event State Buffer
>>>> + * entry associated with the value of the "lisn" parameter is managed.
>>>> + *
>>>> + * Parameters:
>>>> + * Input
>>>> + * - "flags"
>>>> + *       Bits 0-63 reserved
>>>> + * - "lisn" is per "interrupts", "interrupt-map", or
>>>> + *       "ibm,xive-lisn-ranges" properties, or as returned by the
>>>> + *       ibm,query-interrupt-source-number RTAS call, or as returned
>>>> + *       by the H_ALLOCATE_VAS_WINDOW hcall
>>>
>>> I've not heard of H_ALLOCATE_VAS_WINDOW.  Is that something we intend
>>> to implement in kvm/qemu, or is it only of interest for PowerVM?
>>
>> The hcall is part of the PAPR NX Interfaces and it returns interrupt
>> numbers. I don't know if any work has been done on the topic.  
> 
> What's a "PAPR NX"?

A way for the PAPR guests to access the POWER coprocessors doing 
compression and encryption. I really don't know much about this.

>>> Also, putting the register numbers on the inputs as well as the
>>> outputs would be helpful.
>>
>> yes. I will add them.
>>
>>>> + *
>>>> + * Output
>>>> + * - R4: "flags"
>>>> + *       Bits 0-59: Reserved
>>>> + *       Bit 60: H_INT_ESB must be used for Event State Buffer
>>>> + *               management
>>>> + *       Bit 61: 1 == LSI  0 == MSI
>>>> + *       Bit 62: the full function page supports trigger
>>>> + *       Bit 63: Store EOI Supported
>>>> + * - R5: Logical Real address of full function Event State Buffer
>>>> + *       management page, -1 if ESB hcall flag is set to 1.
>>>
>>> You've defined what H_INT_ESB means above, so it will be clearer if
>>> you reference that by name here.
>>
>> yes. 
>>
>>>> + * - R6: Logical Real Address of trigger only Event State Buffer
>>>> + *       management page or -1.
>>>> + * - R7: Power of 2 page size for the ESB management pages returned in
>>>> + *       R5 and R6.
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_SRC_H_INT_ESB     PPC_BIT(60) /* ESB manage with 
>>>> H_INT_ESB */
>>>> +#define SPAPR_XIVE_SRC_LSI           PPC_BIT(61) /* Virtual LSI type */
>>>> +#define SPAPR_XIVE_SRC_TRIGGER       PPC_BIT(62) /* Trigger and management
>>>> +                                                    on same page */
>>>> +#define SPAPR_XIVE_SRC_STORE_EOI     PPC_BIT(63) /* Store EOI support */
>>>
>>> Probably makes sense to put these #defines in spapr.h since they form
>>> part of the PAPR interface definition.
>>
>> ok.
>>
>>>
>>>> +static target_ulong h_int_get_source_info(PowerPCCPU *cpu,
>>>> +                                          sPAPRMachineState *spapr,
>>>> +                                          target_ulong opcode,
>>>> +                                          target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveSource *xsrc = &xive->source;
>>>> +    XiveEAS eas;
>>>> +    target_ulong flags  = args[0];
>>>> +    target_ulong lisn   = args[1];
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    /* All sources are emulated under the main XIVE object and share
>>>> +     * the same characteristics.
>>>> +     */
>>>> +    args[0] = 0;
>>>> +    if (!xive_source_esb_has_2page(xsrc)) {
>>>> +        args[0] |= SPAPR_XIVE_SRC_TRIGGER;
>>>> +    }
>>>> +    if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) {
>>>> +        args[0] |= SPAPR_XIVE_SRC_STORE_EOI;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Force the use of the H_INT_ESB hcall in case of an LSI
>>>> +     * interrupt. This is necessary under KVM to re-trigger the
>>>> +     * interrupt if the level is still asserted
>>>> +     */
>>>> +    if (xive_source_irq_is_lsi(xsrc, lisn)) {
>>>> +        args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI;
>>>> +    }
>>>> +
>>>> +    if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
>>>> +        args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn);
>>>> +    } else {
>>>> +        args[1] = -1;
>>>> +    }
>>>> +
>>>> +    if (xive_source_esb_has_2page(xsrc)) {
>>>> +        args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn);
>>>> +    } else {
>>>> +        args[2] = -1;
>>>> +    }
>>>
>>> Do we also need to keep this address clear in the H_INT_ESB case?
>>
>> I think not, but the specs are not very clear on that topic. I will
>> ask for clarification and use a -1 for now. We can not do loads on
>> the trigger page so it can not be used by the H_INT_ESB hcall.
>>
>>>
>>>> +    args[3] = TARGET_PAGE_SIZE;
>>>
>>> That seems wrong.  
>>
>> This is utterly wrong. it should be a power of 2 number ... I got
>> it right under KVM though. I guess that ioremap() under Linux rounds 
>> up the size to the page size in use, so, that's why it didn't blow
>> up under TCG.
>>
>>> TARGET_PAGE_SIZE is generally 4kiB, but won't these usually
>>> actually be 64kiB?
>>
>> yes. So what should I use to get a PAGE_SHIFT instead ? 
> 
> Erm, that gets a bit tricky, since qemu in a sense doesn't know the
> guest's page size.
> 
> But.. don't you actually want the esb_shift here, not PAGE_SHIFT - it
> could matter for the 2 page * 64kiB variant, yes?

Yes. we just want the page_shift of the ESB page, whether it's one or
two pages. The other registers inform the guest if there are one or 
two ESB page in use. 


>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical
>>>> + * Interrupt Source to a target. The Logical Interrupt Source is
>>>> + * designated with the "lisn" parameter and the target is designated
>>>> + * with the "target" and "priority" parameters.  Upon return from the
>>>> + * hcall(), no additional interrupts will be directed to the old EQ.
>>>> + *
>>>> + * TODO: The old EQ should be investigated for interrupts that
>>>> + * occurred prior to or during the hcall().
>>>
>>> Isn't that the responsibility of the guest?
>>
>> It should yes.
> 
> Right, so not a TODO for the qemu code.

yes

> 
>>
>>>
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-61: Reserved
>>>> + *      Bit 62: set the "eisn" in the EA
>>>
>>> What's the "EA"?  Do you mean the EAS?
>>
>> Another XIVE acronym, EA for Event Assignment. I think we can forget
>> this one and just use EAS.
>>  
>>>
>>>> + *      Bit 63: masks the interrupt source in the hardware interrupt
>>>> + *      control structure. An interrupt masked by this mechanism will
>>>> + *      be dropped, but it's source state bits will still be
>>>> + *      set. There is no race-free way of unmasking and restoring the
>>>> + *      source. Thus this should only be used in interrupts that are
>>>> + *      also masked at the source, and only in cases where the
>>>> + *      interrupt is not meant to be used for a large amount of time
>>>> + *      because no valid target exists for it for example
>>>> + * - "lisn" is per "interrupts", "interrupt-map", or
>>>> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
>>>> + *      ibm,query-interrupt-source-number RTAS call, or as returned by
>>>> + *      the H_ALLOCATE_VAS_WINDOW hcall
>>>> + * - "target" is per "ibm,ppc-interrupt-server#s" or
>>>> + *      "ibm,ppc-interrupt-gserver#s"
>>>> + * - "priority" is a valid priority not in
>>>> + *      "ibm,plat-res-int-priorities"
>>>> + * - "eisn" is the guest EISN associated with the "lisn"
>>>
>>> I don't think the EISN term has been used before in the series.  
>>
>> Effective Interrupt Source Number, which is the event data enqueued
>> in the OS EQ.
>>
>> I'm planning on adding some more acronyms used by the sPAPR hcalls
>> in this file. There are only a couple.
> 
> That would be helpful.
> 
>>> I'm guessing this is the guest-assigned global interrupt number?
>>
>> yes 
>>
>>>> + *
>>>> + * Output:
>>>> + * - None
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62)
>>>> +#define SPAPR_XIVE_SRC_MASK     PPC_BIT(63)
>>>> +
>>>> +static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
>>>> +                                            sPAPRMachineState *spapr,
>>>> +                                            target_ulong opcode,
>>>> +                                            target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>>>> +    XiveEAS eas, new_eas;
>>>> +    target_ulong flags    = args[0];
>>>> +    target_ulong lisn     = args[1];
>>>> +    target_ulong target   = args[2];
>>>> +    target_ulong priority = args[3];
>>>> +    target_ulong eisn     = args[4];
>>>> +    uint8_t end_blk;
>>>> +    uint32_t end_idx;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    /* priority 0xff is used to reset the EAS */
>>>> +    if (priority == 0xff) {
>>>> +        new_eas.w = EAS_VALID | EAS_MASKED;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    if (flags & SPAPR_XIVE_SRC_MASK) {
>>>> +        new_eas.w = eas.w | EAS_MASKED;
>>>> +    } else {
>>>> +        new_eas.w = eas.w & ~EAS_MASKED;
>>>> +    }
>>>> +
>>>> +    if (!spapr_xive_priority_is_valid(priority)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
>>>> requested\n",
>>>> +                      priority);
>>>> +        return H_P4;
>>>> +    }
>>>> +
>>>> +    /* Validate that "target" is part of the list of threads allocated
>>>> +     * to the partition. For that, find the END corresponding to the
>>>> +     * target.
>>>> +     */
>>>> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
>>>> &end_idx)) {
>>>> +        return H_P3;
>>>> +    }
>>>> +
>>>> +    new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
>>>> +    new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
>>>> +
>>>> +    if (flags & SPAPR_XIVE_SRC_SET_EISN) {
>>>> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
>>>> +    }
>>>> +
>>>> +out:
>>>> +    if (xive_router_set_eas(xrtr, lisn, &new_eas)) {
>>>> +        return H_HARDWARE;
>>>> +    }
>>>
>>> As noted earlier in the series, the spapr specific code owns the
>>> memory backing the EAT, so you can just access it directly rather than
>>> using a method here.
>>
>> Yes. I will give a try. I wonder if I need accessors for the tables
>> ?
> 
> You'll still need the read accessor since the routing core uses that.
> I don't think you need a write accessor though.
> 
>>
>>>
>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine to which
>>>> + * target/priority pair is assigned to the specified Logical Interrupt
>>>> + * Source.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-63 Reserved
>>>> + * - "lisn" is per "interrupts", "interrupt-map", or
>>>> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
>>>> + *      ibm,query-interrupt-source-number RTAS call, or as
>>>> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
>>>> + *
>>>> + * Output:
>>>> + * - R4: Target to which the specified Logical Interrupt Source is
>>>> + *       assigned
>>>> + * - R5: Priority to which the specified Logical Interrupt Source is
>>>> + *       assigned
>>>> + * - R6: EISN for the specified Logical Interrupt Source (this will be
>>>> + *       equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG)
>>>> + */
>>>> +static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
>>>> +                                            sPAPRMachineState *spapr,
>>>> +                                            target_ulong opcode,
>>>> +                                            target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong lisn = args[1];
>>>> +    XiveEAS eas;
>>>> +    XiveEND end;
>>>> +    uint8_t end_blk, nvt_blk;
>>>> +    uint32_t end_idx, nvt_idx;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    end_blk = GETFIELD(EAS_END_BLOCK, eas.w);
>>>> +    end_idx = GETFIELD(EAS_END_INDEX, eas.w);
>>>> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
>>>> +        /* Not sure what to return here */
>>>> +        return H_HARDWARE;
>>>
>>> IIUC this indicates a bug in the PAPR specific code, not the guest, so
>>> an assert() is probably the right answer.
>>
>> ok
>>
>>>> +    }
>>>> +
>>>> +    nvt_blk = GETFIELD(END_W6_NVT_BLOCK, end.w6);
>>>> +    nvt_idx = GETFIELD(END_W6_NVT_INDEX, end.w6);
>>>> +    args[0] = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);
>>>
>>> AIUI there's a specific END for each target & priority, so you could
>>> avoid this second level lookup, 
>>
>> yes 
>>
>>> although I guess this might be
>>> valuable if we do more complicated internal routing in the future.
>>
>> I am not sure of that but I'd rather keep these converting helpers
>> for the moment.
> 
> Ok.
> 
>>>> +    if (eas.w & EAS_MASKED) {
>>>> +        args[1] = 0xff;
>>>> +    } else {
>>>> +        args[1] = GETFIELD(END_W7_F0_PRIORITY, end.w7);
>>>> +    }
>>>> +
>>>> +    args[2] = GETFIELD(EAS_END_DATA, eas.w);
>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real
>>>> + * address of the notification management page associated with the
>>>> + * specified target and priority.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *       Bits 0-63 Reserved
>>>> + * - "target" is per "ibm,ppc-interrupt-server#s" or
>>>> + *       "ibm,ppc-interrupt-gserver#s"
>>>> + * - "priority" is a valid priority not in
>>>> + *       "ibm,plat-res-int-priorities"
>>>> + *
>>>> + * Output:
>>>> + * - R4: Logical real address of notification page
>>>> + * - R5: Power of 2 page size of the notification page
>>>> + */
>>>> +static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
>>>> +                                         sPAPRMachineState *spapr,
>>>> +                                         target_ulong opcode,
>>>> +                                         target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveENDSource *end_xsrc = &xive->end_source;
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong target = args[1];
>>>> +    target_ulong priority = args[2];
>>>> +    XiveEND end;
>>>> +    uint8_t end_blk;
>>>> +    uint32_t end_idx;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    if (!spapr_xive_priority_is_valid(priority)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
>>>> requested\n",
>>>> +                      priority);
>>>> +        return H_P3;
>>>> +    }
>>>> +
>>>> +    /* Validate that "target" is part of the list of threads allocated
>>>> +     * to the partition. For that, find the END corresponding to the
>>>> +     * target.
>>>> +     */
>>>> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
>>>> &end_idx)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_end(XIVE_ROUTER(xive), end_blk, end_idx, &end)) {
>>>> +        return H_HARDWARE;
>>>> +    }
>>>> +
>>>> +    args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * 
>>>> end_idx;
>>>> +    if (end.w0 & END_W0_ENQUEUE) {
>>>> +        args[1] = GETFIELD(END_W0_QSIZE, end.w0) + 12;
>>>> +    } else {
>>>> +        args[1] = 0;
>>>> +    }
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset a EQ for
>>>> + * a given "target" and "priority".  It is also used to set the
>>>> + * notification config associated with the EQ.  An EQ size of 0 is
>>>> + * used to reset the EQ config for a given target and priority. If
>>>> + * resetting the EQ config, the END associated with the given "target"
>>>> + * and "priority" will be changed to disable queueing.
>>>> + *
>>>> + * Upon return from the hcall(), no additional interrupts will be
>>>> + * directed to the old EQ (if one was set). The old EQ (if one was
>>>> + * set) should be investigated for interrupts that occurred prior to
>>>> + * or during the hcall().
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-62: Reserved
>>>> + *      Bit 63: Unconditional Notify (n) per the XIVE spec
>>>> + * - "target" is per "ibm,ppc-interrupt-server#s" or
>>>> + *       "ibm,ppc-interrupt-gserver#s"
>>>> + * - "priority" is a valid priority not in
>>>> + *       "ibm,plat-res-int-priorities"
>>>> + * - "eventQueue": The logical real address of the start of the EQ
>>>> + * - "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes"
>>>> + *
>>>> + * Output:
>>>> + * - None
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63)
>>>> +
>>>> +static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
>>>> +                                           sPAPRMachineState *spapr,
>>>> +                                           target_ulong opcode,
>>>> +                                           target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong target = args[1];
>>>> +    target_ulong priority = args[2];
>>>> +    target_ulong qpage = args[3];
>>>> +    target_ulong qsize = args[4];
>>>> +    XiveEND end;
>>>> +    uint8_t end_blk, nvt_blk;
>>>> +    uint32_t end_idx, nvt_idx;
>>>> +    uint32_t qdata;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    if (!spapr_xive_priority_is_valid(priority)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
>>>> requested\n",
>>>> +                      priority);
>>>> +        return H_P3;
>>>> +    }
>>>> +
>>>> +    /* Validate that "target" is part of the list of threads allocated
>>>> +     * to the partition. For that, find the END corresponding to the
>>>> +     * target.
>>>> +     */
>>>> +
>>>> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
>>>> &end_idx)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
>>>> +        return H_HARDWARE;
>>>
>>> Again, I think this indicates a qemu (spapr) code bug, so could be an 
>>> assert().
>>
>> ok
>>
>>>
>>>> +    }
>>>> +
>>>> +    switch (qsize) {
>>>> +    case 12:
>>>> +    case 16:
>>>> +    case 21:
>>>> +    case 24:
>>>> +        end.w3 = ((uint64_t)qpage) & 0xffffffff;
>>>
>>> It just occurred to me that I haven't been looking for this across any
>>> of these reviews.  Don't you need byteswaps when accessing these
>>> in-memory structures?
>>
>> yes this is done when some event data is enqueued in the EQ.
> 
> I'm not talking about the data in the EQ itself, but the fields in the
> END (and the NVT).

XIVE is all BE.

> 
>>
>>>
>>>> +        end.w2 = (((uint64_t)qpage)) >> 32 & 0x0fffffff;
>>>> +        end.w0 |= END_W0_ENQUEUE;
>>>> +        end.w0 = SETFIELD(END_W0_QSIZE, end.w0, qsize - 12);
>>>> +        break;
>>>> +    case 0:
>>>> +        /* reset queue and disable queueing */
>>>> +        xive_end_reset(&end);
>>>> +        goto out;
>>>> +
>>>> +    default:
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size 
>>>> %"PRIx64"\n",
>>>> +                      qsize);
>>>> +        return H_P5;
>>>> +    }
>>>> +
>>>> +    if (qsize) {
>>>> +        /*
>>>> +         * Let's validate the EQ address with a read of the first EQ
>>>> +         * entry. We could also check that the full queue has been
>>>> +         * zeroed by the OS.
>>>> +         */
>>>> +        if (address_space_read(&address_space_memory, qpage,
>>>> +                               MEMTXATTRS_UNSPECIFIED,
>>>> +                               (uint8_t *) &qdata, sizeof(qdata))) {
>>>> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ data 
>>>> @0x%"
>>>> +                          HWADDR_PRIx "\n", qpage);
>>>> +            return H_P4;
>>>
>>> Just checking the first entry doesn't seem entirely safe.  Using
>>> address_space_map() and making sure the returned plen doesn't get
>>> reduced below the queue size might be a better option.
>>
>> ok. That was on my todo list.
>>
>>>
>>>> +        }
>>>> +    }
>>>> +
>>>> +    if (spapr_xive_target_to_nvt(xive, target, &nvt_blk, &nvt_idx)) {
>>>> +        return H_HARDWARE;
>>>
>>> That could be caused by a bogus 'target' value, couldn't it?  
>>
>> yes. It should have returned H_P2 above when spapr_xive_target_to_end() 
>> is called.
>>
>>> In which
>>> case it a) should probably be checked earlier and b) should be
>>> H_PARAMETER or similar, not H_HARDWARE, yes?
>>
>> H_P2 may be again. It should be checked earlier
>>
>>>
>>>> +    }
>>>> +
>>>> +    /* Ensure the priority and target are correctly set (they will not
>>>> +     * be right after allocation)
>>>
>>> AIUI there's a static association from END to target in the PAPR
>>> model. 
>>
>> yes. 8 priorities per cpu.
>>
>>> So it seems to make more sense to get that set up right at
>>> initialization / reset, rather than doing it lazily when the 
>>> queue is configured.
>>
>> Ah. You would preconfigure the word6 and word7 then. Yes, it would
>> save us some of the conversion fuss. I will look at it.
>>
>>>> +     */
>>>> +    end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
>>>> +        SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
>>>> +    end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, priority);
>>>> +
>>>> +    if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) {
>>>> +        end.w0 |= END_W0_UCOND_NOTIFY;
>>>> +    } else {
>>>> +        end.w0 &= ~END_W0_UCOND_NOTIFY;
>>>> +    }
>>>> +
>>>> +    /* The generation bit for the END starts at 1 and The END page
>>>> +     * offset counter starts at 0.
>>>> +     */
>>>> +    end.w1 = END_W1_GENERATION | SETFIELD(END_W1_PAGE_OFF, 0ul, 0ul);
>>>> +    end.w0 |= END_W0_VALID;
>>>> +
>>>> +    /* TODO: issue syncs required to ensure all in-flight interrupts
>>>> +     * are complete on the old END */
>>>> +out:
>>>> +    /* Update END */
>>>> +    if (xive_router_set_end(xrtr, end_blk, end_idx, &end)) {
>>>> +        return H_HARDWARE;
>>>> +    }
>>>
>>> Again the PAPR code owns the ENDs, so it can update them directly
>>> rather than going through an abstraction.
>>
>> ok.
>>
>>>
>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_GET_QUEUE_CONFIG hcall() is used to get a EQ for a given
>>>> + * target and priority.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-62: Reserved
>>>> + *      Bit 63: Debug: Return debug data
>>>> + * - "target" is per "ibm,ppc-interrupt-server#s" or
>>>> + *       "ibm,ppc-interrupt-gserver#s"
>>>> + * - "priority" is a valid priority not in
>>>> + *       "ibm,plat-res-int-priorities"
>>>> + *
>>>> + * Output:
>>>> + * - R4: "flags":
>>>> + *       Bits 0-61: Reserved
>>>> + *       Bit 62: The value of Event Queue Generation Number (g) per
>>>> + *              the XIVE spec if "Debug" = 1
>>>> + *       Bit 63: The value of Unconditional Notify (n) per the XIVE spec
>>>> + * - R5: The logical real address of the start of the EQ
>>>> + * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes"
>>>> + * - R7: The value of Event Queue Offset Counter per XIVE spec
>>>> + *       if "Debug" = 1, else 0
>>>> + *
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_END_DEBUG     PPC_BIT(63)
>>>> +
>>>> +static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
>>>> +                                           sPAPRMachineState *spapr,
>>>> +                                           target_ulong opcode,
>>>> +                                           target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong target = args[1];
>>>> +    target_ulong priority = args[2];
>>>> +    XiveEND end;
>>>> +    uint8_t end_blk;
>>>> +    uint32_t end_idx;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags & ~SPAPR_XIVE_END_DEBUG) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    if (!spapr_xive_priority_is_valid(priority)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
>>>> requested\n",
>>>> +                      priority);
>>>> +        return H_P3;
>>>> +    }
>>>> +
>>>> +    /* Validate that "target" is part of the list of threads allocated
>>>> +     * to the partition. For that, find the END corresponding to the
>>>> +     * target.
>>>> +     */
>>>> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
>>>> &end_idx)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_end(XIVE_ROUTER(xive), end_blk, end_idx, &end)) {
>>>> +        return H_HARDWARE;
>>>
>>> Again, assert() seems appropriate here.
>>
>> ok
>>
>>>
>>>> +    }
>>>> +
>>>> +    args[0] = 0;
>>>> +    if (end.w0 & END_W0_UCOND_NOTIFY) {
>>>> +        args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY;
>>>> +    }
>>>> +
>>>> +    if (end.w0 & END_W0_ENQUEUE) {
>>>> +        args[1] =
>>>> +            (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
>>>> +        args[2] = GETFIELD(END_W0_QSIZE, end.w0) + 12;
>>>> +    } else {
>>>> +        args[1] = 0;
>>>> +        args[2] = 0;
>>>> +    }
>>>> +
>>>> +    /* TODO: do we need any locking on the END ? */
>>>> +    if (flags & SPAPR_XIVE_END_DEBUG) {
>>>> +        /* Load the event queue generation number into the return flags */
>>>> +        args[0] |= (uint64_t)GETFIELD(END_W1_GENERATION, end.w1) << 62;
>>>> +
>>>> +        /* Load R7 with the event queue offset counter */
>>>> +        args[3] = GETFIELD(END_W1_PAGE_OFF, end.w1);
>>>> +    } else {
>>>> +        args[3] = 0;
>>>> +    }
>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the
>>>> + * reporting cache line pair for the calling thread.  The reporting
>>>> + * cache lines will contain the OS interrupt context when the OS
>>>> + * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS
>>>> + * interrupt. The reporting cache lines can be reset by inputting -1
>>>> + * in "reportingLine".  Issuing the CI store byte without reporting
>>>> + * cache lines registered will result in the data not being accessible
>>>> + * to the OS.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-63: Reserved
>>>> + * - "reportingLine": The logical real address of the reporting cache
>>>> + *    line pair
>>>> + *
>>>> + * Output:
>>>> + * - None
>>>> + */
>>>> +static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu,
>>>> +                                                sPAPRMachineState *spapr,
>>>> +                                                target_ulong opcode,
>>>> +                                                target_ulong *args)
>>>> +{
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    /* TODO: H_INT_SET_OS_REPORTING_LINE */
>>>> +    return H_FUNCTION;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical
>>>> + * real address of the reporting cache line pair set for the input
>>>> + * "target".  If no reporting cache line pair has been set, -1 is
>>>> + * returned.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-63: Reserved
>>>> + * - "target" is per "ibm,ppc-interrupt-server#s" or
>>>> + *       "ibm,ppc-interrupt-gserver#s"
>>>> + * - "reportingLine": The logical real address of the reporting cache
>>>> + *   line pair
>>>> + *
>>>> + * Output:
>>>> + * - R4: The logical real address of the reporting line if set, else -1
>>>> + */
>>>> +static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu,
>>>> +                                                sPAPRMachineState *spapr,
>>>> +                                                target_ulong opcode,
>>>> +                                                target_ulong *args)
>>>> +{
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    /* TODO: H_INT_GET_OS_REPORTING_LINE */
>>>> +    return H_FUNCTION;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_ESB hcall() is used to issue a load or store to the ESB
>>>> + * page for the input "lisn".  This hcall is only supported for LISNs
>>>> + * that have the ESB hcall flag set to 1 when returned from hcall()
>>>> + * H_INT_GET_SOURCE_INFO.
>>>
>>> Is there a reason for specifically restricting this to LISNs which
>>> advertise it, rather than allowing it for anything? 
>>
>> It's in the specs but I did not implement the check. So H_INT_ESB can be 
>> used today by the OS for any interrupt number. Same under KVM.
>>
>> But I should say so somewhere.
>>
>>> Obviously using
>>> the direct MMIOs will generally be a faster option when possible, but
>>> I could see occasions where it might be simpler for the guest to
>>> always use H_INT_ESB (e.g. for micro-guests like kvm-unit-tests).
>>
>> can not you use direct load and stores in these guests ? I haven't 
>> looked at how they are implemented.
> 
> It's not that you can't, but that might involve setting up mappings
> and so forth which could be more trouble than using an hcall.  At the
> very least they'll also need H_INT_ESB support for the irqs that
> require it, so allowing it for everything avoids one code variant.

ok. All good then.

Thanks,

C.

> 
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-62: Reserved
>>>> + *      bit 63: Store: Store=1, store operation, else load operation
>>>> + * - "lisn" is per "interrupts", "interrupt-map", or
>>>> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
>>>> + *      ibm,query-interrupt-source-number RTAS call, or as
>>>> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
>>>> + * - "esbOffset" is the offset into the ESB page for the load or store 
>>>> operation
>>>> + * - "storeData" is the data to write for a store operation
>>>> + *
>>>> + * Output:
>>>> + * - R4: R4: The value of the load if load operation, else -1
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_ESB_STORE PPC_BIT(63)
>>>> +
>>>> +static target_ulong h_int_esb(PowerPCCPU *cpu,
>>>> +                              sPAPRMachineState *spapr,
>>>> +                              target_ulong opcode,
>>>> +                              target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveEAS eas;
>>>> +    target_ulong flags  = args[0];
>>>> +    target_ulong lisn   = args[1];
>>>> +    target_ulong offset = args[2];
>>>> +    target_ulong data   = args[3];
>>>> +    hwaddr mmio_addr;
>>>> +    XiveSource *xsrc = &xive->source;
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags & ~SPAPR_XIVE_ESB_STORE) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (offset > (1ull << xsrc->esb_shift)) {
>>>> +        return H_P3;
>>>> +    }
>>>> +
>>>> +    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
>>>> +
>>>> +    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
>>>> +                      (flags & SPAPR_XIVE_ESB_STORE))) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
>>>> +                      HWADDR_PRIx "\n", mmio_addr);
>>>> +        return H_HARDWARE;
>>>> +    }
>>>> +    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_SYNC hcall() is used to issue hardware syncs that will
>>>> + * ensure any in flight events for the input lisn are in the event
>>>> + * queue.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-63: Reserved
>>>> + * - "lisn" is per "interrupts", "interrupt-map", or
>>>> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
>>>> + *      ibm,query-interrupt-source-number RTAS call, or as
>>>> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
>>>> + *
>>>> + * Output:
>>>> + * - None
>>>> + */
>>>> +static target_ulong h_int_sync(PowerPCCPU *cpu,
>>>> +                               sPAPRMachineState *spapr,
>>>> +                               target_ulong opcode,
>>>> +                               target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    XiveEAS eas;
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong lisn = args[1];
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        return H_P2;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * H_STATE should be returned if a H_INT_RESET is in progress.
>>>> +     * This is not needed when running the emulation under QEMU
>>>> +     */
>>>> +
>>>> +    /* This is not real hardware. Nothing to be done */
>>>
>>> At least, not as long as all the XIVE operations are under the BQL.
>>
>> yes.
>>
>>>
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +/*
>>>> + * The H_INT_RESET hcall() is used to reset all of the partition's
>>>> + * interrupt exploitation structures to their initial state.  This
>>>> + * means losing all previously set interrupt state set via
>>>> + * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
>>>> + *
>>>> + * Parameters:
>>>> + * Input:
>>>> + * - "flags"
>>>> + *      Bits 0-63: Reserved
>>>> + *
>>>> + * Output:
>>>> + * - None
>>>> + */
>>>> +static target_ulong h_int_reset(PowerPCCPU *cpu,
>>>> +                                sPAPRMachineState *spapr,
>>>> +                                target_ulong opcode,
>>>> +                                target_ulong *args)
>>>> +{
>>>> +    sPAPRXive *xive = spapr->xive;
>>>> +    target_ulong flags   = args[0];
>>>> +
>>>> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
>>>> +        return H_FUNCTION;
>>>> +    }
>>>> +
>>>> +    if (flags) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    device_reset(DEVICE(xive));
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +void spapr_xive_hcall_init(sPAPRMachineState *spapr)
>>>> +{
>>>> +    spapr_register_hypercall(H_INT_GET_SOURCE_INFO, 
>>>> h_int_get_source_info);
>>>> +    spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, 
>>>> h_int_set_source_config);
>>>> +    spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, 
>>>> h_int_get_source_config);
>>>> +    spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info);
>>>> +    spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, 
>>>> h_int_set_queue_config);
>>>> +    spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, 
>>>> h_int_get_queue_config);
>>>> +    spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE,
>>>> +                             h_int_set_os_reporting_line);
>>>> +    spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE,
>>>> +                             h_int_get_os_reporting_line);
>>>> +    spapr_register_hypercall(H_INT_ESB, h_int_esb);
>>>> +    spapr_register_hypercall(H_INT_SYNC, h_int_sync);
>>>> +    spapr_register_hypercall(H_INT_RESET, h_int_reset);
>>>> +}
>>>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>>>> index 2569ae1bc7f8..da6fcfaa3c52 100644
>>>> --- a/hw/ppc/spapr_irq.c
>>>> +++ b/hw/ppc/spapr_irq.c
>>>> @@ -258,6 +258,8 @@ static void spapr_irq_init_xive(sPAPRMachineState 
>>>> *spapr, int nr_irqs,
>>>>          error_propagate(errp, local_err);
>>>>          return;
>>>>      }
>>>> +
>>>> +    spapr_xive_hcall_init(spapr);
>>>>  }
>>>>  
>>>>  static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool 
>>>> lsi,
>>>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>>>> index 301a8e972d91..eacd26836ebf 100644
>>>> --- a/hw/intc/Makefile.objs
>>>> +++ b/hw/intc/Makefile.objs
>>>> @@ -38,7 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
>>>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>>>  obj-$(CONFIG_XIVE) += xive.o
>>>> -obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
>>>> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o spapr_xive_hcall.o
>>>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>>>
>>
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]