qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH v5 16/36] spapr: add hcalls support for the XIVE e


From: David Gibson
Subject: Re: [Qemu-ppc] [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation interrupt mode
Date: Wed, 28 Nov 2018 15:25:47 +1100
User-agent: Mutt/1.10.1 (2018-07-13)

On Fri, Nov 16, 2018 at 11:57:09AM +0100, Cédric Le Goater wrote:
> The different XIVE virtualization structures (sources and event queues)
> are configured with a set of Hypervisor calls :
> 
>  - H_INT_GET_SOURCE_INFO
> 
>    used to obtain the address of the MMIO page of the Event State
>    Buffer (ESB) entry associated with the source.
> 
>  - H_INT_SET_SOURCE_CONFIG
> 
>    assigns a source to a "target".
> 
>  - H_INT_GET_SOURCE_CONFIG
> 
>    determines which "target" and "priority" is assigned to a source
> 
>  - H_INT_GET_QUEUE_INFO
> 
>    returns the address of the notification management page associated
>    with the specified "target" and "priority".
> 
>  - H_INT_SET_QUEUE_CONFIG
> 
>    sets or resets the event queue for a given "target" and "priority".
>    It is also used to set the notification configuration associated
>    with the queue, only unconditional notification is supported for
>    the moment. Reset is performed with a queue size of 0 and queueing
>    is disabled in that case.
> 
>  - H_INT_GET_QUEUE_CONFIG
> 
>    returns the queue settings for a given "target" and "priority".
> 
>  - H_INT_RESET
> 
>    resets all of the guest's internal interrupt structures to their
>    initial state, losing all configuration set via the hcalls
>    H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
> 
>  - H_INT_SYNC
> 
>    issue a synchronisation on a source to make sure all notifications
>    have reached their queue.
> 
> Calls that still need to be addressed :
> 
>    H_INT_SET_OS_REPORTING_LINE
>    H_INT_GET_OS_REPORTING_LINE
> 
> See the code for more documentation on each hcall.
> 
> Signed-off-by: Cédric Le Goater <address@hidden>
> ---
>  include/hw/ppc/spapr.h      |  15 +-
>  include/hw/ppc/spapr_xive.h |   6 +
>  hw/intc/spapr_xive_hcall.c  | 892 ++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_irq.c          |   2 +
>  hw/intc/Makefile.objs       |   2 +-
>  5 files changed, 915 insertions(+), 2 deletions(-)
>  create mode 100644 hw/intc/spapr_xive_hcall.c
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 1fbc2663e06c..8415faea7b82 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -452,7 +452,20 @@ struct sPAPRMachineState {
>  #define H_INVALIDATE_PID        0x378
>  #define H_REGISTER_PROC_TBL     0x37C
>  #define H_SIGNAL_SYS_RESET      0x380
> -#define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
> +
> +#define H_INT_GET_SOURCE_INFO   0x3A8
> +#define H_INT_SET_SOURCE_CONFIG 0x3AC
> +#define H_INT_GET_SOURCE_CONFIG 0x3B0
> +#define H_INT_GET_QUEUE_INFO    0x3B4
> +#define H_INT_SET_QUEUE_CONFIG  0x3B8
> +#define H_INT_GET_QUEUE_CONFIG  0x3BC
> +#define H_INT_SET_OS_REPORTING_LINE 0x3C0
> +#define H_INT_GET_OS_REPORTING_LINE 0x3C4
> +#define H_INT_ESB               0x3C8
> +#define H_INT_SYNC              0x3CC
> +#define H_INT_RESET             0x3D0
> +
> +#define MAX_HCALL_OPCODE        H_INT_RESET
>  
>  /* The hcalls above are standardized in PAPR and implemented by pHyp
>   * as well.
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 3f65b8f485fd..418511f3dc10 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -60,4 +60,10 @@ int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t 
> target, uint8_t prio,
>  int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
>                            uint8_t *out_end_blk, uint32_t *out_end_idx);
>  
> +bool spapr_xive_priority_is_valid(uint8_t priority);

AFAICT this could be a local function.

> +
> +typedef struct sPAPRMachineState sPAPRMachineState;
> +
> +void spapr_xive_hcall_init(sPAPRMachineState *spapr);
> +
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/hw/intc/spapr_xive_hcall.c b/hw/intc/spapr_xive_hcall.c
> new file mode 100644
> index 000000000000..52e4e23995f5
> --- /dev/null
> +++ b/hw/intc/spapr_xive_hcall.c
> @@ -0,0 +1,892 @@
> +/*
> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qapi/error.h"
> +#include "cpu.h"
> +#include "hw/ppc/fdt.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_xive.h"
> +#include "hw/ppc/xive_regs.h"
> +#include "monitor/monitor.h"

Fwiw, I don't think it's particularly necessary to split the hcall
handling out into a separate .c file.

> +/*
> + * OPAL uses the priority 7 EQ to automatically escalate interrupts
> + * for all other queues (DD2.X POWER9). So only priorities [0..6] are
> + * available for the guest.

Referencing OPAL behaviour doesn't really make sense in the context of
PAPR.  What I think you're getting at is that the PAPR spec only
allows a PAPR guest to use priorities 0..6 (or at least it will if the
XIVE updated spec ever gets published).  The fact that this allows the
host use 7 for escalations is a design rationale but not really
relevant to the guest device itself.

> + */
> +bool spapr_xive_priority_is_valid(uint8_t priority)
> +{
> +    switch (priority) {
> +    case 0 ... 6:
> +        return true;
> +    case 7: /* OPAL escalation queue */
> +    default:
> +        return false;
> +    }
> +}
> +
> +/*
> + * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical
> + * real address of the MMIO page through which the Event State Buffer
> + * entry associated with the value of the "lisn" parameter is managed.
> + *
> + * Parameters:
> + * Input
> + * - "flags"
> + *       Bits 0-63 reserved
> + * - "lisn" is per "interrupts", "interrupt-map", or
> + *       "ibm,xive-lisn-ranges" properties, or as returned by the
> + *       ibm,query-interrupt-source-number RTAS call, or as returned
> + *       by the H_ALLOCATE_VAS_WINDOW hcall

I've not heard of H_ALLOCATE_VAS_WINDOW.  Is that something we intend
to implement in kvm/qemu, or is it only of interest for PowerVM?

Also, putting the register numbers on the inputs as well as the
outputs would be helpful.

> + *
> + * Output
> + * - R4: "flags"
> + *       Bits 0-59: Reserved
> + *       Bit 60: H_INT_ESB must be used for Event State Buffer
> + *               management
> + *       Bit 61: 1 == LSI  0 == MSI
> + *       Bit 62: the full function page supports trigger
> + *       Bit 63: Store EOI Supported
> + * - R5: Logical Real address of full function Event State Buffer
> + *       management page, -1 if ESB hcall flag is set to 1.

You've defined what H_INT_ESB means above, so it will be clearer if
you reference that by name here.

> + * - R6: Logical Real Address of trigger only Event State Buffer
> + *       management page or -1.
> + * - R7: Power of 2 page size for the ESB management pages returned in
> + *       R5 and R6.
> + */
> +
> +#define SPAPR_XIVE_SRC_H_INT_ESB     PPC_BIT(60) /* ESB manage with 
> H_INT_ESB */
> +#define SPAPR_XIVE_SRC_LSI           PPC_BIT(61) /* Virtual LSI type */
> +#define SPAPR_XIVE_SRC_TRIGGER       PPC_BIT(62) /* Trigger and management
> +                                                    on same page */
> +#define SPAPR_XIVE_SRC_STORE_EOI     PPC_BIT(63) /* Store EOI support */

Probably makes sense to put these #defines in spapr.h since they form
part of the PAPR interface definition.

> +static target_ulong h_int_get_source_info(PowerPCCPU *cpu,
> +                                          sPAPRMachineState *spapr,
> +                                          target_ulong opcode,
> +                                          target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveSource *xsrc = &xive->source;
> +    XiveEAS eas;
> +    target_ulong flags  = args[0];
> +    target_ulong lisn   = args[1];
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
> +        return H_P2;
> +    }
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        return H_P2;
> +    }
> +
> +    /* All sources are emulated under the main XIVE object and share
> +     * the same characteristics.
> +     */
> +    args[0] = 0;
> +    if (!xive_source_esb_has_2page(xsrc)) {
> +        args[0] |= SPAPR_XIVE_SRC_TRIGGER;
> +    }
> +    if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) {
> +        args[0] |= SPAPR_XIVE_SRC_STORE_EOI;
> +    }
> +
> +    /*
> +     * Force the use of the H_INT_ESB hcall in case of an LSI
> +     * interrupt. This is necessary under KVM to re-trigger the
> +     * interrupt if the level is still asserted
> +     */
> +    if (xive_source_irq_is_lsi(xsrc, lisn)) {
> +        args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI;
> +    }
> +
> +    if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
> +        args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn);
> +    } else {
> +        args[1] = -1;
> +    }
> +
> +    if (xive_source_esb_has_2page(xsrc)) {
> +        args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn);
> +    } else {
> +        args[2] = -1;
> +    }

Do we also need to keep this address clear in the H_INT_ESB case?

> +    args[3] = TARGET_PAGE_SIZE;

That seems wrong.  TARGET_PAGE_SIZE is generally 4kiB, but won't these
usually actually be 64kiB?

> +
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical
> + * Interrupt Source to a target. The Logical Interrupt Source is
> + * designated with the "lisn" parameter and the target is designated
> + * with the "target" and "priority" parameters.  Upon return from the
> + * hcall(), no additional interrupts will be directed to the old EQ.
> + *
> + * TODO: The old EQ should be investigated for interrupts that
> + * occurred prior to or during the hcall().

Isn't that the responsibility of the guest?

> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-61: Reserved
> + *      Bit 62: set the "eisn" in the EA

What's the "EA"?  Do you mean the EAS?

> + *      Bit 63: masks the interrupt source in the hardware interrupt
> + *      control structure. An interrupt masked by this mechanism will
> + *      be dropped, but it's source state bits will still be
> + *      set. There is no race-free way of unmasking and restoring the
> + *      source. Thus this should only be used in interrupts that are
> + *      also masked at the source, and only in cases where the
> + *      interrupt is not meant to be used for a large amount of time
> + *      because no valid target exists for it for example
> + * - "lisn" is per "interrupts", "interrupt-map", or
> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
> + *      ibm,query-interrupt-source-number RTAS call, or as returned by
> + *      the H_ALLOCATE_VAS_WINDOW hcall
> + * - "target" is per "ibm,ppc-interrupt-server#s" or
> + *      "ibm,ppc-interrupt-gserver#s"
> + * - "priority" is a valid priority not in
> + *      "ibm,plat-res-int-priorities"
> + * - "eisn" is the guest EISN associated with the "lisn"

I don't think the EISN term has been used before in the series.  I'm
guessing this is the guest-assigned global interrupt number?

> + *
> + * Output:
> + * - None
> + */
> +
> +#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62)
> +#define SPAPR_XIVE_SRC_MASK     PPC_BIT(63)
> +
> +static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
> +                                            sPAPRMachineState *spapr,
> +                                            target_ulong opcode,
> +                                            target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    XiveEAS eas, new_eas;
> +    target_ulong flags    = args[0];
> +    target_ulong lisn     = args[1];
> +    target_ulong target   = args[2];
> +    target_ulong priority = args[3];
> +    target_ulong eisn     = args[4];
> +    uint8_t end_blk;
> +    uint32_t end_idx;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
> +        return H_P2;
> +    }
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        return H_P2;
> +    }
> +
> +    /* priority 0xff is used to reset the EAS */
> +    if (priority == 0xff) {
> +        new_eas.w = EAS_VALID | EAS_MASKED;
> +        goto out;
> +    }
> +
> +    if (flags & SPAPR_XIVE_SRC_MASK) {
> +        new_eas.w = eas.w | EAS_MASKED;
> +    } else {
> +        new_eas.w = eas.w & ~EAS_MASKED;
> +    }
> +
> +    if (!spapr_xive_priority_is_valid(priority)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
> requested\n",
> +                      priority);
> +        return H_P4;
> +    }
> +
> +    /* Validate that "target" is part of the list of threads allocated
> +     * to the partition. For that, find the END corresponding to the
> +     * target.
> +     */
> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
> &end_idx)) {
> +        return H_P3;
> +    }
> +
> +    new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
> +    new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
> +
> +    if (flags & SPAPR_XIVE_SRC_SET_EISN) {
> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
> +    }
> +
> +out:
> +    if (xive_router_set_eas(xrtr, lisn, &new_eas)) {
> +        return H_HARDWARE;
> +    }

As noted earlier in the series, the spapr specific code owns the
memory backing the EAT, so you can just access it directly rather than
using a method here.

> +
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine to which
> + * target/priority pair is assigned to the specified Logical Interrupt
> + * Source.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-63 Reserved
> + * - "lisn" is per "interrupts", "interrupt-map", or
> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
> + *      ibm,query-interrupt-source-number RTAS call, or as
> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
> + *
> + * Output:
> + * - R4: Target to which the specified Logical Interrupt Source is
> + *       assigned
> + * - R5: Priority to which the specified Logical Interrupt Source is
> + *       assigned
> + * - R6: EISN for the specified Logical Interrupt Source (this will be
> + *       equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG)
> + */
> +static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
> +                                            sPAPRMachineState *spapr,
> +                                            target_ulong opcode,
> +                                            target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    target_ulong flags = args[0];
> +    target_ulong lisn = args[1];
> +    XiveEAS eas;
> +    XiveEND end;
> +    uint8_t end_blk, nvt_blk;
> +    uint32_t end_idx, nvt_idx;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
> +        return H_P2;
> +    }
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        return H_P2;
> +    }
> +
> +    end_blk = GETFIELD(EAS_END_BLOCK, eas.w);
> +    end_idx = GETFIELD(EAS_END_INDEX, eas.w);
> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
> +        /* Not sure what to return here */
> +        return H_HARDWARE;

IIUC this indicates a bug in the PAPR specific code, not the guest, so
an assert() is probably the right answer.

> +    }
> +
> +    nvt_blk = GETFIELD(END_W6_NVT_BLOCK, end.w6);
> +    nvt_idx = GETFIELD(END_W6_NVT_INDEX, end.w6);
> +    args[0] = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);

AIUI there's a specific END for each target & priority, so you could
avoid this second level lookup, although I guess this might be
valuable if we do more complicated internal routing in the future.

> +    if (eas.w & EAS_MASKED) {
> +        args[1] = 0xff;
> +    } else {
> +        args[1] = GETFIELD(END_W7_F0_PRIORITY, end.w7);
> +    }
> +
> +    args[2] = GETFIELD(EAS_END_DATA, eas.w);
> +
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real
> + * address of the notification management page associated with the
> + * specified target and priority.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *       Bits 0-63 Reserved
> + * - "target" is per "ibm,ppc-interrupt-server#s" or
> + *       "ibm,ppc-interrupt-gserver#s"
> + * - "priority" is a valid priority not in
> + *       "ibm,plat-res-int-priorities"
> + *
> + * Output:
> + * - R4: Logical real address of notification page
> + * - R5: Power of 2 page size of the notification page
> + */
> +static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
> +                                         sPAPRMachineState *spapr,
> +                                         target_ulong opcode,
> +                                         target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveENDSource *end_xsrc = &xive->end_source;
> +    target_ulong flags = args[0];
> +    target_ulong target = args[1];
> +    target_ulong priority = args[2];
> +    XiveEND end;
> +    uint8_t end_blk;
> +    uint32_t end_idx;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags) {
> +        return H_PARAMETER;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    if (!spapr_xive_priority_is_valid(priority)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
> requested\n",
> +                      priority);
> +        return H_P3;
> +    }
> +
> +    /* Validate that "target" is part of the list of threads allocated
> +     * to the partition. For that, find the END corresponding to the
> +     * target.
> +     */
> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
> &end_idx)) {
> +        return H_P2;
> +    }
> +
> +    if (xive_router_get_end(XIVE_ROUTER(xive), end_blk, end_idx, &end)) {
> +        return H_HARDWARE;
> +    }
> +
> +    args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> +    if (end.w0 & END_W0_ENQUEUE) {
> +        args[1] = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> +    } else {
> +        args[1] = 0;
> +    }
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset a EQ for
> + * a given "target" and "priority".  It is also used to set the
> + * notification config associated with the EQ.  An EQ size of 0 is
> + * used to reset the EQ config for a given target and priority. If
> + * resetting the EQ config, the END associated with the given "target"
> + * and "priority" will be changed to disable queueing.
> + *
> + * Upon return from the hcall(), no additional interrupts will be
> + * directed to the old EQ (if one was set). The old EQ (if one was
> + * set) should be investigated for interrupts that occurred prior to
> + * or during the hcall().
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-62: Reserved
> + *      Bit 63: Unconditional Notify (n) per the XIVE spec
> + * - "target" is per "ibm,ppc-interrupt-server#s" or
> + *       "ibm,ppc-interrupt-gserver#s"
> + * - "priority" is a valid priority not in
> + *       "ibm,plat-res-int-priorities"
> + * - "eventQueue": The logical real address of the start of the EQ
> + * - "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes"
> + *
> + * Output:
> + * - None
> + */
> +
> +#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63)
> +
> +static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
> +                                           sPAPRMachineState *spapr,
> +                                           target_ulong opcode,
> +                                           target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    target_ulong flags = args[0];
> +    target_ulong target = args[1];
> +    target_ulong priority = args[2];
> +    target_ulong qpage = args[3];
> +    target_ulong qsize = args[4];
> +    XiveEND end;
> +    uint8_t end_blk, nvt_blk;
> +    uint32_t end_idx, nvt_idx;
> +    uint32_t qdata;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) {
> +        return H_PARAMETER;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    if (!spapr_xive_priority_is_valid(priority)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
> requested\n",
> +                      priority);
> +        return H_P3;
> +    }
> +
> +    /* Validate that "target" is part of the list of threads allocated
> +     * to the partition. For that, find the END corresponding to the
> +     * target.
> +     */
> +
> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
> &end_idx)) {
> +        return H_P2;
> +    }
> +
> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
> +        return H_HARDWARE;

Again, I think this indicates a qemu (spapr) code bug, so could be an assert().

> +    }
> +
> +    switch (qsize) {
> +    case 12:
> +    case 16:
> +    case 21:
> +    case 24:
> +        end.w3 = ((uint64_t)qpage) & 0xffffffff;

It just occurred to me that I haven't been looking for this across any
of these reviews.  Don't you need byteswaps when accessing these
in-memory structures?

> +        end.w2 = (((uint64_t)qpage)) >> 32 & 0x0fffffff;
> +        end.w0 |= END_W0_ENQUEUE;
> +        end.w0 = SETFIELD(END_W0_QSIZE, end.w0, qsize - 12);
> +        break;
> +    case 0:
> +        /* reset queue and disable queueing */
> +        xive_end_reset(&end);
> +        goto out;
> +
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size %"PRIx64"\n",
> +                      qsize);
> +        return H_P5;
> +    }
> +
> +    if (qsize) {
> +        /*
> +         * Let's validate the EQ address with a read of the first EQ
> +         * entry. We could also check that the full queue has been
> +         * zeroed by the OS.
> +         */
> +        if (address_space_read(&address_space_memory, qpage,
> +                               MEMTXATTRS_UNSPECIFIED,
> +                               (uint8_t *) &qdata, sizeof(qdata))) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ data 
> @0x%"
> +                          HWADDR_PRIx "\n", qpage);
> +            return H_P4;

Just checking the first entry doesn't seem entirely safe.  Using
address_space_map() and making sure the returned plen doesn't get
reduced below the queue size might be a better option.

> +        }
> +    }
> +
> +    if (spapr_xive_target_to_nvt(xive, target, &nvt_blk, &nvt_idx)) {
> +        return H_HARDWARE;

That could be caused by a bogus 'target' value, couldn't it?  In which
case it a) should probably be checked earlier and b) should be
H_PARAMETER or similar, not H_HARDWARE, yes?

> +    }
> +
> +    /* Ensure the priority and target are correctly set (they will not
> +     * be right after allocation)

AIUI there's a static association from END to target in the PAPR
model.  So it seems to make more sense to get that set up right at
initialization / reset, rather than doing it lazily when the queue is 
configured.

> +     */
> +    end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> +        SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
> +    end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, priority);
> +
> +    if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) {
> +        end.w0 |= END_W0_UCOND_NOTIFY;
> +    } else {
> +        end.w0 &= ~END_W0_UCOND_NOTIFY;
> +    }
> +
> +    /* The generation bit for the END starts at 1 and The END page
> +     * offset counter starts at 0.
> +     */
> +    end.w1 = END_W1_GENERATION | SETFIELD(END_W1_PAGE_OFF, 0ul, 0ul);
> +    end.w0 |= END_W0_VALID;
> +
> +    /* TODO: issue syncs required to ensure all in-flight interrupts
> +     * are complete on the old END */
> +out:
> +    /* Update END */
> +    if (xive_router_set_end(xrtr, end_blk, end_idx, &end)) {
> +        return H_HARDWARE;
> +    }

Again the PAPR code owns the ENDs, so it can update them directly
rather than going through an abstraction.

> +
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_GET_QUEUE_CONFIG hcall() is used to get a EQ for a given
> + * target and priority.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-62: Reserved
> + *      Bit 63: Debug: Return debug data
> + * - "target" is per "ibm,ppc-interrupt-server#s" or
> + *       "ibm,ppc-interrupt-gserver#s"
> + * - "priority" is a valid priority not in
> + *       "ibm,plat-res-int-priorities"
> + *
> + * Output:
> + * - R4: "flags":
> + *       Bits 0-61: Reserved
> + *       Bit 62: The value of Event Queue Generation Number (g) per
> + *              the XIVE spec if "Debug" = 1
> + *       Bit 63: The value of Unconditional Notify (n) per the XIVE spec
> + * - R5: The logical real address of the start of the EQ
> + * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes"
> + * - R7: The value of Event Queue Offset Counter per XIVE spec
> + *       if "Debug" = 1, else 0
> + *
> + */
> +
> +#define SPAPR_XIVE_END_DEBUG     PPC_BIT(63)
> +
> +static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
> +                                           sPAPRMachineState *spapr,
> +                                           target_ulong opcode,
> +                                           target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    target_ulong flags = args[0];
> +    target_ulong target = args[1];
> +    target_ulong priority = args[2];
> +    XiveEND end;
> +    uint8_t end_blk;
> +    uint32_t end_idx;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags & ~SPAPR_XIVE_END_DEBUG) {
> +        return H_PARAMETER;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    if (!spapr_xive_priority_is_valid(priority)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid priority %ld 
> requested\n",
> +                      priority);
> +        return H_P3;
> +    }
> +
> +    /* Validate that "target" is part of the list of threads allocated
> +     * to the partition. For that, find the END corresponding to the
> +     * target.
> +     */
> +    if (spapr_xive_target_to_end(xive, target, priority, &end_blk, 
> &end_idx)) {
> +        return H_P2;
> +    }
> +
> +    if (xive_router_get_end(XIVE_ROUTER(xive), end_blk, end_idx, &end)) {
> +        return H_HARDWARE;

Again, assert() seems appropriate here.

> +    }
> +
> +    args[0] = 0;
> +    if (end.w0 & END_W0_UCOND_NOTIFY) {
> +        args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY;
> +    }
> +
> +    if (end.w0 & END_W0_ENQUEUE) {
> +        args[1] =
> +            (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
> +        args[2] = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> +    } else {
> +        args[1] = 0;
> +        args[2] = 0;
> +    }
> +
> +    /* TODO: do we need any locking on the END ? */
> +    if (flags & SPAPR_XIVE_END_DEBUG) {
> +        /* Load the event queue generation number into the return flags */
> +        args[0] |= (uint64_t)GETFIELD(END_W1_GENERATION, end.w1) << 62;
> +
> +        /* Load R7 with the event queue offset counter */
> +        args[3] = GETFIELD(END_W1_PAGE_OFF, end.w1);
> +    } else {
> +        args[3] = 0;
> +    }
> +
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the
> + * reporting cache line pair for the calling thread.  The reporting
> + * cache lines will contain the OS interrupt context when the OS
> + * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS
> + * interrupt. The reporting cache lines can be reset by inputting -1
> + * in "reportingLine".  Issuing the CI store byte without reporting
> + * cache lines registered will result in the data not being accessible
> + * to the OS.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-63: Reserved
> + * - "reportingLine": The logical real address of the reporting cache
> + *    line pair
> + *
> + * Output:
> + * - None
> + */
> +static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu,
> +                                                sPAPRMachineState *spapr,
> +                                                target_ulong opcode,
> +                                                target_ulong *args)
> +{
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    /* TODO: H_INT_SET_OS_REPORTING_LINE */
> +    return H_FUNCTION;
> +}
> +
> +/*
> + * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical
> + * real address of the reporting cache line pair set for the input
> + * "target".  If no reporting cache line pair has been set, -1 is
> + * returned.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-63: Reserved
> + * - "target" is per "ibm,ppc-interrupt-server#s" or
> + *       "ibm,ppc-interrupt-gserver#s"
> + * - "reportingLine": The logical real address of the reporting cache
> + *   line pair
> + *
> + * Output:
> + * - R4: The logical real address of the reporting line if set, else -1
> + */
> +static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu,
> +                                                sPAPRMachineState *spapr,
> +                                                target_ulong opcode,
> +                                                target_ulong *args)
> +{
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    /* TODO: H_INT_GET_OS_REPORTING_LINE */
> +    return H_FUNCTION;
> +}
> +
> +/*
> + * The H_INT_ESB hcall() is used to issue a load or store to the ESB
> + * page for the input "lisn".  This hcall is only supported for LISNs
> + * that have the ESB hcall flag set to 1 when returned from hcall()
> + * H_INT_GET_SOURCE_INFO.

Is there a reason for specifically restricting this to LISNs which
advertise it, rather than allowing it for anything?  Obviously using
the direct MMIOs will generally be a faster option when possible, but
I could see occasions where it might be simpler for the guest to
always use H_INT_ESB (e.g. for micro-guests like kvm-unit-tests).

> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-62: Reserved
> + *      bit 63: Store: Store=1, store operation, else load operation
> + * - "lisn" is per "interrupts", "interrupt-map", or
> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
> + *      ibm,query-interrupt-source-number RTAS call, or as
> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
> + * - "esbOffset" is the offset into the ESB page for the load or store 
> operation
> + * - "storeData" is the data to write for a store operation
> + *
> + * Output:
> + * - R4: R4: The value of the load if load operation, else -1
> + */
> +
> +#define SPAPR_XIVE_ESB_STORE PPC_BIT(63)
> +
> +static target_ulong h_int_esb(PowerPCCPU *cpu,
> +                              sPAPRMachineState *spapr,
> +                              target_ulong opcode,
> +                              target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveEAS eas;
> +    target_ulong flags  = args[0];
> +    target_ulong lisn   = args[1];
> +    target_ulong offset = args[2];
> +    target_ulong data   = args[3];
> +    hwaddr mmio_addr;
> +    XiveSource *xsrc = &xive->source;
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags & ~SPAPR_XIVE_ESB_STORE) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
> +        return H_P2;
> +    }
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        return H_P2;
> +    }
> +
> +    if (offset > (1ull << xsrc->esb_shift)) {
> +        return H_P3;
> +    }
> +
> +    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
> +
> +    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
> +                      (flags & SPAPR_XIVE_ESB_STORE))) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
> +                      HWADDR_PRIx "\n", mmio_addr);
> +        return H_HARDWARE;
> +    }
> +    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_SYNC hcall() is used to issue hardware syncs that will
> + * ensure any in flight events for the input lisn are in the event
> + * queue.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-63: Reserved
> + * - "lisn" is per "interrupts", "interrupt-map", or
> + *      "ibm,xive-lisn-ranges" properties, or as returned by the
> + *      ibm,query-interrupt-source-number RTAS call, or as
> + *      returned by the H_ALLOCATE_VAS_WINDOW hcall
> + *
> + * Output:
> + * - None
> + */
> +static target_ulong h_int_sync(PowerPCCPU *cpu,
> +                               sPAPRMachineState *spapr,
> +                               target_ulong opcode,
> +                               target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    XiveEAS eas;
> +    target_ulong flags = args[0];
> +    target_ulong lisn = args[1];
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (xive_router_get_eas(XIVE_ROUTER(xive), lisn, &eas)) {
> +        return H_P2;
> +    }
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        return H_P2;
> +    }
> +
> +    /*
> +     * H_STATE should be returned if a H_INT_RESET is in progress.
> +     * This is not needed when running the emulation under QEMU
> +     */
> +
> +    /* This is not real hardware. Nothing to be done */

At least, not as long as all the XIVE operations are under the BQL.

> +    return H_SUCCESS;
> +}
> +
> +/*
> + * The H_INT_RESET hcall() is used to reset all of the partition's
> + * interrupt exploitation structures to their initial state.  This
> + * means losing all previously set interrupt state set via
> + * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
> + *
> + * Parameters:
> + * Input:
> + * - "flags"
> + *      Bits 0-63: Reserved
> + *
> + * Output:
> + * - None
> + */
> +static target_ulong h_int_reset(PowerPCCPU *cpu,
> +                                sPAPRMachineState *spapr,
> +                                target_ulong opcode,
> +                                target_ulong *args)
> +{
> +    sPAPRXive *xive = spapr->xive;
> +    target_ulong flags   = args[0];
> +
> +    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
> +        return H_FUNCTION;
> +    }
> +
> +    if (flags) {
> +        return H_PARAMETER;
> +    }
> +
> +    device_reset(DEVICE(xive));
> +    return H_SUCCESS;
> +}
> +
> +void spapr_xive_hcall_init(sPAPRMachineState *spapr)
> +{
> +    spapr_register_hypercall(H_INT_GET_SOURCE_INFO, h_int_get_source_info);
> +    spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, 
> h_int_set_source_config);
> +    spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, 
> h_int_get_source_config);
> +    spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info);
> +    spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, h_int_set_queue_config);
> +    spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, h_int_get_queue_config);
> +    spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE,
> +                             h_int_set_os_reporting_line);
> +    spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE,
> +                             h_int_get_os_reporting_line);
> +    spapr_register_hypercall(H_INT_ESB, h_int_esb);
> +    spapr_register_hypercall(H_INT_SYNC, h_int_sync);
> +    spapr_register_hypercall(H_INT_RESET, h_int_reset);
> +}
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 2569ae1bc7f8..da6fcfaa3c52 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -258,6 +258,8 @@ static void spapr_irq_init_xive(sPAPRMachineState *spapr, 
> int nr_irqs,
>          error_propagate(errp, local_err);
>          return;
>      }
> +
> +    spapr_xive_hcall_init(spapr);
>  }
>  
>  static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 301a8e972d91..eacd26836ebf 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -38,7 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_XIVE) += xive.o
> -obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o spapr_xive_hcall.o
>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]