qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-ppc] [PATCH 15/19] pseries: Support for in-kernel XICS interrupt controller


From: Alexey Kardashevskiy
Subject: [Qemu-ppc] [PATCH 15/19] pseries: Support for in-kernel XICS interrupt controller
Date: Sat, 6 Jul 2013 23:54:12 +1000

From: David Gibson <address@hidden>

Recent (host) kernels support emulating the PAPR defined "XICS" interrupt
controller system within KVM.  This patch allows qemu to initialize and
configure the in-kernel XICS, and keep its state in sync with qemu's XICS
state as necessary.

This should give considerable performance improvements.  For example, on a
simple IPI ping-pong test between hardware threads, the qemu XICS gives us
around 5,000 irqs/second, whereas the in-kernel XICS gives us around
70,000 irqs/second on the same hardware configuration.

[Mike Qiu <address@hidden>: fixed typo which caused ics_set_kvm_state() to fail]
Signed-off-by: David Gibson <address@hidden>
[aik: moved to a separate device]

---
Changes:
2013/07/01
* fixed VMState names in order to support migration from xics-kvm to xics and
  vice versa

Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
 default-configs/ppc64-softmmu.mak |   1 +
 hw/intc/Makefile.objs             |   1 +
 hw/intc/xics_kvm.c                | 445 ++++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr.c                    |  32 ++-
 include/hw/ppc/xics.h             |  13 ++
 5 files changed, 489 insertions(+), 3 deletions(-)
 create mode 100644 hw/intc/xics_kvm.c

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 69a9f8d..5b995f9 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -48,5 +48,6 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 # For pSeries
 CONFIG_PCI_HOTPLUG=y
 CONFIG_XICS=$(CONFIG_PSERIES)
+CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index abe8f80..9e77afe 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -23,3 +23,4 @@ obj-$(CONFIG_OPENPIC) += openpic.o
 obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
 obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
+obj-$(CONFIG_XICS_KVM) += xics_kvm.o
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
new file mode 100644
index 0000000..b630150
--- /dev/null
+++ b/hw/intc/xics_kvm.c
@@ -0,0 +1,445 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel emulation
+ *
+ * Copyright (c) 2013 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#include "hw/hw.h"
+#include "trace.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/xics.h"
+#include "kvm_ppc.h"
+#include "qemu/config-file.h"
+
+#include <sys/ioctl.h>
+
+/*
+ * Instance state of the TYPE_XICS_KVM device: the common XICS state plus
+ * the bookkeeping filled in by xics_kvm_realize().
+ */
+struct icp_state_kvm {
+    struct icp_state parent;    /* common state of the TYPE_XICS parent type */
+
+    /*
+     * Tokens returned by spapr_rtas_register() for the four RTAS calls
+     * that are also handed to kvmppc_define_rtas_token().
+     */
+    uint32_t set_xive_token;
+    uint32_t get_xive_token;
+    uint32_t int_off_token;
+    uint32_t int_on_token;
+    int kernel_xics_fd;         /* fd returned by KVM_CREATE_DEVICE */
+};
+
+/*
+ * Read the packed ICP state word of one server from KVM and unpack it
+ * into ss->xirr, ss->mfrr and ss->pending_priority.
+ *
+ * Does nothing when ss->cs is unset (it is only set by
+ * xics_kvm_cpu_setup()).  A failing KVM_GET_ONE_REG is fatal.
+ */
+static void icp_get_kvm_state(struct icp_server_state *ss)
+{
+    uint64_t packed;
+    struct kvm_one_reg one_reg = {
+        .id = KVM_REG_PPC_ICP_STATE,
+        .addr = (uintptr_t)&packed,
+    };
+
+    if (ss->cs == NULL) {
+        return; /* kernel irqchip not in use */
+    }
+
+    if (kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &one_reg) != 0) {
+        fprintf(stderr, "Unable to retrieve KVM interrupt controller state"
+                " for CPU %d: %s\n", ss->cs->cpu_index, strerror(errno));
+        exit(1);
+    }
+
+    /* Each field sits at its own shift inside the 64-bit word */
+    ss->pending_priority = (packed >> KVM_REG_PPC_ICP_PPRI_SHIFT)
+        & KVM_REG_PPC_ICP_PPRI_MASK;
+    ss->mfrr = (packed >> KVM_REG_PPC_ICP_MFRR_SHIFT)
+        & KVM_REG_PPC_ICP_MFRR_MASK;
+    ss->xirr = packed >> KVM_REG_PPC_ICP_XISR_SHIFT;
+}
+
+/*
+ * Pack ss->xirr, ss->mfrr and ss->pending_priority into the ICP state
+ * word and write it to KVM for the server's vCPU.
+ *
+ * Returns 0 on success, or when ss->cs is unset (kernel irqchip not in
+ * use).  On failure the error is reported on stderr and the non-zero
+ * result of KVM_SET_ONE_REG is returned, so that callers such as
+ * icp_post_load() can propagate it instead of the process exiting.
+ */
+static int icp_set_kvm_state(struct icp_server_state *ss)
+{
+    uint64_t state;
+    struct kvm_one_reg reg = {
+        .id = KVM_REG_PPC_ICP_STATE,
+        .addr = (uintptr_t)&state,
+    };
+    int ret;
+
+    if (!ss->cs) {
+        return 0; /* kernel irqchip not in use */
+    }
+
+    state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
+        | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
+        | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
+
+    ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
+    if (ret != 0) {
+        fprintf(stderr, "Unable to restore KVM interrupt controller state (0x%"
+                PRIx64 ") for CPU %d: %s\n", state, ss->cs->cpu_index,
+                strerror(errno));
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Fetch the state of every interrupt source of @ics from the kernel XICS
+ * (one KVM_GET_DEVICE_ATTR per source, attribute number = global irq
+ * number) and translate it into qemu's ics_irq_state representation.
+ *
+ * A failing ioctl is fatal.
+ */
+static void ics_get_kvm_state(struct ics_state *ics)
+{
+    struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp);
+    uint64_t state;
+    struct kvm_device_attr attr = {
+        .flags = 0,
+        .group = KVM_DEV_XICS_GRP_SOURCES,
+        .addr = (uint64_t)(uintptr_t)&state,
+    };
+    int i;
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        struct ics_irq_state *irq = &ics->irqs[i];
+        int ret;
+
+        attr.attr = i + ics->offset;
+
+        ret = ioctl(icpkvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+        if (ret != 0) {
+            fprintf(stderr, "Unable to retrieve KVM interrupt controller state"
+                    " for IRQ %d: %s\n", i + ics->offset, strerror(errno));
+            exit(1);
+        }
+
+        irq->server = state & KVM_XICS_DESTINATION_MASK;
+        irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
+            & KVM_XICS_PRIORITY_MASK;
+        /*
+         * To be consistent with the software emulation in xics.c, we
+         * split out the masked state + priority that we get from the
+         * kernel into 'current priority' (0xff if masked) and
+         * 'saved priority' (if masked, this is the priority the
+         * interrupt had before it was masked).  Masking and unmasking
+         * are done with the ibm,int-off and ibm,int-on RTAS calls.
+         */
+        if (state & KVM_XICS_MASKED) {
+            irq->priority = 0xff;
+        } else {
+            irq->priority = irq->saved_priority;
+        }
+
+        /*
+         * Drop the status bits derived below before recomputing them;
+         * otherwise a source that was pending at an earlier call would
+         * still look pending after the kernel has delivered it.
+         */
+        irq->status &= ~(XICS_STATUS_ASSERTED | XICS_STATUS_MASKED_PENDING
+                         | XICS_STATUS_REJECTED);
+
+        if (state & KVM_XICS_PENDING) {
+            if (state & KVM_XICS_LEVEL_SENSITIVE) {
+                irq->status |= XICS_STATUS_ASSERTED;
+            } else {
+                /*
+                 * A pending edge-triggered interrupt (or MSI)
+                 * must have been rejected previously when we
+                 * first detected it and tried to deliver it,
+                 * so mark it as pending and previously rejected
+                 * for consistency with how xics.c works.
+                 */
+                irq->status |= XICS_STATUS_MASKED_PENDING
+                    | XICS_STATUS_REJECTED;
+            }
+        }
+    }
+}
+
+/*
+ * Push qemu's view of every interrupt source of @ics into the kernel
+ * XICS, one KVM_SET_DEVICE_ATTR per source.
+ *
+ * Returns 0 on success; on the first failing ioctl the error is printed
+ * and the ioctl's return value is returned without touching the
+ * remaining sources.
+ */
+static int ics_set_kvm_state(struct ics_state *ics)
+{
+    struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp);
+    uint64_t word;
+    struct kvm_device_attr src_attr = {
+        .flags = 0,
+        .group = KVM_DEV_XICS_GRP_SOURCES,
+        .addr = (uint64_t)(uintptr_t)&word,
+    };
+    int srcno;
+
+    for (srcno = 0; srcno < ics->nr_irqs; srcno++) {
+        struct ics_irq_state *irq = &ics->irqs[srcno];
+        /* status bit that means "pending" for this kind of source */
+        int pending_flag = XICS_STATUS_MASKED_PENDING;
+        int ret;
+
+        src_attr.attr = srcno + ics->offset;
+
+        word = irq->server;
+        word |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
+            << KVM_XICS_PRIORITY_SHIFT;
+
+        /* A current priority differing from the saved one means "masked" */
+        if (irq->priority != irq->saved_priority) {
+            assert(irq->priority == 0xff);
+            word |= KVM_XICS_MASKED;
+        }
+
+        if (ics->islsi[srcno]) {
+            word |= KVM_XICS_LEVEL_SENSITIVE;
+            pending_flag = XICS_STATUS_ASSERTED;
+        }
+        if (irq->status & pending_flag) {
+            word |= KVM_XICS_PENDING;
+        }
+
+        ret = ioctl(icpkvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &src_attr);
+        if (ret != 0) {
+            fprintf(stderr, "Unable to restore KVM interrupt controller state"
+                    " for IRQs %d: %s\n", srcno + ics->offset,
+                    strerror(errno));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* VMState pre_save hook: refresh the server state (@opaque) from KVM. */
+static void icp_pre_save(void *opaque)
+{
+    icp_get_kvm_state(opaque);
+}
+
+/*
+ * VMState post_load hook: write the just-loaded server state (@opaque)
+ * to KVM and hand back icp_set_kvm_state()'s result.  @version_id is
+ * not used.
+ */
+static int icp_post_load(void *opaque, int version_id)
+{
+    return icp_set_kvm_state(opaque);
+}
+
+/* VMState pre_save hook: refresh the source state (@opaque) from KVM. */
+static void ics_pre_save(void *opaque)
+{
+    ics_get_kvm_state(opaque);
+}
+
+/*
+ * VMState post_load hook: write the just-loaded source state (@opaque)
+ * to KVM and hand back ics_set_kvm_state()'s result.  @version_id is
+ * not used.
+ */
+static int ics_post_load(void *opaque, int version_id)
+{
+    return ics_set_kvm_state(opaque);
+}
+
+/*
+ * Migration descriptor for one ICP server when the kernel XICS is used.
+ * .fields is not set here: xics_kvm_cpu_setup() copies it from
+ * vmstate_icp_server before registering, so this object cannot be const.
+ * The pre_save/post_load hooks sync the state with KVM.
+ */
+static VMStateDescription vmstate_icpkvm_server = {
+    .name = "icp/server",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = icp_pre_save,
+    .post_load = icp_post_load,
+};
+
+/*
+ * Migration descriptor for the ICS when the kernel XICS is used.
+ * .fields is not set here: xics_kvm_realize() copies it from vmstate_ics
+ * before registering, so this object cannot be const.  The
+ * pre_save/post_load hooks sync the state with KVM.
+ */
+static VMStateDescription vmstate_icskvm = {
+    .name = "ics",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = ics_pre_save,
+    .post_load = ics_post_load,
+};
+
+/*
+ * qemu_irq handler for the KVM-backed ICS: forward a change on source
+ * @srcno (relative to the ICS offset) to the kernel via KVM_IRQ_LINE.
+ *
+ * For level-sensitive (LSI) sources @val selects assert/deassert; for
+ * all other sources only a non-zero @val is forwarded and a zero @val
+ * is ignored.  An ioctl failure is reported with perror() only.
+ */
+static void ics_set_irq_kvm(void *opaque, int srcno, int val)
+{
+    struct ics_state *ics = opaque;
+    struct kvm_irq_level irq_line;
+
+    if (ics->islsi[srcno]) {
+        irq_line.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
+    } else if (val) {
+        irq_line.level = KVM_INTERRUPT_SET;
+    } else {
+        return;
+    }
+    irq_line.irq = srcno + ics->offset;
+
+    if (kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &irq_line) < 0) {
+        perror("kvm_irq_line");
+    }
+}
+
+/*
+ * Connect @cpu to the in-kernel XICS behind @icp and register the
+ * per-server migration state.
+ *
+ * Returns -1 without side effects when @icp is not a TYPE_XICS_KVM
+ * object, 0 on success.  A failure to enable KVM_CAP_IRQ_XICS on the
+ * vCPU is fatal; a kernel_xics_fd of -1 aborts.
+ */
+int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
+{
+    CPUState *cs;
+    struct icp_server_state *ss;
+    struct icp_state_kvm *icpkvm;
+    struct kvm_enable_cap xics_enable_cap = {
+        .cap = KVM_CAP_IRQ_XICS,
+        .flags = 0,
+    };
+    int ret;
+
+    icpkvm = (struct icp_state_kvm *)object_dynamic_cast(OBJECT(icp),
+                                                         TYPE_XICS_KVM);
+    if (!icpkvm) {
+        return -1;
+    }
+
+    cs = CPU(cpu);
+
+    /* Validate the index before using it to address icp->ss[] */
+    assert(cs->cpu_index < icp->nr_servers);
+    ss = &icp->ss[cs->cpu_index];
+
+    if (icpkvm->kernel_xics_fd == -1) {
+        abort();
+    }
+
+    /* args[0] = XICS device fd, args[1] = server number; the rest stay 0 */
+    xics_enable_cap.args[0] = icpkvm->kernel_xics_fd;
+    xics_enable_cap.args[1] = cs->cpu_index;
+
+    /* A non-NULL ss->cs is what marks this server as kernel-backed */
+    ss->cs = cs;
+
+    ret = kvm_vcpu_ioctl(ss->cs, KVM_ENABLE_CAP, &xics_enable_cap);
+    if (ret < 0) {
+        fprintf(stderr, "Unable to connect CPU%d to kernel XICS: %s\n",
+                cs->cpu_index, strerror(errno));
+        exit(1);
+    }
+
+    xics_common_cpu_setup(icp, cpu);
+
+    /* Share the field list with the emulated XICS descriptor */
+    vmstate_icpkvm_server.fields = vmstate_icp_server.fields;
+    vmstate_register(NULL, cs->cpu_index, &vmstate_icpkvm_server, ss);
+
+    return 0;
+}
+
+/*
+ * Placeholder handler registered with spapr_rtas_register() for the RTAS
+ * calls whose tokens are handed to KVM in xics_kvm_realize().  It only
+ * logs that it was reached; all arguments are ignored.
+ */
+static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+                       uint32_t token,
+                       uint32_t nargs, target_ulong args,
+                       uint32_t nret, target_ulong rets)
+{
+    fprintf(stderr,
+            "pseries: %s() should never be called for in-kernel XICS\n",
+            __func__);
+}
+
+/*
+ * Realize the TYPE_XICS_KVM device: register the XICS RTAS calls, hand
+ * their tokens to KVM, create the in-kernel XICS device and register the
+ * ICS migration state.
+ *
+ * Errors are reported through @errp.  Once the RTAS tokens may have been
+ * defined in KVM, every failure path resets all four to 0.
+ */
+static void xics_kvm_realize(DeviceState *dev, Error **errp)
+{
+    struct icp_state_kvm *icpkvm = XICS_KVM(dev);
+    QemuOptsList *list = qemu_find_opts("machine");
+    int rc;
+    struct kvm_create_device xics_create_device = {
+        .type = KVM_DEV_TYPE_XICS,
+        .flags = 0,
+    };
+
+    /*
+     * Nothing has been defined in KVM yet, so there is nothing to undo:
+     * return directly rather than going through the fail path, which
+     * calls into KVM.
+     */
+    if (!kvm_enabled()) {
+        error_setg(errp, "KVM must be enabled for in-kernel XICS");
+        return;
+    }
+
+    /*
+     * NOTE(review): an empty "machine" option list is rejected here,
+     * while xics_system_init() treats it as "irqchip allowed" -- confirm
+     * which behaviour is intended.
+     */
+    if (QTAILQ_EMPTY(&list->head) ||
+        !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+                           "kernel_irqchip", true) ||
+        !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
+        error_setg(errp, "In-kernel XICS not enabled via machine options"
+                   " or not supported by KVM");
+        return;
+    }
+
+    icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
+    icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
+    icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
+    icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+
+    rc = kvmppc_define_rtas_token(icpkvm->set_xive_token, "ibm,set-xive");
+    if (rc < 0) {
+        error_setg(errp, "kvmppc_define_rtas_token: ibm,set-xive");
+        goto fail;
+    }
+
+    rc = kvmppc_define_rtas_token(icpkvm->get_xive_token, "ibm,get-xive");
+    if (rc < 0) {
+        error_setg(errp, "kvmppc_define_rtas_token: ibm,get-xive");
+        goto fail;
+    }
+
+    rc = kvmppc_define_rtas_token(icpkvm->int_on_token, "ibm,int-on");
+    if (rc < 0) {
+        error_setg(errp, "kvmppc_define_rtas_token: ibm,int-on");
+        goto fail;
+    }
+
+    rc = kvmppc_define_rtas_token(icpkvm->int_off_token, "ibm,int-off");
+    if (rc < 0) {
+        error_setg(errp, "kvmppc_define_rtas_token: ibm,int-off");
+        goto fail;
+    }
+
+    /* Create the kernel ICP */
+    rc = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &xics_create_device);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Error on KVM_CREATE_DEVICE for XICS");
+        goto fail;
+    }
+
+    icpkvm->kernel_xics_fd = xics_create_device.fd;
+
+    xics_common_init(&icpkvm->parent, ics_set_irq_kvm);
+
+    /* We use the ICS's offset into the global irq number space
+     * as an instance id.  This means we can extend to multiple ICS
+     * instances without needing to change the savevm format */
+    vmstate_icskvm.fields = vmstate_ics.fields;
+    vmstate_register(NULL, icpkvm->parent.ics->offset, &vmstate_icskvm,
+                     icpkvm->parent.ics);
+
+    return;
+
+fail:
+    /* Reset all four tokens in KVM, whichever step failed */
+    kvmppc_define_rtas_token(0, "ibm,set-xive");
+    kvmppc_define_rtas_token(0, "ibm,get-xive");
+    kvmppc_define_rtas_token(0, "ibm,int-on");
+    kvmppc_define_rtas_token(0, "ibm,int-off");
+    return;
+}
+
+/*
+ * Device reset: reset the common XICS state, then push the resulting
+ * state of every kernel-connected server and of the ICS into KVM.  The
+ * results of the set-state helpers are not checked here.
+ */
+static void xics_kvm_reset(DeviceState *d)
+{
+    struct icp_state *icp = &XICS_KVM(d)->parent;
+    int server;
+
+    xics_common_reset(icp);
+
+    for (server = 0; server < icp->nr_servers; server++) {
+        struct icp_server_state *ss = &icp->ss[server];
+
+        /* Servers never attached by xics_kvm_cpu_setup() are skipped */
+        if (!ss->cs) {
+            continue;
+        }
+        icp_set_kvm_state(ss);
+    }
+
+    ics_set_kvm_state(icp->ics);
+}
+
+/* Class initializer: install the KVM-specific reset and realize hooks. */
+static void xics_kvm_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(oc);
+
+    device_class->reset = xics_kvm_reset;
+    device_class->realize = xics_kvm_realize;
+}
+
+/*
+ * Type description of TYPE_XICS_KVM: derives from TYPE_XICS, with
+ * instances sized as struct icp_state_kvm.
+ */
+static const TypeInfo xics_kvm_info = {
+    .name          = TYPE_XICS_KVM,
+    .parent        = TYPE_XICS,
+    .instance_size = sizeof(struct icp_state_kvm),
+    .class_init    = xics_kvm_class_init,
+};
+
+/* Register the TYPE_XICS_KVM type; hooked up through type_init() below. */
+static void xics_kvm_register_types(void)
+{
+    type_register_static(&xics_kvm_info);
+}
+
+type_init(xics_kvm_register_types)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bf348c7..961f2f7 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1001,7 +1001,31 @@ static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
 {
     struct icp_state *icp = NULL;
 
-    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
+    if (kvm_enabled()) {
+        bool irqchip_allowed = true, irqchip_required = false;
+        QemuOptsList *list = qemu_find_opts("machine");
+
+        if (!QTAILQ_EMPTY(&list->head)) {
+            irqchip_allowed = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+                                                "kernel_irqchip", true);
+            irqchip_required = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+                                                 "kernel_irqchip", false);
+        }
+
+        if (irqchip_allowed) {
+            icp = try_create_xics(TYPE_XICS_KVM, nr_servers, nr_irqs);
+        }
+
+        if (irqchip_required && !icp) {
+            perror("iFailed to create in-kernel XICS\n");
+            abort();
+        }
+    }
+
+    if (!icp) {
+        icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
+    }
+
     if (!icp) {
         perror("Failed to create XICS\n");
         abort();
@@ -1102,8 +1126,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
         }
         env = &cpu->env;
 
-        xics_cpu_setup(spapr->icp, cpu);
-
         /* Set time-base frequency to 512 MHz */
         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
 
@@ -1117,6 +1139,10 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
             kvmppc_set_papr(cpu);
         }
 
+        if (xics_kvm_cpu_setup(spapr->icp, cpu)) {
+            xics_cpu_setup(spapr->icp, cpu);
+        }
+
         qemu_register_reset(spapr_cpu_reset, cpu);
     }
 
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 3f72806..e474c01 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -32,6 +32,9 @@
 #define TYPE_XICS "xics"
 #define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
 
+#define TYPE_XICS_KVM "xics-kvm"
+#define XICS_KVM(obj) OBJECT_CHECK(struct icp_state_kvm, (obj), TYPE_XICS_KVM)
+
 #define XICS_IPI        0x2
 #define XICS_BUID       0x1
 #define XICS_IRQ_BASE   (XICS_BUID << 12)
@@ -53,6 +56,7 @@ struct icp_state {
 };
 
 struct icp_server_state {
+    CPUState *cs;
     uint32_t xirr;
     uint8_t pending_priority;
     uint8_t mfrr;
@@ -88,6 +92,15 @@ void xics_common_reset(struct icp_state *icp);
 
 void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
 
+/*
+ * Attach @cpu to the in-kernel XICS.  A non-zero return means the
+ * in-kernel XICS is not in use for @icp; the stub used without
+ * CONFIG_KVM always returns -1.
+ */
+#ifdef CONFIG_KVM
+int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
+#else
+static inline int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
+{
+    return -1;
+}
+#endif
+
 extern const VMStateDescription vmstate_icp_server;
 extern const VMStateDescription vmstate_ics;
 
-- 
1.8.3.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]