qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 18/26] Implement the PAPR (pSeries) virtualized


From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 18/26] Implement the PAPR (pSeries) virtualized interrupt controller (xics)
Date: Wed, 16 Mar 2011 17:16:07 -0500
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.14) Gecko/20110223 Lightning/1.0b2 Thunderbird/3.1.8

On 03/15/2011 11:56 PM, David Gibson wrote:
PAPR defines an interrupt control architecture which is logically divided
into ICP (Interrupt Control Presentation, each unit is responsible for
presenting interrupts to a particular "interrupt server", i.e. CPU) and
ICS (Interrupt Control Source, each unit responsible for one or more
hardware interrupts as numbered globally across the system).  All PAPR
virtual IO devices expect to deliver interrupts via this mechanism.  In
Linux, this interrupt controller system is handled by the "xics" driver.

On pSeries systems, access to the interrupt controller is virtualized via
hypercalls and RTAS methods.  However, the virtualized interface is very
similar to the underlying interrupt controller hardware, and similar PICs
exist un-virtualized in some other systems.

This patch implements both the ICP and ICS sides of the PAPR interrupt
controller.  For now, only the hypercall virtualized interface is provided,
however it would be relatively straightforward to graft an emulated
register interface onto the underlying interrupt logic if we want to add
a machine with a hardware ICS/ICP system in the future.

There are some limitations in this implementation: it is assumed for now
that only one instance of the ICS exists, although a full xics system can
have several, each responsible for a different group of hardware irqs.
ICP/ICS can handle both level-sensitive (LSI) and message signalled (MSI)
interrupt inputs.  For now, this implementation supports only MSI
interrupts, since that is used by PAPR virtual IO devices.

Signed-off-by: Paul Mackerras<address@hidden>
Signed-off-by: David Gibson<address@hidden>
---
  Makefile.target |    2 +-
  hw/spapr.c      |   26 +++
  hw/spapr.h      |    2 +
  hw/xics.c       |  528 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
  hw/xics.h       |   13 ++
  5 files changed, 570 insertions(+), 1 deletions(-)
  create mode 100644 hw/xics.c
  create mode 100644 hw/xics.h

diff --git a/Makefile.target b/Makefile.target
index e333225..2b0588e 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -233,7 +233,7 @@ obj-ppc-y += ppc_oldworld.o
  obj-ppc-y += ppc_newworld.o
  # IBM pSeries (sPAPR)
  obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
-obj-ppc-y += spapr_vty.o
+obj-ppc-y += xics.o spapr_vty.o
  # PowerPC 4xx boards
  obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
  obj-ppc-y += ppc440.o ppc440_bamboo.o
diff --git a/hw/spapr.c b/hw/spapr.c
index 23f493a..be30def 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -34,6 +34,7 @@

  #include "hw/spapr.h"
  #include "hw/spapr_vio.h"
+#include "hw/xics.h"

  #include<libfdt.h>

@@ -62,6 +63,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t 
ramsize,
      uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
      uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
      char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr";
+    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
      int i;
      char *modelname;
      int ret;
@@ -120,6 +122,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t 
ramsize,

      for (i = 0; i<  smp_cpus; i++) {
          CPUState *env = envs[i];
+        uint32_t gserver_prop[] = {cpu_to_be32(i), 0}; /* HACK! */
          char *nodename;
          uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                             0xffffffff, 0xffffffff};
@@ -147,6 +150,9 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t 
ramsize,
          _FDT((fdt_property(fdt, "ibm,pft-size", pft_size_prop, 
sizeof(pft_size_prop))));
          _FDT((fdt_property_string(fdt, "status", "okay")));
          _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
+        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", i)));
+        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+                           gserver_prop, sizeof(gserver_prop))));

          if (envs[i]->mmu_model&  POWERPC_MMU_1TSEG) {
              _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
@@ -168,6 +174,20 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t 
ramsize,

      _FDT((fdt_end_node(fdt)));

+    /* interrupt controller */
+    _FDT((fdt_begin_node(fdt, "address@hidden")));
+
+    _FDT((fdt_property_string(fdt, "device_type",
+                              "PowerPC-External-Interrupt-Presentation")));
+    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
+    _FDT((fdt_property_cell(fdt, "reg", 0)));
+    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
+                       interrupt_server_ranges_prop,
+                       sizeof(interrupt_server_ranges_prop))));
+
+    _FDT((fdt_end_node(fdt)));
+
      /* vdevice */
      _FDT((fdt_begin_node(fdt, "vdevice")));

@@ -175,6 +195,8 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t 
ramsize,
      _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
      _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
      _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
+    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
+    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

      _FDT((fdt_end_node(fdt)));

@@ -290,6 +312,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
      }
      qemu_free(filename);

+    /* Set up Interrupt Controller */
+    spapr->icp = xics_system_init(smp_cpus,&env, MAX_SERIAL_PORTS);
+
+    /* Set up VIO bus */
      spapr->vio_bus = spapr_vio_bus_init();

      for (i = 0; i<  MAX_SERIAL_PORTS; i++) {
diff --git a/hw/spapr.h b/hw/spapr.h
index 7a7c319..4b54c22 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -2,9 +2,11 @@
  #define __HW_SPAPR_H__

  struct VIOsPAPRBus;
+struct icp_state;

  typedef struct sPAPREnvironment {
      struct VIOsPAPRBus *vio_bus;
+    struct icp_state *icp;
  } sPAPREnvironment;

  #define H_SUCCESS         0
diff --git a/hw/xics.c b/hw/xics.c
new file mode 100644
index 0000000..46e778a
--- /dev/null
+++ b/hw/xics.c
@@ -0,0 +1,528 @@

Copyright: this new file is missing a copyright/license header.

+#include "hw.h"
+#include "hw/spapr.h"
+#include "hw/xics.h"
+
+#include<pthread.h>

This isn't needed and it'll break the Windows build. We carry a global mutex whenever QEMU code executes.

+/*
+ * ICP: Presentation layer
+ */
+
+struct icp_server_state {
+    uint32_t cppr :8;
+    uint32_t xisr :24;

No real reason to use bitfields here.

+    uint8_t pending_priority;
+    uint8_t mfrr;
+    qemu_irq output;
+    pthread_mutex_t lock;
+};
+
+struct ics_state;
+
+struct icp_state {
+    long nr_servers;
+    struct icp_server_state *ss;
+    struct ics_state *ics;
+};
+
+static void ics_reject(struct ics_state *ics, int nr);
+static void ics_resend(struct ics_state *ics);
+static void ics_eoi(struct ics_state *ics, int nr);
+
+static void icp_check_ipi(struct icp_state *icp, int server)
+{
+    struct icp_server_state *ss = icp->ss + server;
+
+    if (ss->xisr&&  (ss->pending_priority<= ss->mfrr)) {
+        return;
+    }
+
+    if (ss->xisr) {
+        ics_reject(icp->ics, ss->xisr);
+    }
+
+    ss->xisr = XICS_IPI;
+    ss->pending_priority = ss->mfrr;
+    qemu_irq_raise(ss->output);
+}
+
+static void icp_resend(struct icp_state *icp, int server)
+{
+    struct icp_server_state *ss = icp->ss + server;
+
+    if (ss->mfrr<  ss->cppr) {
+        icp_check_ipi(icp, server);
+    }
+    ics_resend(icp->ics);
+}
+
+static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
+{
+    struct icp_server_state *ss = icp->ss + server;
+    uint8_t old_cppr;
+    uint32_t old_xisr;
+
+    pthread_mutex_lock(&ss->lock);
+    old_cppr = ss->cppr;
+    ss->cppr = cppr;
+
+    if (cppr<  old_cppr) {
+        if (ss->xisr&&  (cppr<= ss->pending_priority)) {
+            old_xisr = ss->xisr;
+            ss->xisr = 0;
+            qemu_irq_lower(ss->output);
+            ics_reject(icp->ics, old_xisr);
+        }
+    } else {
+        if (!ss->xisr) {
+            icp_resend(icp, server);
+        }
+    }
+    pthread_mutex_unlock(&ss->lock);
+}
+
+static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr)
+{
+    struct icp_server_state *ss = icp->ss + nr;
+
+    pthread_mutex_lock(&ss->lock);
+
+    ss->mfrr = mfrr;
+    if (mfrr<  ss->cppr) {
+        icp_check_ipi(icp, nr);
+    }
+
+    pthread_mutex_unlock(&ss->lock);
+}
+
+static uint32_t icp_accept(struct icp_server_state *ss)
+{
+    uint32_t xirr;
+
+    pthread_mutex_lock(&ss->lock);
+    qemu_irq_lower(ss->output);
+    xirr = ss->cppr<<  24 | ss->xisr;
+    ss->xisr = 0;
+    ss->cppr = ss->pending_priority;
+    pthread_mutex_unlock(&ss->lock);
+    return xirr;
+}
+
+static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
+{
+    struct icp_server_state *ss = icp->ss + server;
+
+    ics_eoi(icp->ics, xirr&  0xffffff);
+    /* Send EOI ->  ICS */
+    ss->cppr = xirr>>  24;
+    if (!ss->xisr) {
+        icp_resend(icp, server);
+    }
+}
+
+static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t 
priority)
+{
+    struct icp_server_state *ss = icp->ss + server;
+
+    pthread_mutex_lock(&ss->lock);
+
+    if ((priority>= ss->cppr)
+        || (ss->xisr&&  (ss->pending_priority<= priority))) {
+        ics_reject(icp->ics, nr);
+    } else {
+        if (ss->xisr) {
+            ics_reject(icp->ics, ss->xisr);
+        }
+        ss->xisr = nr;
+        ss->pending_priority = priority;
+        qemu_irq_raise(ss->output);
+    }
+
+    pthread_mutex_unlock(&ss->lock);
+}
+
+/*
+ * ICS: Source layer
+ */
+
+struct ics_irq_state {
+    int server;
+    uint8_t priority;
+    uint8_t saved_priority;
+    /* int pending :1; */
+    /* int presented :1; */
+    int rejected :1;
+    int masked_pending :1;
+};
+
+struct ics_state {
+    int nr_irqs;
+    int offset;
+    qemu_irq *qirqs;
+    struct ics_irq_state *irqs;
+    struct icp_state *icp;
+};
+
+static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
+{
+    return (nr>= ics->offset)
+&&  (nr<  (ics->offset + ics->nr_irqs));
+}
+
+static void ics_set_irq_msi(void *opaque, int nr, int val)
+{
+    struct ics_state *ics = (struct ics_state *)opaque;
+    struct ics_irq_state *irq = ics->irqs + nr;
+
+    if (val) {
+        if (irq->priority == 0xff) {
+            irq->masked_pending = 1;
+            /* masked pending */ ;
+        } else  {
+            icp_irq(ics->icp, irq->server, nr + ics->offset, irq->priority);
+        }
+    }
+}
+
+static void ics_reject_msi(struct ics_state *ics, int nr)
+{
+    struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+    irq->rejected = 1;
+}
+
+static void ics_resend_msi(struct ics_state *ics)
+{
+    int i;
+
+    for (i = 0; i<  ics->nr_irqs; i++) {
+        struct ics_irq_state *irq = ics->irqs + i;
+
+        /* FIXME: filter by server#? */
+        if (irq->rejected) {
+            irq->rejected = 0;
+            if (irq->priority != 0xff) {
+                icp_irq(ics->icp, irq->server, i + ics->offset, irq->priority);
+            }
+        }
+    }
+}
+
+static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
+                               uint8_t priority)
+{
+    struct ics_irq_state *irq = ics->irqs + nr;
+
+    irq->server = server;
+    irq->priority = priority;
+
+    if (!irq->masked_pending || (priority = 0xff)) {
+        return;
+    }
+
+    irq->masked_pending = 0;
+    icp_irq(ics->icp, server, nr + ics->offset, priority);
+}
+
+/* static void ics_recheck_irq(struct ics_state *ics, int nr) */

This is a pretty ugly way to comment out code.  At least use an #if 0.

Regards,

Anthony Liguori



reply via email to

[Prev in Thread] Current Thread [Next in Thread]