[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 3/3] pseries pci: added MSI/MSIX support
From: |
Alexey Kardashevskiy |
Subject: |
[Qemu-devel] [PATCH 3/3] pseries pci: added MSI/MSIX support |
Date: |
Thu, 14 Jun 2012 14:34:27 +1000 |
User-agent: |
Mozilla/5.0 (X11; Linux i686; rv:12.0) Gecko/20120428 Thunderbird/12.0.1 |
virtio-pci expects the guest to set up MSI message address and data, and
to do other initialization such as a vector number negotiation.
It also notifies the guest via writing an MSI message to a previously set
address.
This patch includes:
1. RTAS call "ibm,change-msi" which sets up number of MSI vectors per
a device. Note that this call may configure and return lesser number of
vectors than requested.
2. RTAS call "ibm,query-interrupt-source-number" which translates MSI
vector to interrupt controller (XICS) IRQ number.
3. A "config_space_address to msi_table" map to provide IRQ resolving from
config-address as MSI RTAS calls take a PCI config space address as
an identifier.
4. A MSIX memory region is added to catch msi_notify()/msix_notiry()
from virtio-pci and pass them to the guest via qemu_irq_pulse().
This patch depends on the "msi/msix: added functions to API to set up
message address and data" patch.
Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
hw/spapr.c | 9 ++-
hw/spapr_pci.c | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
hw/spapr_pci.h | 13 +++-
trace-events | 9 ++
4 files changed, 284 insertions(+), 13 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index ef6ffcb..35ad075 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -43,6 +43,7 @@
#include "hw/spapr_vio.h"
#include "hw/spapr_pci.h"
#include "hw/xics.h"
+#include "hw/msi.h"
#include "kvm.h"
#include "kvm_ppc.h"
@@ -82,6 +83,7 @@
#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000
#define SPAPR_PCI_IO_WIN_ADDR (0x10000000000ULL + 0x80000000)
+#define SPAPR_PCI_MSI_WIN_ADDR (0x10000000000ULL + 0x90000000)
#define PHANDLE_XICP 0x00001111
@@ -116,7 +118,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t
*irq_num,
/* Allocate block of consequtive IRQs, returns a number of the first */
int spapr_allocate_irq_block(uint32_t num, enum xics_irq_type type)
{
- int i, ret;
+ int i, ret = 0;
uint32_t irq = -1;
for (i = 0; i < num; ++i) {
@@ -690,6 +692,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
long pteg_shift = 17;
char *filename;
+ msi_supported = true;
+
spapr = g_malloc0(sizeof(*spapr));
QLIST_INIT(&spapr->phbs);
@@ -804,7 +808,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
SPAPR_PCI_MEM_WIN_ADDR,
SPAPR_PCI_MEM_WIN_SIZE,
- SPAPR_PCI_IO_WIN_ADDR);
+ SPAPR_PCI_IO_WIN_ADDR,
+ SPAPR_PCI_MSI_WIN_ADDR);
for (i = 0; i < nb_nics; i++) {
NICInfo *nd = &nd_table[i];
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 93017cd..21fbc50 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -24,31 +24,46 @@
*/
#include "hw.h"
#include "pci.h"
+#include "msix.h"
+#include "msi.h"
#include "pci_host.h"
#include "hw/spapr.h"
#include "hw/spapr_pci.h"
#include "exec-memory.h"
#include <libfdt.h>
+#include "trace.h"
#include "hw/pci_internals.h"
-static PCIDevice *find_dev(sPAPREnvironment *spapr,
- uint64_t buid, uint32_t config_addr)
+static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
{
- DeviceState *qdev;
- int devfn = (config_addr >> 8) & 0xFF;
sPAPRPHBState *phb;
QLIST_FOREACH(phb, &spapr->phbs, list) {
if (phb->buid != buid) {
continue;
}
+ return phb;
+ }
- QTAILQ_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
- PCIDevice *dev = (PCIDevice *)qdev;
- if (dev->devfn == devfn) {
- return dev;
- }
+ return NULL;
+}
+
+static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
+ uint32_t config_addr)
+{
+ sPAPRPHBState *phb = find_phb(spapr, buid);
+ DeviceState *qdev;
+ int devfn = (config_addr >> 8) & 0xFF;
+
+ if (!phb) {
+ return NULL;
+ }
+
+ QTAILQ_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
+ PCIDevice *dev = (PCIDevice *)qdev;
+ if (dev->devfn == devfn) {
+ return dev;
}
}
@@ -138,6 +153,220 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr,
rtas_st(rets, 0, 0);
}
+/*
+ * Initializes req_num vectors for a device.
+ * The code assumes that MSI/MSIX is enabled in the config space
+ * as a result of msix_init() or msi_init().
+ */
+static int spapr_pci_config_msi(sPAPRPHBState *ph, int ndev,
+ PCIDevice *pdev, bool msix, unsigned req_num)
+{
+ unsigned i;
+ int irq;
+ uint64_t msi_address;
+ uint32_t config_addr = pdev->devfn << 8;
+
+ /* Disabling - nothing to do */
+ if (0 == req_num) {
+ return 0;
+ }
+
+ /* Enabling! */
+ if (ph->msi_table[ndev].nvec && (req_num != ph->msi_table[ndev].nvec)) {
+ /* Unexpected behaviour */
+ fprintf(stderr, "Cannot reuse cached MSI config for device#%u", ndev);
+ return -1;
+ }
+
+ if (0 == ph->msi_table[ndev].nvec) {
+ irq = spapr_allocate_irq_block(req_num, XICS_MSI);
+ if (irq < 0) {
+ return -1;
+ }
+ ph->msi_table[ndev].dt_irq = irq;
+ ph->msi_table[ndev].nvec = req_num;
+ ph->msi_table[ndev].config_addr = config_addr;
+ }
+
+ if (msix) {
+ for (i = 0; i < req_num; ++i) {
+ msi_address = ph->msi_win_addr + ((ndev << 16) | i << 2);
+ msix_set_address_data(pdev, i, msi_address, 0);
+ trace_spapr_pci_msi_setup(pdev->name, i, msi_address);
+ }
+ } else {
+ msi_address = ph->msi_win_addr + (ndev << 16);
+ msi_set_address_data(pdev, msi_address, 0);
+ trace_spapr_pci_msi_setup(pdev->name, 0, msi_address);
+ }
+
+ return req_num;
+}
+
+/*
+ * The handler handles both MSI and MSIX.
+ * For MSI-X, the vector number is encoded as a part of the address
+ * and data is set to 0.
+ * For MSI, the vector takes least bits in data and not encoded in
+ * address.
+ */
+static void spapr_msi_write(void *opaque, target_phys_addr_t addr,
+ uint64_t data, unsigned size)
+{
+ sPAPRPHBState *ephb = opaque;
+ int n = addr >> 16;
+ int vec = ((addr & 0xFFFF) >> 2) | data;
+ uint32_t dt_irq = ephb->msi_table[n].dt_irq + vec;
+
+ trace_spapr_pci_msi_write(addr, data, dt_irq);
+
+ qemu_irq_pulse(xics_assign_irq(spapr->icp, dt_irq, XICS_MSI));
+}
+
+static const MemoryRegionOps spapr_msi_ops = {
+ .read = NULL,
+ .write = spapr_msi_write,
+ .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+static void rtas_ibm_change_msi(sPAPREnvironment *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args, uint32_t nret,
+ target_ulong rets)
+{
+ uint32_t config_addr = rtas_ld(args, 0);
+ uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+#define RTAS_QUERY_FN 0
+#define RTAS_CHANGE_FN 1
+#define RTAS_RESET_FN 2
+#define RTAS_CHANGE_MSI_FN 3
+#define RTAS_CHANGE_MSIX_FN 4
+ unsigned int func = rtas_ld(args, 3);
+ unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
+ unsigned int seq_num = rtas_ld(args, 5);
+ int assigned_num = -1;
+ enum { MSI = 1, MSIX = 2 } ret_intr_type = MSI;
+ int i;
+ sPAPRPHBState *phb = NULL;
+ PCIDevice *pdev = NULL;
+
+ switch (func) {
+ case RTAS_CHANGE_MSI_FN:
+ case RTAS_CHANGE_FN:
+ ret_intr_type = MSI;
+ break;
+ case RTAS_CHANGE_MSIX_FN:
+ ret_intr_type = MSIX;
+ break;
+ default:
+ fprintf(stderr, "rtas_ibm_change_msi(%u) is not implemented\n", func);
+ rtas_st(rets, 0, -3); /* Parameter error */
+ return;
+ }
+
+ /* Fins sPAPRPHBState */
+ phb = find_phb(spapr, buid);
+ if (phb) {
+ pdev = find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
+ rtas_st(rets, 0, -3); /* Parameter error */
+ return;
+ }
+
+ if (0 == req_num) {
+ /* Calculate a device number in the map to remove */
+ for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
+ if (phb->msi_table[i].nvec &&
+ (phb->msi_table[i].config_addr == config_addr)) {
+ trace_spapr_pci_msi("Releasing MSI config for device#", i);
+ break;
+ }
+ }
+ if (i >= SPAPR_MSIX_MAX_DEVS) {
+ fprintf(stderr, "MSI has not been enabled for this device!\n");
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ } else {
+ /* Find a device number in the map to add */
+ for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
+ if (!phb->msi_table[i].nvec) {
+ /* as we never free entries, it means no more left in cache,
+ so we use new empty entry */
+ trace_spapr_pci_msi("Allocating new MSI for device#", i);
+ break;
+ }
+ if (phb->msi_table[i].config_addr != config_addr) {
+ continue;
+ }
+ /* Even if req_num is greater than nvec, it is expected
+ that the host will allocate (nvec) vectors again */
+ trace_spapr_pci_msi("Reuse cached MSI config for device#", i);
+ break;
+ }
+ if (i >= SPAPR_MSIX_MAX_DEVS) {
+ fprintf(stderr, "No free entry for a new MSI device\n");
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ }
+
+ assigned_num = spapr_pci_config_msi(phb, i, pdev,
+ ret_intr_type == MSIX, req_num);
+
+ if (assigned_num < 0) {
+ fprintf(stderr, "rtas_ibm_change_msi(%u) failed\n", func);
+ rtas_st(rets, 0, -3); /* Parameter error */
+ return;
+ }
+ trace_spapr_pci_rtas_ibm_change_msi(func, req_num, assigned_num);
+
+ rtas_st(rets, 0, 0);
+ rtas_st(rets, 1, assigned_num);
+ rtas_st(rets, 2, ++seq_num);
+ rtas_st(rets, 3, ret_intr_type);
+}
+
+static void rtas_ibm_query_interrupt_source_number(sPAPREnvironment *spapr,
+ uint32_t token,
+ uint32_t nargs,
+ target_ulong args,
+ uint32_t nret,
+ target_ulong rets)
+{
+ uint32_t config_addr = rtas_ld(args, 0);
+ uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+ unsigned int i, intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
+ sPAPRPHBState *phb = NULL;
+
+ /* Fins sPAPRPHBState */
+ phb = find_phb(spapr, buid);
+ if (!phb) {
+ rtas_st(rets, 0, -3); /* Parameter error */
+ return;
+ }
+
+ /* Find device descriptor and start IRQ */
+ for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
+ if (phb->msi_table[i].config_addr == config_addr) {
+ intr_src_num = phb->msi_table[i].dt_irq + ioa_intr_num;
+ break;
+ }
+ }
+ /* Device has not been configured, return error */
+ if (-1 == intr_src_num) {
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
+ intr_src_num);
+
+ rtas_st(rets, 0, 0);
+ rtas_st(rets, 1, intr_src_num);
+ rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
+}
+
static int pci_swizzle(int slot, int pin)
{
return (slot + pin) % PCI_NUM_PINS;
@@ -162,6 +391,8 @@ static void pci_spapr_set_irq(void *opaque, int irq_num,
int level)
*/
sPAPRPHBState *phb = opaque;
+ trace_spapr_pci_lsi_set(phb->busname, irq_num,
+ phb->lsi_table[irq_num].dt_irq);
qemu_set_irq(xics_assign_irq(spapr->icp,
phb->lsi_table[irq_num].dt_irq, XICS_LSI),
level);
@@ -255,6 +486,14 @@ static int spapr_phb_init(SysBusDevice *s)
memory_region_add_subregion(get_system_memory(), phb->io_win_addr,
&phb->iowindow);
+ if (msi_supported) {
+ sprintf(namebuf, "%s.msi", phb->dtbusname);
+ memory_region_init_io(&phb->msiwindow, &spapr_msi_ops, phb,
+ namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
+ memory_region_add_subregion(get_system_memory(), phb->msi_win_addr,
+ &phb->msiwindow);
+ }
+
bus = pci_register_bus(&phb->host_state.busdev.qdev,
phb->busname ? phb->busname : phb->dtbusname,
pci_spapr_set_irq, pci_spapr_map_irq, phb,
@@ -286,6 +525,7 @@ static Property spapr_phb_properties[] = {
DEFINE_PROP_HEX64("mem_win_size", sPAPRPHBState, mem_win_size, 0x20000000),
DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
+ DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -301,6 +541,11 @@ static void spapr_phb_class_init(ObjectClass *klass, void
*data)
spapr_rtas_register("write-pci-config", rtas_write_pci_config);
spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+ if (msi_supported) {
+ spapr_rtas_register("ibm,query-interrupt-source-number",
+ rtas_ibm_query_interrupt_source_number);
+ spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+ }
}
static TypeInfo spapr_phb_info = {
@@ -313,7 +558,7 @@ static TypeInfo spapr_phb_info = {
void spapr_create_phb(sPAPREnvironment *spapr,
const char *busname, uint64_t buid,
uint64_t mem_win_addr, uint64_t mem_win_size,
- uint64_t io_win_addr)
+ uint64_t io_win_addr, uint64_t msi_win_addr)
{
DeviceState *dev;
@@ -326,6 +571,7 @@ void spapr_create_phb(sPAPREnvironment *spapr,
qdev_prop_set_uint64(dev, "mem_win_addr", mem_win_addr);
qdev_prop_set_uint64(dev, "mem_win_size", mem_win_size);
qdev_prop_set_uint64(dev, "io_win_addr", io_win_addr);
+ qdev_prop_set_uint64(dev, "msi_win_addr", msi_win_addr);
qdev_init_nofail(dev);
}
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 11c3ee1..bb9a5ea 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -27,6 +27,8 @@
#include "hw/pci_host.h"
#include "hw/xics.h"
+#define SPAPR_MSIX_MAX_DEVS 32
+
typedef struct sPAPRPHBState {
PCIHostState host_state;
@@ -37,12 +39,21 @@ typedef struct sPAPRPHBState {
MemoryRegion memspace, iospace;
target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size;
MemoryRegion memwindow, iowindow;
+ target_phys_addr_t msi_win_addr;
+ MemoryRegion msiwindow;
+
DMAContext *dma;
struct {
uint32_t dt_irq;
} lsi_table[PCI_NUM_PINS];
+ struct {
+ uint32_t config_addr;
+ uint32_t dt_irq; /* first irq, dt_irq!=0 means "used" */
+ int nvec;
+ } msi_table[SPAPR_MSIX_MAX_DEVS];
+
QLIST_ENTRY(sPAPRPHBState) list;
} sPAPRPHBState;
@@ -52,7 +63,7 @@ typedef struct sPAPRPHBState {
void spapr_create_phb(sPAPREnvironment *spapr,
const char *busname, uint64_t buid,
uint64_t mem_win_addr, uint64_t mem_win_size,
- uint64_t io_win_addr);
+ uint64_t io_win_addr, uint64_t msi_win_addr);
int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t xics_phandle,
diff --git a/trace-events b/trace-events
index 70f059d..8d94053 100644
--- a/trace-events
+++ b/trace-events
@@ -788,3 +788,12 @@ qxl_render_blit_guest_primary_initialized(void) ""
qxl_render_blit(int32_t stride, int32_t left, int32_t right, int32_t top,
int32_t bottom)
"stride=%d [%d, %d, %d, %d]"
qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t
stride, int32_t bytes_pp,
int32_t bits_pp) "%dx%d, stride %d, bpp %d, depth %d"
qxl_render_update_area_done(void *cookie) "%p"
+
+# hw/spapr_pci.c
+spapr_pci_msi(const char *msg, uint32_t n) "%s%u"
+spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr)
"dev\"%s\" vector %u,
addr=%"PRIx64
+spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req, int assigned) "func
%u, requested %u,
assigned %d"
+spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr)
"queries for #%u, IRQ%u"
+spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq)
"@%"PRIx64"<=%"PRIx64" IRQ %u"
+spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+
--
1.7.7.3
- Re: [Qemu-devel] [PATCH] msi/msix: added public API to set/get MSI message address, and data, (continued)
- Re: [Qemu-devel] [PATCH] msi/msix: added public API to set/get MSI message address, and data, Jan Kiszka, 2012/06/21
- Re: [Qemu-devel] [PATCH] msi/msix: added public API to set/get MSI message address, and data, Alexey Kardashevskiy, 2012/06/21
- Re: [Qemu-devel] [PATCH] msi/msix: added public API to set/get MSI message address, and data, Jan Kiszka, 2012/06/21
- [Qemu-devel] [PATCH] msi/msix: added API to set MSI message address and data, Alexey Kardashevskiy, 2012/06/21
- Re: [Qemu-devel] [PATCH] msi/msix: added API to set MSI message address and data, Jan Kiszka, 2012/06/21
- Re: [Qemu-devel] [PATCH] msi/msix: added API to set MSI message address and data, Alexey Kardashevskiy, 2012/06/21
- [Qemu-devel] [PATCH] msi/msix: added API to set MSI message address and data, Alexey Kardashevskiy, 2012/06/21
- Re: [Qemu-devel] [PATCH] msi/msix: added public API to set/get MSI message address, and data, Alex Williamson, 2012/06/21
[Qemu-devel] [PATCH 2/3] pseries: added allocator for a block of IRQs, Alexey Kardashevskiy, 2012/06/14
[Qemu-devel] [PATCH 3/3] pseries pci: added MSI/MSIX support,
Alexey Kardashevskiy <=
Re: [Qemu-devel] [PATCH 0/3] adding MSI/MSIX for PCI on POWER, Alexey Kardashevskiy, 2012/06/14
Re: [Qemu-devel] [PATCH 0/3] adding MSI/MSIX for PCI on POWER, Alexander Graf, 2012/06/27