[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC v2 3/4] hw/pci-host: Add a generic PCI host contro
From: |
alvise rigo |
Subject: |
Re: [Qemu-devel] [RFC v2 3/4] hw/pci-host: Add a generic PCI host controller for virtual platforms |
Date: |
Tue, 6 Jan 2015 09:47:41 +0100 |
Hi,
On Mon, Jan 5, 2015 at 6:13 PM, Alexander Graf <address@hidden> wrote:
>
>
> On 21.11.14 19:07, Alvise Rigo wrote:
>> Add a generic PCI host controller for virtual platforms, based on the
>> previous work by Rob Herring:
>> http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg03482.html
>>
>> The controller creates a PCI bus for PCI devices; it is intended to
>> receive from the platform all the needed information to initiate the
>> memory regions expected by the PCI system. Due to this dependence, a
>> header file is used to define the structure that the platform has to
>> fill with the data needed by the controller. The device provides also a
>> routine for the device tree node generation, which mostly has to take
>> care of the interrupt-map property generation. This property is fetched
>> by the guest operating system to map any PCI interrupt to the interrupt
>> controller. For the time being, the device expects a GIC v2 to be used
>> by the guest.
>> Only mach-virt has been used to test the controller.
>>
>> Signed-off-by: Alvise Rigo <address@hidden>
>> ---
>> hw/pci-host/Makefile.objs | 2 +-
>> hw/pci-host/generic-pci.c | 280
>> ++++++++++++++++++++++++++++++++++++++
>> include/hw/pci-host/generic-pci.h | 74 ++++++++++
>> 3 files changed, 355 insertions(+), 1 deletion(-)
>> create mode 100644 hw/pci-host/generic-pci.c
>> create mode 100644 include/hw/pci-host/generic-pci.h
>>
>> diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
>> index bb65f9c..8ef9fac 100644
>> --- a/hw/pci-host/Makefile.objs
>> +++ b/hw/pci-host/Makefile.objs
>> @@ -1,4 +1,4 @@
>> -common-obj-y += pam.o
>> +common-obj-y += pam.o generic-pci.o
>>
>> # PPC devices
>> common-obj-$(CONFIG_PREP_PCI) += prep.o
>> diff --git a/hw/pci-host/generic-pci.c b/hw/pci-host/generic-pci.c
>> new file mode 100644
>> index 0000000..9c90263
>> --- /dev/null
>> +++ b/hw/pci-host/generic-pci.c
>> @@ -0,0 +1,280 @@
>> +/*
>> + * Generic PCI host controller
>> + *
>> + * Copyright (c) 2014 Linaro, Ltd.
>> + * Author: Rob Herring <address@hidden>
>> + *
>> + * Based on ARM Versatile PCI controller (hw/pci-host/versatile.c):
>> + * Copyright (c) 2006-2009 CodeSourcery.
>> + * Written by Paul Brook
>> + *
>> + * This code is licensed under the LGPL.
>> + */
>> +
>> +#include "hw/sysbus.h"
>> +#include "hw/pci-host/generic-pci.h"
>> +#include "exec/address-spaces.h"
>> +#include "sysemu/device_tree.h"
>> +
>> +static const VMStateDescription pci_generic_host_vmstate = {
>> + .name = "generic-host-pci",
>> + .version_id = 1,
>> + .minimum_version_id = 1,
>> +};
>> +
>> +static void pci_cam_config_write(void *opaque, hwaddr addr,
>> + uint64_t val, unsigned size)
>> +{
>> + PCIGenState *s = opaque;
>> + pci_data_write(&s->pci_bus, addr, val, size);
>> +}
>> +
>> +static uint64_t pci_cam_config_read(void *opaque, hwaddr addr, unsigned
>> size)
>> +{
>> + PCIGenState *s = opaque;
>> + uint32_t val;
>> + val = pci_data_read(&s->pci_bus, addr, size);
>> + return val;
>> +}
>> +
>> +static const MemoryRegionOps pci_vpb_config_ops = {
>> + .read = pci_cam_config_read,
>> + .write = pci_cam_config_write,
>> + .endianness = DEVICE_NATIVE_ENDIAN,
>
> Never use NATIVE_ENDIAN unless you're 100% sure it's correct. In this
> case, please make it LITTLE_ENDIAN.
Agreed.
>
>> +};
>> +
>> +static void pci_generic_set_irq(void *opaque, int irq_num, int level)
>> +{
>> + qemu_irq *pic = opaque;
>> + qemu_set_irq(pic[irq_num], level);
>> +}
>> +
>> +static void pci_generic_host_init(Object *obj)
>> +{
>> + PCIHostState *h = PCI_HOST_BRIDGE(obj);
>> + PCIGenState *s = PCI_GEN(obj);
>> + GenericPCIHostState *gps = &s->pci_gen;
>> +
>> + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
>
> Why is IO space that big? It's only 64k usually, no?
This was just to prevent a definition of the io space smaller than the
actual size of the memory region.
This could be avoided as you suggested later, by postponing the
creation of the PCIBus.
>
>> + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
>> +
>> + pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci",
>> + &s->pci_mem_space, &s->pci_io_space,
>> + PCI_DEVFN(0, 0), TYPE_PCIE_BUS);
>> + h->bus = &s->pci_bus;
>> +
>> + object_initialize(gps, sizeof(*gps), TYPE_GENERIC_PCI_HOST);
>> + qdev_set_parent_bus(DEVICE(gps), BUS(&s->pci_bus));
>> + gps->devfns = 0;
>> +}
>> +
>> +static int generic_pci_map_irq_fn(PCIDevice *pci_dev, int pin)
>> +{
>> + BusState *bus = qdev_get_parent_bus(&pci_dev->qdev);
>> + PCIBus *pci_bus = PCI_BUS(bus);
>> + PCIDevice *pdev = pci_bus->devices[PCI_DEVFN(0, 0)];
>> + GenericPCIHostState *gps = PCI_GEN_HOST(pdev);
>> +
>> + return gps->irqmap.devfn_idx_map[PCI_SLOT(pci_dev->devfn)]
>> + [PCI_FUNC(pci_dev->devfn)];
>> +}
>> +
>> +static void pci_generic_host_realize(DeviceState *dev, Error **errp)
>> +{
>> + PCIGenState *s = PCI_GEN(dev);
>> + SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
>> + GenericPCIPlatformData *pdata = &s->plat_data;
>
> Where does this come from? Why isn't it exposed as qdev parameters?
It comes from the platform (e.g. mach-virt).
Indeed, I will expose them as qdev properties.
>
>> + int i;
>> +
>> + if (!pdata->addr_map[PCI_CFG].size || !pdata->addr_map[PCI_IO].size ||
>> + !pdata->addr_map[PCI_MEM].size || !pdata->irqs) {
>> + hw_error("generic_pci: PCI controller not fully configured.");
>> + }
>> +
>> + for (i = 0; i < pdata->irqs; i++) {
>> + sysbus_init_irq(sbd, &s->irq[i]);
>> + }
>> +
>> + pci_bus_irqs(&s->pci_bus, pci_generic_set_irq, generic_pci_map_irq_fn,
>> + s->irq, pdata->irqs);
>> +
>> + memory_region_init_io(&s->mem_config, OBJECT(s), &pci_vpb_config_ops, s,
>> + "pci-config", pdata->addr_map[PCI_CFG].size);
>> + sysbus_init_mmio(sbd, &s->mem_config);
>> +
>> + /* Depending on the available memory of the board using the controller,
>> we
>> + create a window on both the I/O and mememory space. */
>
> meme?
Ack.
>
>> + memory_region_init_alias(&s->pci_io_window, OBJECT(s), "pci-io-win",
>> + &s->pci_io_space, 0,
>> + pdata->addr_map[PCI_IO].size);
>> + sysbus_init_mmio(sbd, &s->pci_io_window);
>> +
>> + memory_region_init_alias(&s->pci_mem_window, OBJECT(s), "pci-mem-win",
>> + &s->pci_mem_space,
>> + pdata->addr_map[PCI_MEM].addr,
>> + pdata->addr_map[PCI_MEM].size);
>> + sysbus_init_mmio(sbd, &s->pci_mem_window);
>
> What if we simply postpone the creation of those regions and the
> creation of the PCIBus to the realize function? At that point we know
> the size of the regions.
I'm not sure but I think I've already tried this path and for some
reason I've abandoned it.
I will explore it again and I'll let you know.
>
>> +
>> + /* TODO Remove once realize propagates to child devices. */
>> + object_property_set_bool(OBJECT(&s->pci_gen), true, "realized", errp);
>
> Is this still necessary? In fact, wouldn't the realize of our PHB and
> the creation of child devices happen consecutively usually?
Sure, I will fix it.
>
>> +}
>> +
>> +static void pci_generic_host_class_init(ObjectClass *klass, void *data)
>> +{
>> + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>> + DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> + k->vendor_id = PCI_VENDOR_ID_REDHAT;
>> + k->device_id = 0x1234;
>
> Is this a reserved ID?
Not at all. I see now from the documentation (docs/specs/pci-ids.txt)
that a value within the range 0000 -> 00ff should be used.
I suppose that eventually we will pick up one of the available?
>
>> + k->class_id = PCI_CLASS_PROCESSOR_CO;
>> + /*
>> + * PCI-facing part of the host bridge, not usable without the
>> + * host-facing part, which can't be device_add'ed, yet.
>> + */
>> + dc->cannot_instantiate_with_device_add_yet = true;
>> +}
>> +
>> +struct dt_irq_mapping {
>> + int irqs;
>> + uint32_t gic_phandle;
>> + int base_irq_num;
>> + uint64_t *data;
>> +};
>> +
>> +/* */
>> +#define IRQ_MAP_ENTRY_DESC_SZ 14
>> +static void gen_int_mapping_fn(PCIBus *b, PCIDevice *d, void *opaque)
>> +{
>> + struct dt_irq_mapping *map_data;
>> + int pin;
>> +
>> + PCIDevice *pdev = b->devices[PCI_DEVFN(0, 0)];
>> + GenericPCIHostState *gps = PCI_GEN_HOST(pdev);
>> + map_data = (struct dt_irq_mapping *)opaque;
>> +
>> + /* Check if the platform has enough IRQs available. */
>> + if (gps->devfns > map_data->irqs) {
>> + hw_error("generic_pci: too many PCI devices.");
>> + }
>> +
>> + pin = pci_get_byte(d->config + PCI_INTERRUPT_PIN);
>> + if (pin) {
>> + uint64_t *data_ptr = map_data->data + (IRQ_MAP_ENTRY_DESC_SZ *
>> gps->devfns);
>> + uint8_t pci_slot = PCI_SLOT(d->devfn);
>> + uint8_t pci_func = PCI_FUNC(d->devfn);
>> + uint8_t *devfn_idx = &gps->irqmap.devfn_idx_map[pci_slot][0];
>> + uint8_t *devfn_irq = gps->irqmap.devfn_irq_map;
>> +
>> + devfn_idx[pci_func] = gps->devfns;
>> + devfn_irq[gps->devfns] = map_data->base_irq_num + gps->devfns;
>> +
>> + uint64_t buffer[IRQ_MAP_ENTRY_DESC_SZ] =
>> + {1, (pci_slot << 11)|(pci_func << 8), 2, 0x00000000, 1, pin,
>> + 1, map_data->gic_phandle, 1, 0x0, 1, devfn_irq[gps->devfns],
>> + 1, 0x1};
>> +
>> + memcpy(data_ptr, buffer, IRQ_MAP_ENTRY_DESC_SZ * sizeof(*buffer));
>> + gps->devfns++;
>> + }
>> +}
>> +
>> +/* Generate the "interrup-map" node's data and store it in map_data */
>> +static void generate_int_mapping(struct dt_irq_mapping *map_data,
>> + PCIGenState *s)
>> +{
>> + pci_for_each_device(s->parent_obj.bus, 0, gen_int_mapping_fn, map_data);
>
> The interrupt-map should describe interrupt mappings for every slot, not
> every device. If you only describe the current state of affairs, hotplug
> won't work.
Ack, as agreed also with Peter in the other thread.
>
>> +}
>> +
>> +static void generate_dt_node(void *fdt, DeviceState *dev)
>> +{
>> + PCIGenState *s = PCI_GEN(dev);
>> + char *nodename;
>> + uint32_t gic_phandle;
>> + uint32_t plat_acells;
>> + uint32_t plat_scells;
>> +
>> + SysBusDevice *busdev;
>> + busdev = SYS_BUS_DEVICE(dev);
>> + MemoryRegion *cfg = sysbus_mmio_get_region(busdev, 0);
>> + MemoryRegion *io = sysbus_mmio_get_region(busdev, 1);
>> + MemoryRegion *mem = sysbus_mmio_get_region(busdev, 2);
>> +
>> + nodename = g_strdup_printf("/address@hidden" PRIx64, cfg->addr);
>> + qemu_fdt_add_subnode(fdt, nodename);
>> + qemu_fdt_setprop_string(fdt, nodename, "compatible",
>> + "pci-host-cam-generic");
>> + qemu_fdt_setprop_string(fdt, nodename, "device_type", "pci");
>> + qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", 0x3);
>> + qemu_fdt_setprop_cell(fdt, nodename, "#size-cells", 0x2);
>> + qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 0x1);
>> +
>> + plat_acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells");
>> + plat_scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells");
>> + qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", plat_acells,
>> cfg->addr,
>> + plat_scells,
>> memory_region_size(cfg));
>> +
>> + qemu_fdt_setprop_sized_cells(fdt, nodename, "ranges",
>> + 1, 0x01000000, 2, 0x00000000, /* I/O space */
>> + 2, io->addr, 2, memory_region_size(io),
>> + 1, 0x02000000, 2, mem->addr, /* 32bit memory space
>> */
>> + 2, mem->addr, 2, memory_region_size(mem));
>> +
>> + gic_phandle = qemu_fdt_get_phandle(fdt, s->plat_data.gic_node_name);
>> + /* A mask covering bits [15,8] of phys.high allows to honor the
>> + function number when resolving a triggered PCI interrupt. */
>> + qemu_fdt_setprop_sized_cells(fdt, nodename, "interrupt-map-mask",
>> + 1, 0xff00, 1, 0x0, 1, 0x0, 1, 0x7);
>> +
>> + uint64_t *int_mapping_data = g_malloc0(IRQ_MAP_ENTRY_DESC_SZ *
>> + sizeof(uint64_t) *
>> s->plat_data.irqs);
>> + struct dt_irq_mapping dt_map_data = {
>> + .irqs = s->plat_data.irqs,
>> + .gic_phandle = gic_phandle,
>> + .base_irq_num = s->plat_data.base_irq,
>> + .data = int_mapping_data
>> + };
>> + /* Generate the interrupt mapping according to the devices attached
>> + * to the PCI bus of the controller. */
>> + generate_int_mapping(&dt_map_data, s);
>> + qemu_fdt_setprop_sized_cells_from_array(fdt, nodename, "interrupt-map",
>> + (s->pci_gen.devfns * IRQ_MAP_ENTRY_DESC_SZ)/2,
>> int_mapping_data);
>> +
>> + g_free(int_mapping_data);
>> + g_free(nodename);
>> +}
>> +
>> +void pci_controller_build_dt_node(void *fdt, DeviceState *dev)
>> +{
>> + generate_dt_node(fdt, dev);
>
> Device tree generation should *never* be part of device code. It's
> machine / board specific. If one day someone can convince me that it's
Actually you already convinced me that the device tree generation
should stay in the board code :)
I will rework the code accordingly.
Thank you for your feedback,
alvise
> safe to share device tree bits of one device with multiple boards I'm
> happy to work with them to achieve that goal then, but for now, please
> leave device tree bits in the machine logic.
>
>
> Alex
>
>> +}
>> +
>> +static const TypeInfo pci_generic_host_info = {
>> + .name = TYPE_GENERIC_PCI_HOST,
>> + .parent = TYPE_PCI_DEVICE,
>> + .instance_size = sizeof(GenericPCIHostState),
>> + .class_init = pci_generic_host_class_init,
>> +};
>> +
>> +static void pci_generic_class_init(ObjectClass *klass, void *data)
>> +{
>> + DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> + dc->realize = pci_generic_host_realize;
>> + dc->vmsd = &pci_generic_host_vmstate;
>> +}
>> +
>> +static const TypeInfo pci_generic_info = {
>> + .name = TYPE_GENERIC_PCI,
>> + .parent = TYPE_PCI_HOST_BRIDGE,
>> + .instance_size = sizeof(PCIGenState),
>> + .instance_init = pci_generic_host_init,
>> + .class_init = pci_generic_class_init,
>> +};
>> +
>> +static void generic_pci_host_register_types(void)
>> +{
>> + type_register_static(&pci_generic_info);
>> + type_register_static(&pci_generic_host_info);
>> +}
>> +
>> +type_init(generic_pci_host_register_types)
>> \ No newline at end of file
>> diff --git a/include/hw/pci-host/generic-pci.h
>> b/include/hw/pci-host/generic-pci.h
>> new file mode 100644
>> index 0000000..43c7a0f
>> --- /dev/null
>> +++ b/include/hw/pci-host/generic-pci.h
>> @@ -0,0 +1,74 @@
>> +#ifndef QEMU_GENERIC_PCI_H
>> +#define QEMU_GENERIC_PCI_H
>> +
>> +#include "hw/pci/pci.h"
>> +#include "hw/pci/pci_bus.h"
>> +#include "hw/pci/pci_host.h"
>> +
>> +#define MAX_PCI_DEVICES (PCI_SLOT_MAX * PCI_FUNC_MAX)
>> +
>> +enum {
>> + PCI_CFG = 0,
>> + PCI_IO,
>> + PCI_MEM,
>> +};
>> +
>> +typedef struct {
>> + /* addr_map[PCI_CFG].addr = base address of configuration memory
>> + addr_map[PCI_CFG].size = configuration memory size
>> + ... */
>> + struct addr_map {
>> + hwaddr addr;
>> + hwaddr size;
>> + } addr_map[3];
>> +
>> + const char *gic_node_name;
>> + uint32_t base_irq;
>> + uint32_t irqs;
>> +} GenericPCIPlatformData;
>> +
>> +typedef struct {
>> + PCIDevice parent_obj;
>> +
>> + struct irqmap {
>> + /* devfn_idx_map[i][j] = index inside slot_irq_map for device at
>> slot i,
>> + fn j */
>> + uint8_t devfn_idx_map[PCI_SLOT_MAX][PCI_FUNC_MAX];
>> + /* devfn_irq_map[i] = IRQ num. of the i-th device (devfn) attached
>> to
>> + the bus */
>> + uint8_t devfn_irq_map[MAX_PCI_DEVICES];
>> + } irqmap;
>> + /* number of devices attached to the bus */
>> + uint32_t devfns;
>> +} GenericPCIHostState;
>> +
>> +typedef struct {
>> + PCIHostState parent_obj;
>> +
>> + qemu_irq irq[MAX_PCI_DEVICES];
>> + MemoryRegion mem_config;
>> + /* Containers representing the PCI address spaces */
>> + MemoryRegion pci_io_space;
>> + MemoryRegion pci_mem_space;
>> + /* Alias regions into PCI address spaces which we expose as sysbus
>> regions.
>> + * The offsets into pci_mem_space is fixed. */
>> + MemoryRegion pci_io_window;
>> + MemoryRegion pci_mem_window;
>> + PCIBus pci_bus;
>> + GenericPCIHostState pci_gen;
>> +
>> + /* Platform dependent data */
>> + GenericPCIPlatformData plat_data;
>> +} PCIGenState;
>> +
>> +#define TYPE_GENERIC_PCI "generic_pci"
>> +#define PCI_GEN(obj) \
>> + OBJECT_CHECK(PCIGenState, (obj), TYPE_GENERIC_PCI)
>> +
>> +#define TYPE_GENERIC_PCI_HOST "generic_pci_host"
>> +#define PCI_GEN_HOST(obj) \
>> + OBJECT_CHECK(GenericPCIHostState, (obj), TYPE_GENERIC_PCI_HOST)
>> +
>> +void pci_controller_build_dt_node(void *fdt, DeviceState *dev);
>> +
>> +#endif
>> \ No newline at end of file
>>