[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 11/14] spapr_pci: enable basic hotplug operat
From: |
Alexey Kardashevskiy |
Subject: |
Re: [Qemu-devel] [PATCH v2 11/14] spapr_pci: enable basic hotplug operations |
Date: |
Mon, 16 Dec 2013 15:36:32 +1100 |
User-agent: |
Mozilla/5.0 (X11; Linux i686 on x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.2.0 |
On 12/06/2013 09:33 AM, Michael Roth wrote:
> From: Mike Day <address@hidden>
>
> This enables hotplug for PHB bridges. Upon hotplug we generate the
> OF-nodes required by PAPR specification and IEEE 1275-1994
> "PCI Bus Binding to Open Firmware" for the device.
>
> We associate the corresponding FDT for these nodes with the DrcEntry
> corresponding to the slot, which will be fetched via
> ibm,configure-connector RTAS calls by the guest as described by PAPR
> specification. The FDT is cleaned up in the case of unplug.
>
> Amongst the required OF-node properties for each device are the "reg"
> and "assigned-addresses" properties which describe the BAR-assignments
> for IO/MEM/ROM regions. To handle these assignments we scan the address
> space associated with each region for a contiguous range of appropriate
> size based on PCI specification and encode these in accordance with
> Open Firmware PCI Bus Binding spec.
>
> These assignments will be used by the guest when the rpaphp hotplug
> module is used, but may be re-assigned by guests for cases where we
> rely on bus rescan.
>
> Signed-off-by: Mike Day <address@hidden>
> Signed-off-by: Michael Roth <address@hidden>
> ---
> hw/ppc/spapr_pci.c | 375
> ++++++++++++++++++++++++++++++++++++++++++++++--
> include/hw/ppc/spapr.h | 1 +
> 2 files changed, 368 insertions(+), 8 deletions(-)
>
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 6e7ee31..9b4f829 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -56,6 +56,17 @@
> #define RTAS_TYPE_MSI 1
> #define RTAS_TYPE_MSIX 2
>
> +#define FDT_MAX_SIZE 0x10000
> +#define _FDT(exp) \
> + do { \
> + int ret = (exp); \
> + if (ret < 0) { \
> + return ret; \
> + } \
> + } while (0)
> +
> +static void spapr_drc_state_reset(DrcEntry *drc_entry);
> +
> static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
> {
> sPAPRPHBState *sphb;
> @@ -448,6 +459,22 @@ static void rtas_set_indicator(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> /* encode the new value into the correct bit field */
> shift = INDICATOR_ISOLATION_SHIFT;
> mask = INDICATOR_ISOLATION_MASK;
> + if (drc_entry) {
> + /* transition from unisolated to isolated for a hotplug slot
> + * entails completion of guest-side device unplug/cleanup, so
> + * we can now safely remove the device if qemu is waiting for
> + * it to be released
> + */
> + if (DECODE_DRC_STATE(*pind, mask, shift) != indicator_state) {
> + if (indicator_state == 0 && drc_entry->awaiting_release) {
> + /* device_del has been called and host is waiting
> + * for guest to release/isolate device, go ahead
> + * and remove it now
> + */
> + spapr_drc_state_reset(drc_entry);
> + }
> + }
> + }
> break;
> case 9002: /* DR */
> shift = INDICATOR_DR_SHIFT;
> @@ -776,6 +803,345 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus,
> void *opaque, int devfn)
> return &phb->iommu_as;
> }
>
> +/* for 'reg'/'assigned-addresses' OF properties */
> +#define RESOURCE_CELLS_SIZE 2
> +#define RESOURCE_CELLS_ADDRESS 3
> +#define RESOURCE_CELLS_TOTAL \
> + (RESOURCE_CELLS_SIZE + RESOURCE_CELLS_ADDRESS)
> +
> +static void fill_resource_props(PCIDevice *d, int bus_num,
> + uint32_t *reg, int *reg_size,
> + uint32_t *assigned, int *assigned_size)
> +{
> + uint32_t *reg_row, *assigned_row;
> + uint32_t dev_id = ((bus_num << 8) |
> + (PCI_SLOT(d->devfn) << 3) | PCI_FUNC(d->devfn));
> + int i, idx = 0;
> +
> + reg[0] = cpu_to_be32(dev_id << 8);
> +
> + for (i = 0; i < PCI_NUM_REGIONS; i++) {
> + if (!d->io_regions[i].size) {
> + continue;
> + }
> + reg_row = &reg[(idx + 1) * RESOURCE_CELLS_TOTAL];
> + assigned_row = &assigned[idx * RESOURCE_CELLS_TOTAL];
> + reg_row[0] = cpu_to_be32((dev_id << 8) | (pci_bar(d, i) & 0xff));
> + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
> + reg_row[0] |= cpu_to_be32(0x01000000);
> + } else {
> + reg_row[0] |= cpu_to_be32(0x02000000);
> + }
> + assigned_row[0] = cpu_to_be32(reg_row[0] | 0x80000000);
0x80000000 == relocatable? 0x01000000/0x02000000 - space codes? There are
macros (b_n, b_ss) in this file, can you please use them?
> + assigned_row[3] = reg_row[3] = cpu_to_be32(d->io_regions[i].size >>
> 32);
> + assigned_row[4] = reg_row[4] = cpu_to_be32(d->io_regions[i].size);
> + assigned_row[1] = cpu_to_be32(d->io_regions[i].addr >> 32);
> + assigned_row[2] = cpu_to_be32(d->io_regions[i].addr);
> + idx++;
> + }
> +
> + *reg_size = (idx + 1) * RESOURCE_CELLS_TOTAL * sizeof(uint32_t);
> + *assigned_size = idx * RESOURCE_CELLS_TOTAL * sizeof(uint32_t);
> +}
> +
> +static hwaddr spapr_find_bar_addr(sPAPRPHBState *phb, PCIIORegion *r)
This does not use @phb at all and therefore could maybe go to hw/pci/pci.c
(which can be tricky though)?
> +{
> + MemoryRegionSection mrs = { 0 };
> + hwaddr search_addr;
> + hwaddr size = r->size;
> + hwaddr addr_mask = ~(size - 1);
> + hwaddr increment = size;
> + hwaddr limit;
> +
> + if (r->type == PCI_BASE_ADDRESS_SPACE_MEMORY) {
> + /* beginning portion of mmio address space for bus does not get
> + * mapped into system memory, so calculate addr starting at the
> + * corresponding offset into mmio as.
> + */
> + search_addr = (SPAPR_PCI_MEM_WIN_BUS_OFFSET + increment) & addr_mask;
> + } else {
> + search_addr = increment;
> + }
> + limit = memory_region_size(r->address_space);
> +
> + do {
> + mrs = memory_region_find_subregion(r->address_space, search_addr,
> size);
> + if (mrs.mr) {
> + hwaddr mr_last_addr;
> + mr_last_addr = mrs.mr->addr + memory_region_size(mrs.mr) - 1;
> + search_addr = (mr_last_addr + 1) & addr_mask;
> + if (search_addr <= mr_last_addr) {
> + search_addr += increment;
> + }
> + /* this memory region overlaps, unref and continue searching */
> + memory_region_unref(mrs.mr);
> + }
> + } while (int128_nz(mrs.size) && search_addr + size <= limit);
> +
> + if (search_addr + size >= limit) {
> + return PCI_BAR_UNMAPPED;
> + }
> +
> + return search_addr;
> +}
> +
> +static int spapr_map_bars(sPAPRPHBState *phb, PCIDevice *dev)
This does not use @phb; well, it uses it to call spapr_find_bar_addr(), but
that function does not use it either.
Yet another candidate to be moved to hw/pci/pci.c? If you do so, you'll
get even more reviews :)
> +{
> + PCIIORegion *r;
> + int i, ret = -1;
> +
> + for (i = 0; i < PCI_NUM_REGIONS; i++) {
> + uint32_t bar_address = pci_bar(dev, i);
> + uint32_t bar_value;
> + uint16_t cmd_value = pci_default_read_config(dev, PCI_COMMAND, 2);
> + hwaddr addr;
> +
> + r = &dev->io_regions[i];
> +
> + /* this region isn't registered */
> + if (!r->size) {
> + continue;
> + }
> +
> + /* find a hw addr we can map */
> + addr = spapr_find_bar_addr(phb, r);
> + if (addr == PCI_BAR_UNMAPPED) {
> + /* we can't find a free range within address space for this BAR
> */
> + fprintf(stderr,
> + "Unable to map BAR %d, no free range available\n", i);
> + return -1;
> + }
> + /* we can probably map this region into memory if there is not
> + * a race condition with some other allocator. write the address
> + * to the device BAR which will force a call to pci_update_mappings
> + */
> + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
> + pci_default_write_config(dev, PCI_COMMAND,
> + cmd_value | PCI_COMMAND_IO, 2);
> + } else {
> + pci_default_write_config(dev, PCI_COMMAND,
> + cmd_value | PCI_COMMAND_MEMORY, 2);
> + }
> +
> + bar_value = addr;
> +
> + if (i == PCI_ROM_SLOT) {
> + bar_value |= PCI_ROM_ADDRESS_ENABLE;
> + }
> + /* write the new bar value */
> + pci_default_write_config(dev, bar_address, bar_value, 4);
> +
> + /* if this is a 64-bit BAR, we need to also write the
> + * upper 32 bit value.
> + */
> + if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> + bar_value = (addr >> 32) & 0xffffffffUL;
> + pci_default_write_config(dev, bar_address + 4, bar_value, 4);
> + }
> + ret = 0;
> + }
> + return ret;
> +}
> +
> +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
> + int phb_index)
> +{
> + int slot = PCI_SLOT(dev->devfn);
> + char slotname[16];
> + bool is_bridge = 1;
> + DrcEntry *drc_entry, *drc_entry_slot;
> + uint32_t reg[RESOURCE_CELLS_TOTAL * 8] = { 0 };
> + uint32_t assigned[RESOURCE_CELLS_TOTAL * 8] = { 0 };
> + int reg_size, assigned_size;
> +
> + drc_entry = spapr_phb_to_drc_entry(phb_index + SPAPR_PCI_BASE_BUID);
> + g_assert(drc_entry);
> + drc_entry_slot = &drc_entry->child_entries[slot];
> +
> + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
s/1/PCI_HEADER_TYPE_BRIDGE/
> + PCI_HEADER_TYPE_NORMAL) {
> + is_bridge = 0;
> + }
> +
> + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id",
> + pci_default_read_config(dev, PCI_VENDOR_ID, 2)));
> + _FDT(fdt_setprop_cell(fdt, offset, "device-id",
> + pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
> + _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
> + pci_default_read_config(dev, PCI_REVISION_ID, 1)));
> + _FDT(fdt_setprop_cell(fdt, offset, "class-code",
> + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2)
> << 8));
> +
> + _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
> + pci_default_read_config(dev, PCI_INTERRUPT_PIN,
> 1)));
> +
> + /* if this device is NOT a bridge */
> + if (!is_bridge) {
s/!is_bridge/pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
PCI_HEADER_TYPE_NORMAL/
and get rid of is_bridge?
> + _FDT(fdt_setprop_cell(fdt, offset, "min-grant",
> + pci_default_read_config(dev, PCI_MIN_GNT, 1)));
> + _FDT(fdt_setprop_cell(fdt, offset, "max-latency",
> + pci_default_read_config(dev, PCI_MAX_LAT, 1)));
> + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id",
> + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)));
> + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
> + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)));
> + }
> +
> + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size",
> + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1)));
> +
> + /* the following fdt cells are masked off the pci status register */
> + int pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
> + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
> + PCI_STATUS_DEVSEL_MASK & pci_status));
> + _FDT(fdt_setprop_cell(fdt, offset, "fast-back-to-back",
> + PCI_STATUS_FAST_BACK & pci_status));
> + _FDT(fdt_setprop_cell(fdt, offset, "66mhz-capable",
> + PCI_STATUS_66MHZ & pci_status));
> + _FDT(fdt_setprop_cell(fdt, offset, "udf-supported",
> + PCI_STATUS_UDF & pci_status));
> +
> + _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
> + sprintf(slotname, "Slot %d", slot + phb_index * 32);
> + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", slotname,
> strlen(slotname)));
> + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index",
> + drc_entry_slot->drc_index));
> +
> + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
> + RESOURCE_CELLS_ADDRESS));
> + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
> + RESOURCE_CELLS_SIZE));
> + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x",
> + RESOURCE_CELLS_SIZE));
> + fill_resource_props(dev, phb_index, reg, &reg_size,
> + assigned, &assigned_size);
> + _FDT(fdt_setprop(fdt, offset, "reg", reg, reg_size));
> + _FDT(fdt_setprop(fdt, offset, "assigned-addresses",
> + assigned, assigned_size));
> +
> + return 0;
> +}
> +
> +static int spapr_device_hotplug_add(DeviceState *qdev, PCIDevice *dev)
> +{
> + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev);
> + DrcEntry *drc_entry, *drc_entry_slot;
> + ConfigureConnectorState *ccs;
> + int slot = PCI_SLOT(dev->devfn);
> + int offset, ret;
> + void *fdt_orig, *fdt;
> + char nodename[512];
> + uint32_t encoded = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_PRESENT,
> + INDICATOR_ENTITY_SENSE_MASK,
> + INDICATOR_ENTITY_SENSE_SHIFT);
> +
> + drc_entry = spapr_phb_to_drc_entry(phb->buid);
> + g_assert(drc_entry);
> + drc_entry_slot = &drc_entry->child_entries[slot];
> +
> + drc_entry->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK;
> + drc_entry->state |= encoded; /* DR entity present */
> + drc_entry_slot->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK;
> + drc_entry_slot->state |= encoded; /* and the slot */
"and the slot" what?
s/uint32_t encoded/const uint32_t present/ and remove the comments?
> + /* need to allocate memory region for device BARs */
> + spapr_map_bars(phb, dev);
> +
> + /* add OF node for pci device and required OF DT properties */
> + fdt_orig = g_malloc0(FDT_MAX_SIZE);
> + offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
> + fdt_begin_node(fdt_orig, "");
> + fdt_end_node(fdt_orig);
> + fdt_finish(fdt_orig);
> +
> + fdt = g_malloc0(FDT_MAX_SIZE);
> + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);
> + sprintf(nodename, "address@hidden", slot);
> + offset = fdt_add_subnode(fdt, 0, nodename);
> + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index);
> + g_assert(!ret);
> + g_free(fdt_orig);
> +
> + /* hold on to node, configure_connector will pass it to the guest later
> */
> + ccs = &drc_entry_slot->cc_state;
> + ccs->fdt = fdt;
> + ccs->offset_start = offset;
> + ccs->state = CC_STATE_PENDING;
> + ccs->dev = dev;
> +
> + return 0;
> +}
> +
> +/* check whether guest has released/isolated device */
> +static bool spapr_drc_state_is_releasable(DrcEntry *drc_entry)
> +{
> + return !DECODE_DRC_STATE(drc_entry->state,
> + INDICATOR_ISOLATION_MASK,
> + INDICATOR_ISOLATION_SHIFT);
> +}
It looks like this is the only separate function which wraps
DECODE_DRC_STATE, and it is used just once, while the others call
DECODE_DRC_STATE()/ENCODE_DRC_STATE() directly. I'd remove this function
and call DECODE_DRC_STATE() directly, below in the code.
> +
> +/* finalize device unplug/deletion */
> +static void spapr_drc_state_reset(DrcEntry *drc_entry)
> +{
> + ConfigureConnectorState *ccs = &drc_entry->cc_state;
> + uint32_t sense_empty = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_EMPTY,
> + INDICATOR_ENTITY_SENSE_MASK,
> + INDICATOR_ENTITY_SENSE_SHIFT);
> +
> + g_free(ccs->fdt);
> + ccs->fdt = NULL;
> + object_unparent(OBJECT(ccs->dev));
> + ccs->dev = NULL;
> + ccs->state = CC_STATE_IDLE;
> + drc_entry->state &= ~INDICATOR_ENTITY_SENSE_MASK;
> + drc_entry->state |= sense_empty;
> + drc_entry->awaiting_release = false;
> +}
> +
> +static void spapr_device_hotplug_remove(DeviceState *qdev, PCIDevice *dev)
> +{
> + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev);
> + DrcEntry *drc_entry, *drc_entry_slot;
> + ConfigureConnectorState *ccs;
> + int slot = PCI_SLOT(dev->devfn);
> +
> + drc_entry = spapr_phb_to_drc_entry(phb->buid);
> + g_assert(drc_entry);
> + drc_entry_slot = &drc_entry->child_entries[slot];
> + ccs = &drc_entry_slot->cc_state;
> + /* shouldn't be removing devices we haven't created an fdt for */
> + g_assert(ccs->state != CC_STATE_IDLE);
Instead of g_assert(), wouldn't it be better to return -1 here, then
return this return code from spapr_device_hotplug() and let common PCI code
handle this?
Or are we absolutely sure that spapr_device_hotplug() cannot possibly fail,
so we are ready to kill the guest if it does? I do not know, just asking :)
> + /* if the device has already been released/isolated by guest, go ahead
> + * and remove it now. Otherwise, flag it as pending guest release so it
> + * can be removed later
> + */
> + if (spapr_drc_state_is_releasable(drc_entry_slot)) {
> + spapr_drc_state_reset(drc_entry_slot);
> + } else {
> + if (drc_entry_slot->awaiting_release) {
> + fprintf(stderr, "waiting for guest to release the device");
> + } else {
> + drc_entry_slot->awaiting_release = true;
> + }
> + }
> +}
> +
> +static int spapr_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> + PCIHotplugState state)
> +{
sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev);
> + if (state == PCI_COLDPLUG_ENABLED) {
> + return 0;
> + }
> +
> + if (state == PCI_HOTPLUG_ENABLED) {
> + spapr_device_hotplug_add(qdev, dev);
> + } else {
> + spapr_device_hotplug_remove(qdev, dev);
> + }
and here s/qdev/phb/? spapr_device_hotplug_(add|remove),
spapr_pci_hotplug_(add|remove)_event (from further patch(es)) do not use
qdev as a DeviceState anyway, they cast it to sPAPRPHBState and use that.
> +
> + return 0;
> +}
> +
> static int spapr_phb_init(SysBusDevice *s)
> {
> DeviceState *dev = DEVICE(s);
> @@ -889,6 +1255,7 @@ static int spapr_phb_init(SysBusDevice *s)
> &sphb->memspace, &sphb->iospace,
> PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
> phb->bus = bus;
> + pci_bus_hotplug(phb->bus, spapr_device_hotplug, DEVICE(sphb));
>
> sphb->dma_window_start = 0;
> sphb->dma_window_size = 0x40000000;
> @@ -1181,14 +1548,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> return bus_off;
> }
>
> -#define _FDT(exp) \
> - do { \
> - int ret = (exp); \
> - if (ret < 0) { \
> - return ret; \
> - } \
> - } while (0)
> -
> /* Write PHB properties */
> _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
> _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7c8a521..1c9b725 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -328,6 +328,7 @@ struct DrcEntry {
> void *fdt;
> int fdt_offset;
> uint32_t state;
> + bool awaiting_release;
> ConfigureConnectorState cc_state;
> DrcEntry *child_entries;
> };
>
--
Alexey
- [Qemu-devel] [PATCH v2 09/14] pci: make pci_bar useable outside pci.c, (continued)
- [Qemu-devel] [PATCH v2 09/14] pci: make pci_bar useable outside pci.c, Michael Roth, 2013/12/05
- [Qemu-devel] [PATCH v2 04/14] spapr_pci: add set-indicator RTAS interface, Michael Roth, 2013/12/05
- [Qemu-devel] [PATCH v2 01/14] spapr: populate DRC entries for root dt node, Michael Roth, 2013/12/05
- [Qemu-devel] [PATCH v2 05/14] spapr_pci: add get/set-power-level RTAS interfaces, Michael Roth, 2013/12/05
- [Qemu-devel] [PATCH v2 08/14] memory: add memory_region_find_subregion, Michael Roth, 2013/12/05
- [Qemu-devel] [PATCH v2 11/14] spapr_pci: enable basic hotplug operations, Michael Roth, 2013/12/05
- Re: [Qemu-devel] [PATCH v2 11/14] spapr_pci: enable basic hotplug operations,
Alexey Kardashevskiy <=
- [Qemu-devel] [PATCH v2 10/14] pci: allow 0 address for PCI IO regions, Michael Roth, 2013/12/05
- Re: [Qemu-devel] [PATCH v2 10/14] pci: allow 0 address for PCI IO regions, Michael S. Tsirkin, 2013/12/12
[Qemu-devel] [PATCH v2 12/14] spapr_events: re-use EPOW event infrastructure for hotplug events, Michael Roth, 2013/12/05
[Qemu-devel] [PATCH v2 13/14] spapr_events: event-scan RTAS interface, Michael Roth, 2013/12/05