qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX


From: Alexey Kardashevskiy
Subject: Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX
Date: Tue, 30 Jul 2013 12:56:15 +1000
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130514 Thunderbird/17.0.6

Ping, anyone?


On 07/23/2013 12:54 PM, Alexey Kardashevskiy wrote:
> Michael, could you please review the patch as it is about PCI? Thanks!
> 
> 
> On 07/12/2013 05:38 PM, Alexey Kardashevskiy wrote:
>> On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS
>> hypercalls which return global IRQ numbers to a guest so it only
>> operates with those and never touches MSIMessage.
>>
>> Therefore MSIMessage handling is completely hidden in QEMU.
>>
>> Previously every sPAPR PCI host bridge implemented its own MSI window
>> to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci
>> or vfio) and route them to the guest via qemu_pulse_irq().
>> MSIMessage used to be encoded as:
>>      .addr - address within the PHB MSI window;
>>      .data - the device index on PHB plus vector number.
>> The MSI MR write function translated this MSIMessage to a global IRQ
>> number and called qemu_pulse_irq().
>>
>> However the total number of IRQs is not really big (at the moment it is
>> 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage
>> seems to be enough to store an IRQ number there.
>>
>> This simplifies MSI handling in sPAPR PHB. Specifically, this does:
>> 1. remove a MSI window from a PHB;
>> 2. add a single memory region for all MSIs to sPAPREnvironment
>> and spapr_pci_msi_init() to initialize it;
>> 3. encode MSIMessage as:
>>     * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL;
>>     * .data as an IRQ number.
>> 4. change IRQ allocator to align first IRQ number in a block for MSI.
>> MSI uses lower bits to specify the vector number so the first IRQ has to
>> be aligned. MSIX does not need any special allocator though.
>>
>> Signed-off-by: Alexey Kardashevskiy <address@hidden>
>> ---
>>  hw/ppc/spapr.c              | 29 ++++++++++++++++++---
>>  hw/ppc/spapr_pci.c          | 61 
>> ++++++++++++++++++++-------------------------
>>  include/hw/pci-host/spapr.h |  8 +++---
>>  include/hw/ppc/spapr.h      |  4 ++-
>>  4 files changed, 60 insertions(+), 42 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 671bbd9..6b21191 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi)
>>  
>>      if (hint) {
>>          irq = hint;
>> +        if (hint >= spapr->next_irq) {
>> +            spapr->next_irq = hint + 1;
>> +        }
>>          /* FIXME: we should probably check for collisions somehow */
>>      } else {
>>          irq = spapr->next_irq++;
>> @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi)
>>      return irq;
>>  }
>>  
>> -/* Allocate block of consequtive IRQs, returns a number of the first */
>> -int spapr_allocate_irq_block(int num, bool lsi)
>> +/*
>> + * Allocate block of consequtive IRQs, returns a number of the first.
>> + * If msi==true, aligns the first IRQ number to num.
>> + */
>> +int spapr_allocate_irq_block(int num, bool lsi, bool msi)
>>  {
>>      int first = -1;
>> -    int i;
>> +    int i, hint = 0;
>> +
>> +    /*
>> +     * MSIMesage::data is used for storing VIRQ so
>> +     * it has to be aligned to num to support multiple
>> +     * MSI vectors. MSI-X is not affected by this.
>> +     * The hint is used for the first IRQ, the rest should
>> +     * be allocated continously.
>> +     */
>> +    if (msi) {
>> +        assert((num == 1) || (num == 2) || (num == 4) ||
>> +               (num == 8) || (num == 16) || (num == 32));
>> +        hint = (spapr->next_irq + num - 1) & ~(num - 1);
>> +    }
>>  
>>      for (i = 0; i < num; ++i) {
>>          int irq;
>>  
>> -        irq = spapr_allocate_irq(0, lsi);
>> +        irq = spapr_allocate_irq(hint, lsi);
>>          if (!irq) {
>>              return -1;
>>          }
>>  
>>          if (0 == i) {
>>              first = irq;
>> +            hint = 0;
>>          }
>>  
>>          /* If the above doesn't create a consecutive block then that's
>> @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>      spapr_create_nvram(spapr);
>>  
>>      /* Set up PCI */
>> +    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
>>      spapr_pci_rtas_init();
>>  
>>      phb = spapr_create_phb(spapr, 0);
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index dfe4d04..c8ea777 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb, 
>> uint32_t config_addr,
>>   * This is required for msi_notify()/msix_notify() which
>>   * will write at the addresses via spapr_msi_write().
>>   */
>> -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
>> -                             bool msix, unsigned req_num)
>> +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
>> +                             unsigned first_irq, unsigned req_num)
>>  {
>>      unsigned i;
>> -    MSIMessage msg = { .address = addr, .data = 0 };
>> +    MSIMessage msg = { .address = addr, .data = first_irq };
>>  
>>      if (!msix) {
>>          msi_set_message(pdev, msg);
>> @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr 
>> addr,
>>          return;
>>      }
>>  
>> -    for (i = 0; i < req_num; ++i) {
>> -        msg.address = addr | (i << 2);
>> +    for (i = 0; i < req_num; ++i, ++msg.data) {
>>          msix_set_message(pdev, i, msg);
>>          trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
>>      }
>> @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
>> sPAPREnvironment *spapr,
>>  
>>      /* There is no cached config, allocate MSIs */
>>      if (!phb->msi_table[ndev].nvec) {
>> -        irq = spapr_allocate_irq_block(req_num, false);
>> +        irq = spapr_allocate_irq_block(req_num, false,
>> +                                       ret_intr_type == RTAS_TYPE_MSI);
>>          if (irq < 0) {
>>              fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
>>              rtas_st(rets, 0, -1); /* Hardware error */
>> @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
>> sPAPREnvironment *spapr,
>>      }
>>  
>>      /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
>> -    spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
>> -                     ret_intr_type == RTAS_TYPE_MSIX, req_num);
>> +    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == 
>> RTAS_TYPE_MSIX,
>> +                     phb->msi_table[ndev].irq, req_num);
>>  
>>      rtas_st(rets, 0, 0);
>>      rtas_st(rets, 1, req_num);
>> @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = {
>>  static void spapr_msi_write(void *opaque, hwaddr addr,
>>                              uint64_t data, unsigned size)
>>  {
>> -    sPAPRPHBState *phb = opaque;
>> -    int ndev = addr >> 16;
>> -    int vec = ((addr & 0xFFFF) >> 2) | data;
>> -    uint32_t irq = phb->msi_table[ndev].irq + vec;
>> +    uint32_t irq = data;
>>  
>>      trace_spapr_pci_msi_write(addr, data, irq);
>>  
>> @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = {
>>      .endianness = DEVICE_LITTLE_ENDIAN
>>  };
>>  
>> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
>> +{
>> +    /*
>> +     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
>> +     * we need to allocate some memory to catch those writes coming
>> +     * from msi_notify()/msix_notify().
>> +     * As MSIMessage:addr is going to be the same and MSIMessage:data
>> +     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
>> +     * be used.
>> +     */
>> +    spapr->msi_win_addr = addr;
>> +    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
>> +                          "msi", getpagesize());
>> +    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
>> +                                &spapr->msiwindow);
>> +}
>> +
>>  /*
>>   * PHB PCI device
>>   */
>> @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s)
>>  
>>          if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
>>              || (sphb->mem_win_addr != -1)
>> -            || (sphb->io_win_addr != -1)
>> -            || (sphb->msi_win_addr != -1)) {
>> +            || (sphb->io_win_addr != -1)) {
>>              fprintf(stderr, "Either \"index\" or other parameters must"
>>                      " be specified for PAPR PHB, not both\n");
>>              return -1;
>> @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s)
>>              + sphb->index * SPAPR_PCI_WINDOW_SPACING;
>>          sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
>>          sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
>> -        sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF;
>>      }
>>  
>>      if (sphb->buid == -1) {
>> @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s)
>>          return -1;
>>      }
>>  
>> -    if (sphb->msi_win_addr == -1) {
>> -        fprintf(stderr, "MSI window address not specified for PHB\n");
>> -        return -1;
>> -    }
>> -
>>      if (find_phb(spapr, sphb->buid)) {
>>          fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
>>          return -1;
>> @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s)
>>                            namebuf, SPAPR_PCI_IO_WIN_SIZE);
>>      memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
>>                                  &sphb->iowindow);
>> -
>> -    /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
>> -     * we need to allocate some memory to catch those writes coming
>> -     * from msi_notify()/msix_notify() */
>> -    if (msi_supported) {
>> -        sprintf(namebuf, "%s.msi", sphb->dtbusname);
>> -        memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), 
>> &spapr_msi_ops, sphb,
>> -                              namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
>> -        memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr,
>> -                                    &sphb->msiwindow);
>> -    }
>> -
>>      /*
>>       * Selecting a busname is more complex than you'd think, due to
>>       * interacting constraints.  If the user has specified an id
>> @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = {
>>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
>>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
>>                        SPAPR_PCI_IO_WIN_SIZE),
>> -    DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = {
>>          VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
>>          VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
>>          VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
>> -        VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState),
>>          VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
>>                               vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
>>          VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 
>> 0,
>> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
>> index 93f9511..970b4a9 100644
>> --- a/include/hw/pci-host/spapr.h
>> +++ b/include/hw/pci-host/spapr.h
>> @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState {
>>  
>>      MemoryRegion memspace, iospace;
>>      hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
>> -    hwaddr msi_win_addr;
>> -    MemoryRegion memwindow, iowindow, msiwindow;
>> +    MemoryRegion memwindow, iowindow;
>>  
>>      uint32_t dma_liobn;
>>      uint64_t dma_window_start;
>> @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState {
>>  #define SPAPR_PCI_MMIO_WIN_SIZE      0x20000000
>>  #define SPAPR_PCI_IO_WIN_OFF         0x80000000
>>  #define SPAPR_PCI_IO_WIN_SIZE        0x10000
>> -#define SPAPR_PCI_MSI_WIN_OFF        0x90000000
>> +
>> +#define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
>>  
>>  #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
>>  
>> @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>                            uint32_t xics_phandle,
>>                            void *fdt);
>>  
>> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
>> +
>>  void spapr_pci_rtas_init(void);
>>  
>>  #endif /* __HW_SPAPR_PCI_H__ */
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7aa9a3d..aed02e1 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -14,6 +14,8 @@ struct icp_state;
>>  typedef struct sPAPREnvironment {
>>      struct VIOsPAPRBus *vio_bus;
>>      QLIST_HEAD(, sPAPRPHBState) phbs;
>> +    hwaddr msi_win_addr;
>> +    MemoryRegion msiwindow;
>>      struct sPAPRNVRAM *nvram;
>>      struct icp_state *icp;
>>  
>> @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, 
>> target_ulong opcode,
>>                               target_ulong *args);
>>  
>>  int spapr_allocate_irq(int hint, bool lsi);
>> -int spapr_allocate_irq_block(int num, bool lsi);
>> +int spapr_allocate_irq_block(int num, bool lsi, bool msi);
>>  
>>  static inline int spapr_allocate_msi(int hint)
>>  {
>>
> 
> 


-- 
Alexey



reply via email to

[Prev in Thread] Current Thread [Next in Thread]