qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] spapr-pci: rework MSI/MSIX


From: Alexey Kardashevskiy
Subject: Re: [Qemu-devel] [PATCH] spapr-pci: rework MSI/MSIX
Date: Tue, 23 Jul 2013 12:54:21 +1000
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130514 Thunderbird/17.0.6

Michael, could you please review the patch as it is about PCI? Thanks!


On 07/12/2013 05:38 PM, Alexey Kardashevskiy wrote:
> On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS
> hypercalls which return global IRQ numbers to a guest so it only
> operates with those and never touches MSIMessage.
> 
> Therefore MSIMessage handling is completely hidden in QEMU.
> 
> Previously every sPAPR PCI host bridge implemented its own MSI window
> to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci
> or vfio) and route them to the guest via qemu_pulse_irq().
> MSIMessage used to be encoded as:
>       .addr - address within the PHB MSI window;
>       .data - the device index on PHB plus vector number.
> The MSI MR write function translated this MSIMessage to a global IRQ
> number and called qemu_pulse_irq().
> 
> However the total number of IRQs is not really big (at the moment it is
> 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage
> seems to be enough to store an IRQ number there.
> 
> This simplifies MSI handling in sPAPR PHB. Specifically, this does:
> 1. remove a MSI window from a PHB;
> 2. add a single memory region for all MSIs to sPAPREnvironment
> and spapr_pci_msi_init() to initialize it;
> 3. encode MSIMessage as:
>     * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL;
>     * .data as an IRQ number.
> 4. change IRQ allocator to align first IRQ number in a block for MSI.
> MSI uses lower bits to specify the vector number so the first IRQ has to
> be aligned. MSIX does not need any special allocator though.
> 
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
>  hw/ppc/spapr.c              | 29 ++++++++++++++++++---
>  hw/ppc/spapr_pci.c          | 61 
> ++++++++++++++++++++-------------------------
>  include/hw/pci-host/spapr.h |  8 +++---
>  include/hw/ppc/spapr.h      |  4 ++-
>  4 files changed, 60 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 671bbd9..6b21191 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi)
>  
>      if (hint) {
>          irq = hint;
> +        if (hint >= spapr->next_irq) {
> +            spapr->next_irq = hint + 1;
> +        }
>          /* FIXME: we should probably check for collisions somehow */
>      } else {
>          irq = spapr->next_irq++;
> @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi)
>      return irq;
>  }
>  
> -/* Allocate block of consequtive IRQs, returns a number of the first */
> -int spapr_allocate_irq_block(int num, bool lsi)
> +/*
> + * Allocate block of consequtive IRQs, returns a number of the first.
> + * If msi==true, aligns the first IRQ number to num.
> + */
> +int spapr_allocate_irq_block(int num, bool lsi, bool msi)
>  {
>      int first = -1;
> -    int i;
> +    int i, hint = 0;
> +
> +    /*
> +     * MSIMesage::data is used for storing VIRQ so
> +     * it has to be aligned to num to support multiple
> +     * MSI vectors. MSI-X is not affected by this.
> +     * The hint is used for the first IRQ, the rest should
> +     * be allocated continously.
> +     */
> +    if (msi) {
> +        assert((num == 1) || (num == 2) || (num == 4) ||
> +               (num == 8) || (num == 16) || (num == 32));
> +        hint = (spapr->next_irq + num - 1) & ~(num - 1);
> +    }
>  
>      for (i = 0; i < num; ++i) {
>          int irq;
>  
> -        irq = spapr_allocate_irq(0, lsi);
> +        irq = spapr_allocate_irq(hint, lsi);
>          if (!irq) {
>              return -1;
>          }
>  
>          if (0 == i) {
>              first = irq;
> +            hint = 0;
>          }
>  
>          /* If the above doesn't create a consecutive block then that's
> @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>      spapr_create_nvram(spapr);
>  
>      /* Set up PCI */
> +    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
>      spapr_pci_rtas_init();
>  
>      phb = spapr_create_phb(spapr, 0);
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index dfe4d04..c8ea777 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb, 
> uint32_t config_addr,
>   * This is required for msi_notify()/msix_notify() which
>   * will write at the addresses via spapr_msi_write().
>   */
> -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
> -                             bool msix, unsigned req_num)
> +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
> +                             unsigned first_irq, unsigned req_num)
>  {
>      unsigned i;
> -    MSIMessage msg = { .address = addr, .data = 0 };
> +    MSIMessage msg = { .address = addr, .data = first_irq };
>  
>      if (!msix) {
>          msi_set_message(pdev, msg);
> @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
>          return;
>      }
>  
> -    for (i = 0; i < req_num; ++i) {
> -        msg.address = addr | (i << 2);
> +    for (i = 0; i < req_num; ++i, ++msg.data) {
>          msix_set_message(pdev, i, msg);
>          trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
>      }
> @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>  
>      /* There is no cached config, allocate MSIs */
>      if (!phb->msi_table[ndev].nvec) {
> -        irq = spapr_allocate_irq_block(req_num, false);
> +        irq = spapr_allocate_irq_block(req_num, false,
> +                                       ret_intr_type == RTAS_TYPE_MSI);
>          if (irq < 0) {
>              fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
>              rtas_st(rets, 0, -1); /* Hardware error */
> @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>      }
>  
>      /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
> -    spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
> -                     ret_intr_type == RTAS_TYPE_MSIX, req_num);
> +    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == 
> RTAS_TYPE_MSIX,
> +                     phb->msi_table[ndev].irq, req_num);
>  
>      rtas_st(rets, 0, 0);
>      rtas_st(rets, 1, req_num);
> @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = {
>  static void spapr_msi_write(void *opaque, hwaddr addr,
>                              uint64_t data, unsigned size)
>  {
> -    sPAPRPHBState *phb = opaque;
> -    int ndev = addr >> 16;
> -    int vec = ((addr & 0xFFFF) >> 2) | data;
> -    uint32_t irq = phb->msi_table[ndev].irq + vec;
> +    uint32_t irq = data;
>  
>      trace_spapr_pci_msi_write(addr, data, irq);
>  
> @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = {
>      .endianness = DEVICE_LITTLE_ENDIAN
>  };
>  
> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
> +{
> +    /*
> +     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
> +     * we need to allocate some memory to catch those writes coming
> +     * from msi_notify()/msix_notify().
> +     * As MSIMessage:addr is going to be the same and MSIMessage:data
> +     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
> +     * be used.
> +     */
> +    spapr->msi_win_addr = addr;
> +    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
> +                          "msi", getpagesize());
> +    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
> +                                &spapr->msiwindow);
> +}
> +
>  /*
>   * PHB PCI device
>   */
> @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s)
>  
>          if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
>              || (sphb->mem_win_addr != -1)
> -            || (sphb->io_win_addr != -1)
> -            || (sphb->msi_win_addr != -1)) {
> +            || (sphb->io_win_addr != -1)) {
>              fprintf(stderr, "Either \"index\" or other parameters must"
>                      " be specified for PAPR PHB, not both\n");
>              return -1;
> @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s)
>              + sphb->index * SPAPR_PCI_WINDOW_SPACING;
>          sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
>          sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
> -        sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF;
>      }
>  
>      if (sphb->buid == -1) {
> @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s)
>          return -1;
>      }
>  
> -    if (sphb->msi_win_addr == -1) {
> -        fprintf(stderr, "MSI window address not specified for PHB\n");
> -        return -1;
> -    }
> -
>      if (find_phb(spapr, sphb->buid)) {
>          fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
>          return -1;
> @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s)
>                            namebuf, SPAPR_PCI_IO_WIN_SIZE);
>      memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
>                                  &sphb->iowindow);
> -
> -    /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
> -     * we need to allocate some memory to catch those writes coming
> -     * from msi_notify()/msix_notify() */
> -    if (msi_supported) {
> -        sprintf(namebuf, "%s.msi", sphb->dtbusname);
> -        memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), 
> &spapr_msi_ops, sphb,
> -                              namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
> -        memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr,
> -                                    &sphb->msiwindow);
> -    }
> -
>      /*
>       * Selecting a busname is more complex than you'd think, due to
>       * interacting constraints.  If the user has specified an id
> @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
>                        SPAPR_PCI_IO_WIN_SIZE),
> -    DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = {
>          VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
>          VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
>          VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
> -        VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState),
>          VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
>                               vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
>          VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 
> 0,
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 93f9511..970b4a9 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState {
>  
>      MemoryRegion memspace, iospace;
>      hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
> -    hwaddr msi_win_addr;
> -    MemoryRegion memwindow, iowindow, msiwindow;
> +    MemoryRegion memwindow, iowindow;
>  
>      uint32_t dma_liobn;
>      uint64_t dma_window_start;
> @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState {
>  #define SPAPR_PCI_MMIO_WIN_SIZE      0x20000000
>  #define SPAPR_PCI_IO_WIN_OFF         0x80000000
>  #define SPAPR_PCI_IO_WIN_SIZE        0x10000
> -#define SPAPR_PCI_MSI_WIN_OFF        0x90000000
> +
> +#define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
>  
>  #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
>  
> @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                            uint32_t xics_phandle,
>                            void *fdt);
>  
> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
> +
>  void spapr_pci_rtas_init(void);
>  
>  #endif /* __HW_SPAPR_PCI_H__ */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7aa9a3d..aed02e1 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -14,6 +14,8 @@ struct icp_state;
>  typedef struct sPAPREnvironment {
>      struct VIOsPAPRBus *vio_bus;
>      QLIST_HEAD(, sPAPRPHBState) phbs;
> +    hwaddr msi_win_addr;
> +    MemoryRegion msiwindow;
>      struct sPAPRNVRAM *nvram;
>      struct icp_state *icp;
>  
> @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, 
> target_ulong opcode,
>                               target_ulong *args);
>  
>  int spapr_allocate_irq(int hint, bool lsi);
> -int spapr_allocate_irq_block(int num, bool lsi);
> +int spapr_allocate_irq_block(int num, bool lsi, bool msi);
>  
>  static inline int spapr_allocate_msi(int hint)
>  {
> 


-- 
Alexey



reply via email to

[Prev in Thread] Current Thread [Next in Thread]