qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is supp


From: Alex Williamson
Subject: Re: [Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is support or not
Date: Wed, 9 Mar 2016 09:59:04 -0700

On Wed, 9 Mar 2016 18:47:35 +0200
"Michael S. Tsirkin" <address@hidden> wrote:

> On Mon, Mar 07, 2016 at 11:22:58AM +0800, Cao jin wrote:
> > From: Chen Fan <address@hidden>
> > 
> > when boot up a VM that assigning vfio devices with aer enabled, we
> > must check the vfio device whether support host bus reset. because
> > when one error occur. OS driver always recover the device by do a
> > bus reset, in order to recover the vfio device, qemu must to do a
> > host bus reset to reset the device to default status. and for all
> > affected devices by the bus reset. we must check them whether all
> > are assigned to the VM.
> > 
> > Signed-off-by: Chen Fan <address@hidden>
> > ---
> >  hw/vfio/pci.c | 218 
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  hw/vfio/pci.h |   1 +
> >  2 files changed, 212 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 8ec9b25..0898e34 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
> > uint8_t pos)
> >      return 0;
> >  }
> >  
> > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> > +                                     PCIHostDeviceAddress *host2)
> > +{
> > +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> > +            host1->slot == host2->slot);
> > +}
> > +
> > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > +                                PCIHostDeviceAddress *host2)
> > +{
> > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > +            host1->function == host2->function);
> > +}
> > +
> > +struct VFIODeviceFind {
> > +    PCIDevice *pdev;
> > +    bool found;
> > +};
> > +
> > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> > +                                      void *opaque)
> > +{
> > +    DeviceState *dev = DEVICE(pdev);
> > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > +    VFIOPCIDevice *vdev;
> > +    struct VFIODeviceFind *find = opaque;
> > +
> > +    if (find->found) {
> > +        return;
> > +    }
> > +
> > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > +        if (!dc->reset) {
> > +            goto found;
> > +        }
> > +        return;
> > +    }
> > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > +        !vdev->vbasedev.reset_works) {
> > +        goto found;
> > +    }
> > +
> > +    return;
> > +found:
> > +    find->pdev = pdev;
> > +    find->found = true;
> > +}
> > +
> > +static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> > +{
> > +    PCIBus *bus = vdev->pdev.bus;
> > +    struct vfio_pci_hot_reset_info *info = NULL;
> > +    struct vfio_pci_dependent_device *devices;
> > +    VFIOGroup *group;
> > +    struct VFIODeviceFind find;
> > +    int ret, i;
> > +
> > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > +    if (ret) {
> > +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> > +                   " device does not support hot reset.",
> > +                   vdev->vbasedev.name);
> > +        return;
> > +    }
> > +
> > +    /* List all affected devices by bus reset */
> > +    devices = &info->devices[0];
> > +
> > +    /* Verify that we have all the groups required */
> > +    for (i = 0; i < info->count; i++) {
> > +        PCIHostDeviceAddress host;
> > +        VFIOPCIDevice *tmp;
> > +        VFIODevice *vbasedev_iter;
> > +        bool found = false;
> > +
> > +        host.domain = devices[i].segment;
> > +        host.bus = devices[i].bus;
> > +        host.slot = PCI_SLOT(devices[i].devfn);
> > +        host.function = PCI_FUNC(devices[i].devfn);
> > +
> > +        /* Skip the current device */
> > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > +            continue;
> > +        }
> > +
> > +        /* Ensure we own the group of the affected device */
> > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > +            if (group->groupid == devices[i].group_id) {
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (!group) {
> > +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> > +                       "depends on group %d which is not owned.",
> > +                       vdev->vbasedev.name, devices[i].group_id);
> > +            goto out;
> > +        }
> > +
> > +        /* Ensure affected devices for reset on the same slot */
> > +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {  
> 
> 
> 
> So what you did here, first you let user build up
> all functions, then when finally function 0 is added,
> you go and check them all.
> 
> Result is rather messy.
> 
> Instead, make is_valid_func access two functions:
> existing one and new one.
> And this way each new function can validate that
> all existing functions are setup correctly,
> and each existing function can validate that
> the new one does not conflict with it.

Same problem as what you're proposed for 09/11, when bus reset is
involved, there may be no valid configuration of a subset of the
devices.  Only when the entire set of devices is added does it become
valid.  Thanks,

Alex

> > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > +                continue;
> > +            }
> > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > +                /*
> > +                 * AER errors may be broadcast to all functions of a multi-
> > +                 * function endpoint.  If any of those sibling functions 
> > are
> > +                 * also assigned, they need to have AER enabled or else an
> > +                 * error may continue to cause a vm_stop condition.  IOW,
> > +                 * AER setup of this function would be pointless.
> > +                 */
> > +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> > +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> > +                    error_setg(errp, "vfio: Cannot enable AER for device 
> > %s, on same slot"
> > +                               " the dependent device %s which does not 
> > enable AER.",
> > +                               vdev->vbasedev.name, tmp->vbasedev.name);
> > +                    goto out;
> > +                }
> > +
> > +                if (tmp->pdev.bus != bus) {
> > +                    error_setg(errp, "vfio: Cannot enable AER for device 
> > %s, "
> > +                               "the dependent device %s is not on the same 
> > bus",
> > +                               vdev->vbasedev.name, tmp->vbasedev.name);
> > +                    goto out;
> > +                }
> > +                found = true;
> > +                break;
> > +            }
> > +        }
> > +
> > +        /* Ensure all affected devices assigned to VM */
> > +        if (!found) {
> > +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> > +                       "the dependent device %04x:%02x:%02x.%x "
> > +                       "is not assigned to VM.",
> > +                       vdev->vbasedev.name, host.domain, host.bus,
> > +                       host.slot, host.function);
> > +            goto out;
> > +        }
> > +    }
> > +
> > +    /*
> > +     * Check the all pci devices on or below the target bus
> > +     * have a reset mechanism at least.
> > +     */
> > +    find.pdev = NULL;
> > +    find.found = false;
> > +    pci_for_each_device(bus, pci_bus_num(bus),
> > +                        vfio_check_device_noreset, &find);
> > +    if (find.found) {
> > +        error_setg(errp, "vfio: Cannot enable AER for device %s, "
> > +                   "the affected device %s does not have a reset 
> > mechanism.",
> > +                   vdev->vbasedev.name, find.pdev->name);
> > +        goto out;
> > +    }
> > +
> > +out:
> > +    g_free(info);
> > +    return;
> > +}
> > +
> > +static void vfio_check_devices_host_bus_reset(Error **errp)
> > +{
> > +    VFIOGroup *group;
> > +    VFIODevice *vbasedev;
> > +    VFIOPCIDevice *vdev;
> > +    Error *local_err = NULL;
> > +
> > +    /* Check All vfio-pci devices if have bus reset capability */
> > +    QLIST_FOREACH(group, &vfio_group_list, next) {
> > +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> > +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> > +                continue;
> > +            }
> > +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> > +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> > +                vfio_check_host_bus_reset(vdev, &local_err);
> > +                if (local_err) {
> > +                    error_propagate(errp, local_err);
> > +                    return;
> > +                }
> > +            }
> > +        }
> > +    }
> > +
> > +    return;
> > +}
> > +
> >  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
> >                            int pos, uint16_t size)
> >  {
> > @@ -2047,13 +2238,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> >      vfio_intx_enable(vdev);
> >  }
> >  
> > -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > -                                PCIHostDeviceAddress *host2)
> > -{
> > -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> > -            host1->slot == host2->slot && host1->function == 
> > host2->function);
> > -}
> > -
> >  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
> >  {
> >      VFIOGroup *group;
> > @@ -2559,6 +2743,21 @@ static void 
> > vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> >      vdev->req_enabled = false;
> >  }
> >  
> > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> > +{
> > +    Error *local_err = NULL;
> > +
> > +    vfio_check_devices_host_bus_reset(&local_err);
> > +    if (local_err) {
> > +        error_report_err(local_err);
> > +        exit(1);
> > +    }
> > +}
> > +
> > +static Notifier machine_notifier = {
> > +    .notify = vfio_pci_machine_done_notify,
> > +};
> > +
> >  static int vfio_initfn(PCIDevice *pdev)
> >  {
> >      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > @@ -2905,6 +3104,11 @@ static const TypeInfo vfio_pci_dev_info = {
> >  static void register_vfio_pci_dev_type(void)
> >  {
> >      type_register_static(&vfio_pci_dev_info);
> > +    /*
> > +     * Register notifier when machine init is done, since we need
> > +     * check the configration manner after all vfio device are inited.
> > +     */
> > +    qemu_add_machine_init_done_notifier(&machine_notifier);
> >  }
> >  
> >  type_init(register_vfio_pci_dev_type)
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index e0c53f2..aff46c2 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -15,6 +15,7 @@
> >  #include "qemu-common.h"
> >  #include "exec/memory.h"
> >  #include "hw/pci/pci.h"
> > +#include "hw/pci/pci_bus.h"
> >  #include "hw/pci/pci_bridge.h"
> >  #include "hw/vfio/vfio-common.h"
> >  #include "qemu/event_notifier.h"
> > -- 
> > 1.9.3
> > 
> >   




reply via email to

[Prev in Thread] Current Thread [Next in Thread]