[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for h
From: |
Alex Williamson |
Subject: |
Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset |
Date: |
Wed, 29 Apr 2015 21:21:06 -0600 |
On Thu, 2015-04-30 at 11:07 +0800, Chen Fan wrote:
> On 04/30/2015 01:32 AM, Alex Williamson wrote:
> > On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
> >> add host secondary bus reset for vfio when AER occurs, if reset failed,
> >> we should stop vm.
> >>
> >> Signed-off-by: Chen Fan <address@hidden>
> >> ---
> >> hw/vfio/pci.c | 151
> >> +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> >> 1 file changed, 138 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 060fb47..619daed 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
> >> PCIHostDeviceAddress host;
> >> EventNotifier err_notifier;
> >> EventNotifier req_notifier;
> >> +
> >> + Notifier sec_bus_reset_notifier;
> >> uint32_t features;
> >> #define VFIO_FEATURE_ENABLE_VGA_BIT 0
> >> #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> >> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev,
> >> int pos, uint8_t size)
> >> return pos;
> >> }
> >>
> >> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> >> + PCIHostDeviceAddress *host2)
> >> +{
> >> + return (host1->domain == host2->domain && host1->bus == host2->bus &&
> >> + host1->slot == host2->slot && host1->function ==
> >> host2->function);
> >> +}
> >> +
> >> static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
> >> {
> >> uint32_t cap = pci_get_long(vdev->pdev.config + pos +
> >> PCI_EXP_DEVCAP);
> >> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev,
> >> uint8_t pos)
> >> return 0;
> >> }
> >>
> >> +static int vfio_aer_validate_devices(DeviceState *dev,
> >> + void *opaque)
> >> +{
> >> + VFIOPCIDevice *vdev;
> >> + int i;
> >> + bool found = false;
> >> + struct vfio_pci_hot_reset_info *info = opaque;
> >> + struct vfio_pci_dependent_device *devices = &info->devices[0];
> >> +
> >> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> >> + return 0;
> >> + }
> >> +
> >> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
> >> + for (i = 0; i < info->count; i++) {
> >> + PCIHostDeviceAddress host;
> >> +
> >> + host.domain = devices[i].segment;
> >> + host.bus = devices[i].bus;
> >> + host.slot = PCI_SLOT(devices[i].devfn);
> >> + host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> + if (vfio_pci_host_match(&host, &vdev->host)) {
> >> + found = true;
> >> + break;
> >> + }
> >> + }
> >> +
> >> + if (!found) {
> >> + error_report("vfio: Cannot reset parent bus with AER supported,"
> >> + "depends on device %s which is not contained.",
> >> + vdev->vbasedev.name);
> >> + return -1;
> >> + }
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
> >> +{
> >> + error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> >> + "Please collect any data possible and then kill the
> >> guest",
> >> + __func__, vdev->host.domain, vdev->host.bus,
> >> + vdev->host.slot, vdev->host.function);
> >> +
> >> + vm_stop(RUN_STATE_INTERNAL_ERROR);
> >> +}
> >> +
> >> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
> >> +{
> >> + VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice,
> >> sec_bus_reset_notifier);
> >> + PCIDevice *pdev = &vdev->pdev;
> >> + int ret, i;
> >> + struct vfio_pci_hot_reset_info *info;
> >> + struct vfio_pci_dependent_device *devices;
> >> + VFIOGroup *group;
> >> +
> >> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> >> + return;
> >> + }
> >> +
> >> + /*
> >> + * Check the affected devices by virtual bus reset are contained in
> >> + * the set of groups.
> >> + */
> >> + ret = vfio_get_hot_reset_info(vdev, &info);
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> + devices = &info->devices[0];
> >> +
> >> + /* Verify that we have all the groups required */
> >> + for (i = 0; i < info->count; i++) {
> >> + PCIHostDeviceAddress host;
> >> +
> >> + host.domain = devices[i].segment;
> >> + host.bus = devices[i].bus;
> >> + host.slot = PCI_SLOT(devices[i].devfn);
> >> + host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> + if (vfio_pci_host_match(&host, &vdev->host)) {
> >> + continue;
> >> + }
> >> +
> >> + QLIST_FOREACH(group, &vfio_group_list, next) {
> >> + if (group->groupid == devices[i].group_id) {
> >> + break;
> >> + }
> >> + }
> >> +
> >> + if (!group) {
> >> + if (!vdev->has_pm_reset) {
> > I'm not sure how has_pm_reset has anything to do with what we're testing
> > here. Copy-paste error?
> >
> >> + error_report("vfio: Cannot reset device %s with AER
> >> supported,"
> >> + "depends on group %d which is not owned.",
> >> + vdev->vbasedev.name, devices[i].group_id);
> >> + }
> >> + ret = -EPERM;
> >> + goto stop_vm;
> >> + }
> >> + }
> >
> > The above verifies that all of the devices affected by the bus reset are
> > owned by the VM.
> >
> >> +
> >> + /* Verify that we have all the affected devices under the bus */
> >> + ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
> >> + vfio_aer_validate_devices,
> >> + NULL, info);
> > And here we make sure that any vfio-pci devices affected by the bus
> > reset are contained in the list of affected devices. What we're still
> > missing is whether there are any affected devices that are exposed to
> > the VM that are not covered in this bus walk. For instance, if I assign
> > a multi-function device and place function 0 and 1 under separate, peer
> > root ports, I believe the above tests will confirm that the VM owns the
> > necessary groups and that the only vfio-pci device affected by the bus
> > reset is in the list of affected devices, but it will not verify that
> > the other function is not affected by the guest bus reset, but is
> > affected by the host bus reset.
> you are right.
>
> >
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> +
> >> + /* bus reset! */
> >> + ret = vfio_pci_do_hot_reset(vdev, info);
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> + g_free(info);
> >> + return;
> >> +
> >> +stop_vm:
> >> + g_free(info);
> >> + vfio_pci_vm_stop(vdev);
> >> +}
> >> +
> >> static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> >> {
> >> PCIDevice *pdev = &vdev->pdev;
> >> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int
> >> pos, uint16_t size)
> >> pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
> >> pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER,
> >> ~severity);
> >>
> >> + vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
> >> + pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
> >> +
> > Additionally, we're going to do a bus reset once for every vfio-pci
> > device subordinate to the bus being reset. That's not very efficient.
> >
> > There are still a number of problems here. We're allowing users to
> > configure their VMs however they wish and enable AER, then only when
> > they do a bus reset (which we have no way to infer is related to an AER
> > event) do we check and potentially halt the VM if guest mapping of the
> > host topology prevents a bus reset. That seems sort of like making
> > backups of your data, but never testing that you can recover from the
> > backup until we need the data. I can't imagine that many users are
> > going to be able to get this correct and they won't know it's incorrect
> > until an AER event occurs.
> >
> > Also, Do we need to worry about guest root complex devices? The guest
> > won't be able to perform a bus reset on the guest bus in that case. Is
> > AER even valid for RC devices since they don't have a parent downstream
> > port to signal the AER? This seems like another case where it's more
> > likely that a user will create an invalid configuration than it is they
> > will create a valid one, but we won't know until an AER occurs with the
> > code structure here.
> do you refer to the root complex device is the root port of root complex?
> the root port can notify system itself, the PCI Express aer driver
> reset the upstream link between root port and upstream port of
> subordinate switch. so I think we don't need to care that.
No, I'm saying that we can place an assigned device directly on pcie.0,
the host bus, without using a root port. I don't know if we have any
way to signal an AER event for root complex devices, but the guest
certainly doesn't have a way to perform a bus reset on the host bus.
Thanks,
Alex
> > Therefore I think that if the user specifies AER, we need to verify and
> > enforce at that point, ie. the initfn, that a host bus reset is
> > possible, that a guest reset is possible, and that a guest bus reset
> > fully contains the VM visible host devices affected by the reset.
> >
> > I'd also like to see the patches ordered such that we provide all the
> > infrastructure to validate and enforce the configuration restrictions to
> > support AER before we enable it to the guest. The order here creates
> > bisect points where AER is exposed to the guest with unacceptable QEMU
> > handling. Thanks,
> I'm sorry for that, due to this is RFC version I missed that,
> and I will arrange the patches after this series.
>
> Thanks,
> Chen
>
>
> >
> > Alex
> >
> >> return 0;
> >> }
> >>
> >> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> >> vfio_enable_intx(vdev);
> >> }
> >>
> >> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> >> - PCIHostDeviceAddress *host2)
> >> -{
> >> - return (host1->domain == host2->domain && host1->bus == host2->bus &&
> >> - host1->slot == host2->slot && host1->function ==
> >> host2->function);
> >> -}
> >> -
> >> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
> >> {
> >> VFIOGroup *group;
> >> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
> >> * terminate the guest to contain the error.
> >> */
> >>
> >> - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> >> - "Please collect any data possible and then kill the
> >> guest",
> >> - __func__, vdev->host.domain, vdev->host.bus,
> >> - vdev->host.slot, vdev->host.function);
> >> -
> >> - vm_stop(RUN_STATE_INTERNAL_ERROR);
> >> + vfio_pci_vm_stop(vdev);
> >> }
> >>
> >> /*
> >
> >
> > .
> >
>
- [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support, (continued)
- [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset, Chen Fan, 2015/04/29
- [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset, Chen Fan, 2015/04/29