[Intel-gfx] [PATCH v3 12/12] vfio/pci: Report dev_id in VFIO_DEVICE_GET_PCI_HOT_RESET_INFO
Alex Williamson
alex.williamson at redhat.com
Wed Apr 5 16:25:45 UTC 2023
On Wed, 5 Apr 2023 14:04:51 +0000
"Liu, Yi L" <yi.l.liu at intel.com> wrote:
> Hi Eric,
>
> > From: Eric Auger <eric.auger at redhat.com>
> > Sent: Wednesday, April 5, 2023 8:20 PM
> >
> > Hi Yi,
> > On 4/1/23 16:44, Yi Liu wrote:
> > > for the users that accept device fds passed from management stacks to be
> > > able to figure out the host reset affected devices among the devices
> > > opened by the user. This is needed as such users do not have BDF (bus,
> > > devfn) knowledge about the devices it has opened, hence unable to use
> > > the information reported by existing VFIO_DEVICE_GET_PCI_HOT_RESET_INFO
> > > to figure out the affected devices.
> > >
> > > Signed-off-by: Yi Liu <yi.l.liu at intel.com>
> > > ---
> > > drivers/vfio/pci/vfio_pci_core.c | 58 ++++++++++++++++++++++++++++----
> > > include/uapi/linux/vfio.h | 24 ++++++++++++-
> > > 2 files changed, 74 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> > > index 19f5b075d70a..a5a7e148dce1 100644
> > > --- a/drivers/vfio/pci/vfio_pci_core.c
> > > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > > @@ -30,6 +30,7 @@
> > > #if IS_ENABLED(CONFIG_EEH)
> > > #include <asm/eeh.h>
> > > #endif
> > > +#include <uapi/linux/iommufd.h>
> > >
> > > #include "vfio_pci_priv.h"
> > >
> > > @@ -767,6 +768,20 @@ static int vfio_pci_get_irq_count(struct
> > vfio_pci_core_device *vdev, int irq_typ
> > > return 0;
> > > }
> > >
> > > +static struct vfio_device *
> > > +vfio_pci_find_device_in_devset(struct vfio_device_set *dev_set,
> > > + struct pci_dev *pdev)
> > > +{
> > > + struct vfio_device *cur;
> > > +
> > > + lockdep_assert_held(&dev_set->lock);
> > > +
> > > + list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
> > > + if (cur->dev == &pdev->dev)
> > > + return cur;
> > > + return NULL;
> > > +}
> > > +
> > > static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
> > > {
> > > (*(int *)data)++;
> > > @@ -776,13 +791,20 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void
> > *data)
> > > struct vfio_pci_fill_info {
> > > int max;
> > > int cur;
> > > + bool require_devid;
> > > + struct iommufd_ctx *iommufd;
> > > + struct vfio_device_set *dev_set;
> > > struct vfio_pci_dependent_device *devices;
> > > };
> > >
> > > static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
> > > {
> > > struct vfio_pci_fill_info *fill = data;
> > > + struct vfio_device_set *dev_set = fill->dev_set;
> > > struct iommu_group *iommu_group;
> > > + struct vfio_device *vdev;
> > > +
> > > + lockdep_assert_held(&dev_set->lock);
> > >
> > > if (fill->cur == fill->max)
> > > return -EAGAIN; /* Something changed, try again */
> > > @@ -791,7 +813,21 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void
> > *data)
> > > if (!iommu_group)
> > > return -EPERM; /* Cannot reset non-isolated devices */
> > >
> > > - fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
> > > + if (fill->require_devid) {
> > > + /*
> > > + * Report dev_id of the devices that are opened as cdev
> > > + * and have the same iommufd with the fill->iommufd.
> > > + * Otherwise, just fill IOMMUFD_INVALID_ID.
> > > + */
> > > + vdev = vfio_pci_find_device_in_devset(dev_set, pdev);
> > > + if (vdev && vfio_device_cdev_opened(vdev) &&
> > > + fill->iommufd == vfio_iommufd_physical_ictx(vdev))
> > > + vfio_iommufd_physical_devid(vdev, &fill->devices[fill-
> > >cur].dev_id);
> > > + else
> > > + fill->devices[fill->cur].dev_id = IOMMUFD_INVALID_ID;
> > > + } else {
> > > + fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
> > > + }
> > > fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
> > > fill->devices[fill->cur].bus = pdev->bus->number;
> > > fill->devices[fill->cur].devfn = pdev->devfn;
> > > @@ -1230,17 +1266,27 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
> > > return -ENOMEM;
> > >
> > > fill.devices = devices;
> > > + fill.dev_set = vdev->vdev.dev_set;
> > >
> > > + mutex_lock(&vdev->vdev.dev_set->lock);
> > > + if (vfio_device_cdev_opened(&vdev->vdev)) {
> > > + fill.require_devid = true;
> > > + fill.iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> > > + }
> > > ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs,
> > > &fill, slot);
> > > + mutex_unlock(&vdev->vdev.dev_set->lock);
> > >
> > > /*
> > > * If a device was removed between counting and filling, we may come up
> > > * short of fill.max. If a device was added, we'll have a return of
> > > * -EAGAIN above.
> > > */
> > > - if (!ret)
> > > + if (!ret) {
> > > hdr.count = fill.cur;
> > > + if (fill.require_devid)
> > > + hdr.flags = VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID;
> > > + }
> > >
> > > reset_info_exit:
> > > if (copy_to_user(arg, &hdr, minsz))
> > > @@ -2346,12 +2392,10 @@ static bool vfio_dev_in_files(struct
> > vfio_pci_core_device *vdev,
> > > static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
> > > {
> > > struct vfio_device_set *dev_set = data;
> > > - struct vfio_device *cur;
> > >
> > > - list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
> > > - if (cur->dev == &pdev->dev)
> > > - return 0;
> > > - return -EBUSY;
> > > + lockdep_assert_held(&dev_set->lock);
> > > +
> > > + return vfio_pci_find_device_in_devset(dev_set, pdev) ? 0 : -EBUSY;
> > > }
> > >
> > > /*
> > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > index 25432ef213ee..5a34364e3b94 100644
> > > --- a/include/uapi/linux/vfio.h
> > > +++ b/include/uapi/linux/vfio.h
> > > @@ -650,11 +650,32 @@ enum {
> > > * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
> > > * struct vfio_pci_hot_reset_info)
> > > *
> > > + * This command is used to query the affected devices in the hot reset for
> > > + * a given device. User could use the information reported by this command
> > > + * to figure out the affected devices among the devices it has opened.
> > > + * This command always reports the segment, bus and devfn information for
> > > + * each affected device, and selectively report the group_id or the dev_id
> > > + * per the way how the device being queried is opened.
> > > + * - If the device is opened via the traditional group/container manner,
> > > + * this command reports the group_id for each affected device.
> > > + *
> > > + * - If the device is opened as a cdev, this command needs to report
> > s/needs to report/reports
>
> got it.
>
> > > + * dev_id for each affected device and set the
> > > + * VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID flag. For the affected
> > > + * devices that are not opened as cdev or bound to different iommufds
> > > + * with the device that is queried, report an invalid dev_id to avoid
> > s/bound to different iommufds with the device that is queried/bound to
> > iommufds different from the reset device one?
>
> hmmm, I'm not a native speaker here. This _INFO is to query if want
> hot reset a given device, what devices would be affected. So it appears
> the queried device is better. But I'd admit "the queried device" is also
> "the reset device". may Alex help pick one. 😊
- If the calling device is opened directly via cdev rather than
accessed through the vfio group, the returned
vfio_pci_depdendent_device structure reports the dev_id
rather than the group_id, which is indicated by the
VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID flag in
vfio_pci_hot_reset_info. If the reset affects devices that
are not opened within the same iommufd context as the calling
device, IOMMUFD_INVALID_ID will be provided as the dev_id.
But that kind of brings to light the question of what does the user do
when they encounter this situation. If the device is not opened, the
reset can complete. If the device is opened by a different user, the
reset is blocked. The only logical conclusion is that the user should
try the reset regardless of the result of the info ioctl, which the
null-array approach further solidifies as the direction of the API.
I'm not liking this. Thanks,
Alex
> > > + * potential dev_id conflict as dev_id is local to iommufd. For such
> > > + * affected devices, user shall fall back to use the segment, bus and
> > > + * devfn info to map it to opened device.
> > > + *
> > > * Return: 0 on success, -errno on failure:
> > > * -enospc = insufficient buffer, -enodev = unsupported for device.
> > > */
> > > struct vfio_pci_dependent_device {
> > > - __u32 group_id;
> > > + union {
> > > + __u32 group_id;
> > > + __u32 dev_id;
> > > + };
> > > __u16 segment;
> > > __u8 bus;
> > > __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */
> > > @@ -663,6 +684,7 @@ struct vfio_pci_dependent_device {
> > > struct vfio_pci_hot_reset_info {
> > > __u32 argsz;
> > > __u32 flags;
> > > +#define VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID (1 << 0)
> > > __u32 count;
> > > struct vfio_pci_dependent_device devices[];
> > > };
> > Eric
>
More information about the Intel-gfx
mailing list