[Intel-gfx] [PATCH v7 8/9] vfio/pci: Extend VFIO_DEVICE_GET_PCI_HOT_RESET_INFO for vfio device cdev

Liu, Yi L yi.l.liu at intel.com
Wed Jun 14 10:35:10 UTC 2023


> From: Jason Gunthorpe <jgg at nvidia.com>
> Sent: Wednesday, June 14, 2023 2:23 AM
> 
> On Fri, Jun 02, 2023 at 05:15:14AM -0700, Yi Liu wrote:
> > This allows VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl use the iommufd_ctx
> > of the cdev device to check the ownership of the other affected devices.
> >
> > When VFIO_DEVICE_GET_PCI_HOT_RESET_INFO is called on an IOMMUFD managed
> > device, the new flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is reported to indicate
> > the values returned are IOMMUFD devids rather than group IDs as used when
> > accessing vfio devices through the conventional vfio group interface.
> > Additionally the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED will be reported
> > in this mode if all of the devices affected by the hot-reset are owned by
> > either virtue of being directly bound to the same iommufd context as the
> > calling device, or implicitly owned via a shared IOMMU group.
> >
> > Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
> > Suggested-by: Alex Williamson <alex.williamson at redhat.com>
> > Signed-off-by: Yi Liu <yi.l.liu at intel.com>
> > ---
> >  drivers/vfio/iommufd.c           | 49 +++++++++++++++++++++++++++++++
> >  drivers/vfio/pci/vfio_pci_core.c | 47 +++++++++++++++++++++++++-----
> >  include/linux/vfio.h             | 16 ++++++++++
> >  include/uapi/linux/vfio.h        | 50 +++++++++++++++++++++++++++++++-
> >  4 files changed, 154 insertions(+), 8 deletions(-)
> 
> This could use some more fiddling, like we could copy each
> vfio_pci_dependent_device to user memory inside the loop instead of
> allocating an array.

I understand the motivation, but I have some concerns. Please see my
comments inline.

> Add another patch with something like this in it:
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index b0eadafcbcf502..516e0fda74bec9 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -775,19 +775,23 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
>  }
> 
>  struct vfio_pci_fill_info {
> -	int max;
> -	int cur;
> -	struct vfio_pci_dependent_device *devices;
> +	struct vfio_pci_dependent_device __user *devices;
> +	struct vfio_pci_dependent_device __user *devices_end;
>  	struct vfio_device *vdev;
>  	u32 flags;
>  };
> 
>  static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
>  {
> +	struct vfio_pci_dependent_device info = {
> +		.segment = pci_domain_nr(pdev->bus),
> +		.bus = pdev->bus->number,
> +		.devfn = pdev->devfn,
> +	};
>  	struct vfio_pci_fill_info *fill = data;
> 
> -	if (fill->cur == fill->max)
> -		return -EAGAIN; /* Something changed, try again */
> +	if (fill->devices_end >= fill->devices)
> +		return -ENOSPC;

This should be fill->devices_end <= fill->devices. Even with that corrected,
the new code no longer returns -EAGAIN. Also, when -ENOSPC is returned, the
expected size should be reported to userspace, but I don't see that here. Since
the hunk below [1] is removed, there seems to be no way for userspace to know
how much memory is required.

> 
>  	if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) {
>  		struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev);
> @@ -800,12 +804,12 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
>  		 */
>  		vdev = vfio_find_device_in_devset(dev_set, &pdev->dev);
>  		if (!vdev)
> -			fill->devices[fill->cur].devid = VFIO_PCI_DEVID_NOT_OWNED;
> +			info.devid = VFIO_PCI_DEVID_NOT_OWNED;
>  		else
> -			fill->devices[fill->cur].devid =
> -				vfio_iommufd_device_hot_reset_devid(vdev, iommufd);
> +			info.devid = vfio_iommufd_device_hot_reset_devid(
> +				vdev, iommufd);
>  		/* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */
> -		if (fill->devices[fill->cur].devid == VFIO_PCI_DEVID_NOT_OWNED)
> +		if (info.devid == VFIO_PCI_DEVID_NOT_OWNED)
>  			fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED;
>  	} else {
>  		struct iommu_group *iommu_group;
> @@ -814,13 +818,13 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
>  		if (!iommu_group)
>  			return -EPERM; /* Cannot reset non-isolated devices */
> 
> -		fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
> +		info.group_id = iommu_group_id(iommu_group);
>  		iommu_group_put(iommu_group);
>  	}
> -	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
> -	fill->devices[fill->cur].bus = pdev->bus->number;
> -	fill->devices[fill->cur].devfn = pdev->devfn;
> -	fill->cur++;
> +
> +	if (copy_to_user(fill->devices, &info, sizeof(info)))
> +		return -EFAULT;
> +	fill->devices++;
>  	return 0;
>  }
> 
> @@ -1212,8 +1216,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>  	unsigned long minsz =
>  		offsetofend(struct vfio_pci_hot_reset_info, count);
>  	struct vfio_pci_hot_reset_info hdr;
> -	struct vfio_pci_fill_info fill = { 0 };
> -	struct vfio_pci_dependent_device *devices = NULL;
> +	struct vfio_pci_fill_info fill = {};
>  	bool slot = false;
>  	int ret = 0;
> 
> @@ -1231,29 +1234,9 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>  	else if (pci_probe_reset_bus(vdev->pdev->bus))
>  		return -ENODEV;
> 
> -	/* How many devices are affected? */
> -	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
> -					    &fill.max, slot);
> -	if (ret)
> -		return ret;
> -
> -	WARN_ON(!fill.max); /* Should always be at least one */
> -
> -	/*
> -	 * If there's enough space, fill it now, otherwise return -ENOSPC and
> -	 * the number of devices affected.
> -	 */
> -	if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
> -		ret = -ENOSPC;
> -		hdr.count = fill.max;
> -		goto reset_info_exit;
> -	}

[1] The loop in this hunk figures out how many devices are affected,
      and therefore how much memory is needed.

> -
> -	devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
> -	if (!devices)
> -		return -ENOMEM;
> -
> -	fill.devices = devices;
> +	fill.devices = arg->devices;
> +	fill.devices_end = arg->devices +
> +			   (hdr.argsz - sizeof(hdr)) / sizeof(arg->devices[0]);
>  	fill.vdev = &vdev->vdev;
> 
>  	if (vfio_device_cdev_opened(&vdev->vdev))
> @@ -1264,29 +1247,14 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>  	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs,
>  					    &fill, slot);
>  	mutex_unlock(&vdev->vdev.dev_set->lock);
> +	if (ret)
> +		return ret;
> 
> -	/*
> -	 * If a device was removed between counting and filling, we may come up
> -	 * short of fill.max.  If a device was added, we'll have a return of
> -	 * -EAGAIN above.
> -	 */
> -	if (!ret) {
> -		hdr.count = fill.cur;
> -		hdr.flags = fill.flags;
> -	}

This mechanism is also removed, though the case it handles may be rare.

> -
> -reset_info_exit:
> +	hdr.count = fill.devices - arg->devices;
> +	hdr.flags = fill.flags;
>  	if (copy_to_user(arg, &hdr, minsz))
>  		ret = -EFAULT;
> -
> -	if (!ret) {
> -		if (copy_to_user(&arg->devices, devices,
> -				 hdr.count * sizeof(*devices)))
> -			ret = -EFAULT;
> -	}
> -
> -	kfree(devices);
> -	return ret;
> +	return 0;

This should still return ret, since "copy_to_user(arg, &hdr, minsz)" can
fail, in which case ret is set to -EFAULT.

>  }
> 
>  static int

It appears to me that there are subtle changes to the uapi behavior (-ENOSPC,
-EAGAIN). The uapi header doesn't document these return values, but per the
comments in the code, the behavior does change. Maybe we can do this in a
follow-up patch instead of as part of this series.

Regards,
Yi Liu


More information about the Intel-gfx mailing list