[Intel-gfx] [PATCH v4 9/9] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET

Liu, Yi L yi.l.liu at intel.com
Tue May 2 12:55:47 UTC 2023


> From: Alex Williamson <alex.williamson at redhat.com>
> Sent: Friday, April 28, 2023 5:55 AM
> 
> On Wed, 26 Apr 2023 07:54:19 -0700
> Yi Liu <yi.l.liu at intel.com> wrote:
> 
> > This is the way for the user to invoke hot-reset for devices opened via
> > the cdev interface. The user should check the flag
> > VFIO_PCI_HOT_RESET_FLAG_RESETTABLE in the output of the
> > VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing a hot-reset for
> > cdev devices.
> >
> > Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
> > Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> > Reviewed-by: Jason Gunthorpe <jgg at nvidia.com>
> > Tested-by: Yanting Jiang <yanting.jiang at intel.com>
> > Signed-off-by: Yi Liu <yi.l.liu at intel.com>
> > ---
> >  drivers/vfio/pci/vfio_pci_core.c | 66 +++++++++++++++++++++++++++-----
> >  include/uapi/linux/vfio.h        | 22 +++++++++++
> >  2 files changed, 79 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> > index 43858d471447..f70e3b948b16 100644
> > --- a/drivers/vfio/pci/vfio_pci_core.c
> > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device
> *vdev)
> >  struct vfio_pci_group_info;
> >  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
> >  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > -				      struct vfio_pci_group_info *groups);
> > +				      struct vfio_pci_group_info *groups,
> > +				      struct iommufd_ctx *iommufd_ctx);
> >
> >  /*
> >   * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> > @@ -1364,8 +1365,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> >  	if (ret)
> >  		return ret;
> >
> > -	/* Somewhere between 1 and count is OK */
> > -	if (!array_count || array_count > count)
> > +	if (array_count > count)
> >  		return -EINVAL;
> 
> Doesn't this need a || vfio_device_cdev_opened(vdev) test as well?
> It's invalid to pass fds for a cdev device.  Presumably it would fail
> later collecting group fds as well, but might as well enforce the
> semantics early.

Yes, it does.

> 
> >
> >  	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> > @@ -1414,7 +1414,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> >  	info.count = array_count;
> >  	info.files = files;
> >
> > -	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> > +	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
> >
> >  hot_reset_release:
> >  	for (file_idx--; file_idx >= 0; file_idx--)
> > @@ -1429,6 +1429,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct
> vfio_pci_core_device *vdev,
> >  {
> >  	unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
> >  	struct vfio_pci_hot_reset hdr;
> > +	struct iommufd_ctx *iommufd;
> >  	bool slot = false;
> >
> >  	if (copy_from_user(&hdr, arg, minsz))
> > @@ -1443,7 +1444,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct
> vfio_pci_core_device *vdev,
> >  	else if (pci_probe_reset_bus(vdev->pdev->bus))
> >  		return -ENODEV;
> >
> > -	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > +	if (hdr.count)
> > +		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > +
> > +	iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> > +
> > +	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
> 
> Why did we need to store iommufd in a variable?

Will remove it.

> >  }
> >
> >  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> > @@ -2415,6 +2421,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device
> *vdev,
> >  {
> >  	unsigned int i;
> >
> > +	if (!groups)
> > +		return false;
> > +
> >  	for (i = 0; i < groups->count; i++)
> >  		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> >  			return true;
> > @@ -2488,13 +2497,38 @@ static int vfio_pci_dev_set_pm_runtime_get(struct
> vfio_device_set *dev_set)
> >  	return ret;
> >  }
> >
> > +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
> > +				    struct iommufd_ctx *iommufd_ctx)
> > +{
> > +	struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> > +	struct iommu_group *iommu_group;
> > +
> > +	if (!iommufd_ctx)
> > +		return false;
> > +
> > +	if (iommufd == iommufd_ctx)
> > +		return true;
> > +
> > +	iommu_group = iommu_group_get(vdev->vdev.dev);
> > +	if (!iommu_group)
> > +		return false;
> > +
> > +	/*
> > +	 * Try to check if any device within iommu_group is bound with
> > +	 * the input iommufd_ctx.
> > +	 */
> > +	return vfio_devset_iommufd_has_group(vdev->vdev.dev_set,
> > +					     iommufd_ctx, iommu_group);
> > +}
> 
> This last test makes this not do what the function name suggests it
> does.  If it were true, the device is not in the iommufd_ctx, it simply
> cannot be within another iommu ctx.

Yes. It actually means the device cannot possibly be in another iommufd_ctx.

> 
> > +
> >  /*
> >   * We need to get memory_lock for each device, but devices can share mmap_lock,
> >   * therefore we need to zap and hold the vma_lock for each device, and only then
> >   * get each memory_lock.
> >   */
> >  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > -				      struct vfio_pci_group_info *groups)
> > +				      struct vfio_pci_group_info *groups,
> > +				      struct iommufd_ctx *iommufd_ctx)
> >  {
> >  	struct vfio_pci_core_device *cur_mem;
> >  	struct vfio_pci_core_device *cur_vma;
> > @@ -2525,10 +2559,24 @@ static int vfio_pci_dev_set_hot_reset(struct
> vfio_device_set *dev_set,
> >
> >  	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
> >  		/*
> > -		 * Test whether all the affected devices are contained by the
> > -		 * set of groups provided by the user.
> > +		 * Test whether all the affected devices can be reset by the
> > +		 * user.
> > +		 *
> > +		 * If user provides a set of groups, all the opened devices
> > +		 * in the dev_set should be contained by the set of groups
> > +		 * provided by the user.
> > +		 *
> > +		 * If user provides a zero-length group fd array, then all
> > +		 * the affected devices must be bound to same iommufd_ctx as
> > +		 * the input iommufd_ctx.  If there is device that has not
> > +		 * been bound to iommufd_ctx yet, shall check if there is any
> > +		 * device within its iommu_group that has been bound to the
> > +		 * input iommufd_ctx.
> > +		 *
> > +		 * Otherwise, reset is not allowed.
> >  		 */
> > -		if (!vfio_dev_in_groups(cur_vma, groups)) {
> > +		if (!vfio_dev_in_groups(cur_vma, groups) &&
> > +		    !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx)) {
> 
> 
> Rather than mangling vfio_dev_in_groups() and inventing
> vfio_dev_in_iommufd_ctx() that doesn't do what it implies, how about:
> 
> bool vfio_device_owned(struct vfio_device *vdev,
> 		       struct vfio_pci_group_info *groups,
> 		       struct iommufd_ctx *iommufd_ctx)
> {
> 	struct iommu_group *group;
> 
> 	WARN_ON(!!groups == !!iommufd_ctx);
> 
> 	if (groups)
> 		return vfio_dev_in_groups(vdev, groups));
> 
> 	if (vfio_iommufd_physical_ictx(vdev) == iommufd_ctx)
> 		return true;
> 
> 	group = iommu_group_get(vdev->dev);
> 	if (group)
> 		return vfio_devset_iommufd_has_group(vdev->vdev.dev_set,
> 						     iommufd_ctx, group);
> 	return false;
> }

Will follow above suggestion.

> Seems like such a function would live in vfio_main.c

That may require making struct vfio_pci_group_info visible outside of
vfio-pci. It seems strange to make vfio_main.c refer to a PCI-specific
structure.

> 
> >  			ret = -EINVAL;
> >  			goto err_undo;
> >  		}
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 4b4e2c28984b..1241d02d8701 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -710,6 +710,28 @@ struct vfio_pci_hot_reset_info {
> >   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
> >   *				    struct vfio_pci_hot_reset)
> >   *
> > + * Userspace requests hot reset for the devices it operates.  Due to the
> > + * underlying topology, multiple devices can be affected in the reset
> > + * while some might be opened by another user.  To avoid interference
> > + * the calling user must ensure all affected devices are owned by itself.
> > + * The ownership proof needs to refer the output of
> > + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO.  Ownership can be proved as:
> > + *
> > + *   1) An array of group fds - This is used for the devices opened via
> > + *				the group/container interface.
> > + *   2) A zero-length array - This is used for the devices opened via
> > + *			      the cdev interface.  User should check the
> > + *			      flag VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID
> > + *			      and flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE
> > + *			      before using this method.
> > + *
> > + * In case a non void group fd array is passed, the devices affected by
> > + * the reset must belong to those opened VFIO groups.  In case a zero
> > + * length array is passed, the other devices affected by the reset, if
> > + * any, must be either bound to the same iommufd as this VFIO device or
> > + * in the same iommu_group with a device that does.  Either of the two
> > + * methods is applied to check the feasibility of the hot reset.
> 
> This should probably just refer to the concept of ownership described
> in the INFO ioctl and clarify that cdev opened device must exclusively
> provide an empty array and group opened devices must exclusively use an
> array of group fds for proof of ownership.  Mixed access to devices
> between cdev and legacy groups are not supported by this interface.
> Thanks,

Sure. Will do that in the next version.

Regards,
Yi Liu

> 
> Alex
> 
> > + *
> >   * Return: 0 on success, -errno on failure.
> >   */
> >  struct vfio_pci_hot_reset {



More information about the Intel-gfx mailing list