[Intel-gfx] [PATCH v7 9/9] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET

Alex Williamson alex.williamson at redhat.com
Thu Jun 8 22:30:18 UTC 2023


On Fri,  2 Jun 2023 05:15:15 -0700
Yi Liu <yi.l.liu at intel.com> wrote:

> This is the way user to invoke hot-reset for the devices opened by cdev
> interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED
> in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> hot-reset for cdev devices.
> 
> Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> Reviewed-by: Jason Gunthorpe <jgg at nvidia.com>
> Tested-by: Yanting Jiang <yanting.jiang at intel.com>
> Signed-off-by: Yi Liu <yi.l.liu at intel.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 61 ++++++++++++++++++++++++++------
>  include/uapi/linux/vfio.h        | 14 ++++++++
>  2 files changed, 64 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index a615a223cdef..b0eadafcbcf5 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -181,7 +181,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>  struct vfio_pci_group_info;
>  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups);
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx);
>  
>  /*
>   * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> @@ -1308,8 +1309,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	if (ret)
>  		return ret;
>  
> -	/* Somewhere between 1 and count is OK */
> -	if (!array_count || array_count > count)
> +	if (array_count > count)
>  		return -EINVAL;
>  
>  	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> @@ -1358,7 +1358,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	info.count = array_count;
>  	info.files = files;
>  
> -	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> +	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
>  
>  hot_reset_release:
>  	for (file_idx--; file_idx >= 0; file_idx--)
> @@ -1381,13 +1381,21 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>  	if (hdr.argsz < minsz || hdr.flags)
>  		return -EINVAL;
>  
> +	/* zero-length array is only for cdev opened devices */
> +	if (!!hdr.count == vfio_device_cdev_opened(&vdev->vdev))
> +		return -EINVAL;
> +
>  	/* Can we do a slot or bus reset or neither? */
>  	if (!pci_probe_reset_slot(vdev->pdev->slot))
>  		slot = true;
>  	else if (pci_probe_reset_bus(vdev->pdev->bus))
>  		return -ENODEV;
>  
> -	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +	if (hdr.count)
> +		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +
> +	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
> +					  vfio_iommufd_device_ictx(&vdev->vdev));
>  }
>  
>  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> @@ -2354,13 +2362,16 @@ const struct pci_error_handlers vfio_pci_core_err_handlers = {
>  };
>  EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
>  
> -static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
> +static bool vfio_dev_in_groups(struct vfio_device *vdev,
>  			       struct vfio_pci_group_info *groups)
>  {
>  	unsigned int i;
>  
> +	if (!groups)
> +		return false;
> +
>  	for (i = 0; i < groups->count; i++)
> -		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> +		if (vfio_file_has_dev(groups->files[i], vdev))
>  			return true;
>  	return false;
>  }
> @@ -2436,7 +2447,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
>   * get each memory_lock.
>   */
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups)
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx)
>  {
>  	struct vfio_pci_core_device *cur_mem;
>  	struct vfio_pci_core_device *cur_vma;
> @@ -2466,11 +2478,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  		goto err_unlock;
>  
>  	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
> +		bool owned;
> +
>  		/*
> -		 * Test whether all the affected devices are contained by the
> -		 * set of groups provided by the user.
> +		 * Test whether all the affected devices can be reset by the
> +		 * user.
> +		 *
> +		 * If called from a group opened device and the user provides
> +		 * a set of groups, all the devices in the dev_set should be
> +		 * contained by the set of groups provided by the user.
> +		 *
> +		 * If called from a cdev opened device and the user provides
> +		 * a zero-length array, all the devices in the dev_set must
> +		 * be bound to the same iommufd_ctx as the input iommufd_ctx.
> +		 * If there is any device that has not been bound to any
> +		 * iommufd_ctx yet, check if its iommu_group has any device
> +		 * bound to the input iommufd_ctx.  Such devices can be
> +		 * considered owned by the input iommufd_ctx as the device
> +		 * cannot be owned by another iommufd_ctx when its iommu_group
> +		 * is owned.
> +		 *
> +		 * Otherwise, reset is not allowed.
>  		 */
> -		if (!vfio_dev_in_groups(cur_vma, groups)) {
> +		if (iommufd_ctx) {
> +			int devid = vfio_iommufd_device_hot_reset_devid(&cur_vma->vdev,
> +									iommufd_ctx);
> +
> +			owned = (devid != VFIO_PCI_DEVID_NOT_OWNED);
> +		} else {
> +			owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
> +		}
> +
> +		if (!owned) {
>  			ret = -EINVAL;
>  			goto err_undo;
>  		}
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 70cc31e6b1ce..f753124e1c82 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -690,6 +690,9 @@ enum {
>   *	  affected devices are represented in the dev_set and also owned by
>   *	  the user.  This flag is available only when
>   *	  flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
> + *	  When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
> + *	  length fd array on the calling device as the ownership is validated
> + *	  by iommufd_ctx.
>   *
>   * Return: 0 on success, -errno on failure:
>   *	-enospc = insufficient buffer, -enodev = unsupported for device.
> @@ -721,6 +724,17 @@ struct vfio_pci_hot_reset_info {
>   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
>   *				    struct vfio_pci_hot_reset)
>   *
> + * Userspace requests hot reset for the devices it operates.  Due to the
> + * underlying topology, multiple devices can be affected in the reset
> + * while some might be opened by another user.  To avoid interference
> + * the calling user must ensure all affected devices are owned by itself.

This phrasing suggest to me that we're placing the responsibility on
the user to avoid resetting another user's devices.  Perhaps these
paragraphs could be replaced with:

  A PCI hot reset results in either a bus or slot reset which may affect
  other devices sharing the bus/slot.  The calling user must have
  ownership of the full set of affected devices as determined by the
  VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl.

  When called on a device file descriptor acquired through the vfio
  group interface, the user is required to provide proof of ownership
  of those affected devices via the group_fds array in struct
  vfio_pci_hot_reset.

  When called on a direct cdev opened vfio device, the flags field of
  struct vfio_pci_hot_reset_info reports the ownership status of the
  affected devices and this ioctl must be called with an empty group_fds
  array.  See above INFO ioctl definition for ownership requirements.

  Mixed usage of legacy groups and cdevs across the set of affected
  devices is not supported.

Other than this and the couple other comments, the series looks ok to
me.  We still need acks from Jason for iommufd on 3-5.  Thanks,

Alex

> + *
> + * As the ownership described by VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, the
> + * cdev opened devices must exclusively provide a zero-length fd array and
> + * the group opened devices must exclusively use an array of group fds for
> + * proof of ownership.  Mixed access to devices between cdev and legacy
> + * groups are not supported by this interface.
> + *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_pci_hot_reset {



More information about the Intel-gfx mailing list