[Intel-gfx] [PATCH v2 10/10] vfio/pci: Add VFIO_DEVICE_GET_PCI_HOT_RESET_GROUP_INFO

Liu, Yi L yi.l.liu at intel.com
Tue Mar 28 03:45:00 UTC 2023


> From: Alex Williamson <alex.williamson at redhat.com>
> Sent: Tuesday, March 28, 2023 4:41 AM
> 
> On Mon, 27 Mar 2023 13:26:19 -0600
> Alex Williamson <alex.williamson at redhat.com> wrote:
> 
[...]
> > > diff --git a/drivers/vfio/pci/vfio_pci_core.c
> b/drivers/vfio/pci/vfio_pci_core.c
> > > index 19f5b075d70a..45edf4e9b98b 100644
> > > --- a/drivers/vfio/pci/vfio_pci_core.c
> > > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > > @@ -1181,6 +1181,102 @@ static int vfio_pci_ioctl_reset(struct
> vfio_pci_core_device *vdev,
> > >  	return ret;
> > >  }
> > >
> > > +static struct pci_dev *
> > > +vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set);
> > > +
> > > +static int vfio_pci_ioctl_get_pci_hot_reset_group_info(
> > > +	struct vfio_pci_core_device *vdev,
> > > +	struct vfio_pci_hot_reset_group_info __user *arg)
> > > +{
> > > +	unsigned long minsz =
> > > +		offsetofend(struct vfio_pci_hot_reset_group_info, count);
> > > +	struct vfio_pci_hot_reset_group_info hdr;
> > > +	struct iommufd_ctx *iommufd, *cur_iommufd;
> > > +	u32 count = 0, index = 0, *devices = NULL;
> > > +	struct vfio_pci_core_device *cur;
> > > +	bool slot = false;
> > > +	int ret = 0;
> > > +
> > > +	if (copy_from_user(&hdr, arg, minsz))
> > > +		return -EFAULT;
> > > +
> > > +	if (hdr.argsz < minsz)
> > > +		return -EINVAL;
> > > +
> > > +	hdr.flags = 0;
> > > +
> > > +	/* Can we do a slot or bus reset or neither? */
> > > +	if (!pci_probe_reset_slot(vdev->pdev->slot))
> > > +		slot = true;
> > > +	else if (pci_probe_reset_bus(vdev->pdev->bus))
> > > +		return -ENODEV;
> > > +
> > > +	mutex_lock(&vdev->vdev.dev_set->lock);
> > > +	if (!vfio_pci_dev_set_resettable(vdev->vdev.dev_set)) {

[1]

> > > +		ret = -EPERM;
> > > +		goto out_unlock;
> > > +	}
> > > +
> > > +	iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> > > +	if (!iommufd) {
> > > +		ret = -EPERM;
> > > +		goto out_unlock;
> > > +	}
> > > +
> > > +	/* How many devices are affected? */
> > > +	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
> vfio_pci_count_devs,
> > > +					    &count, slot);
> > > +	if (ret)
> > > +		goto out_unlock;
> > > +
> > > +	WARN_ON(!count); /* Should always be at least one */
> > > +
> > > +	/*
> > > +	 * If there's enough space, fill it now, otherwise return -ENOSPC and
> > > +	 * the number of devices affected.
> > > +	 */
> > > +	if (hdr.argsz < sizeof(hdr) + (count * sizeof(*devices))) {
> > > +		ret = -ENOSPC;
> > > +		hdr.count = count;
> > > +		goto reset_info_exit;
> > > +	}
> > > +
> > > +	devices = kcalloc(count, sizeof(*devices), GFP_KERNEL);
> > > +	if (!devices) {
> > > +		ret = -ENOMEM;
> > > +		goto reset_info_exit;
> > > +	}
> > > +
> > > +	list_for_each_entry(cur, &vdev->vdev.dev_set->device_list,
> vdev.dev_set_list) {
> > > +		cur_iommufd = vfio_iommufd_physical_ictx(&cur->vdev);
> > > +		if (cur->vdev.open_count) {
> > > +			if (cur_iommufd != iommufd) {
> > > +				ret = -EPERM;
> > > +				break;
> > > +			}
> > > +			ret = vfio_iommufd_physical_devid(&cur->vdev,
> &devices[index]);
> > > +			if (ret)
> > > +				break;
> > > +			index++;
> > > +		}
> > > +	}
> 
> This also makes use of vfio_pci_for_each_slot_or_bus() to iterate
> affected devices, but then still assumes those affected devices are
> necessarily represented in the dev_set.  For example, a two-function
> device with ACS isolation can have function 0 bound to vfio and
> function 1 bound to a native host driver.  The above code requires the
> user to pass a buffer large enough for both functions, but only
> function 0 is part of the dev_set, so function 1 is not reported as
> affected, nor does it generate an error.

The vfio_pci_dev_set_resettable() is used at [1] to check if all the affected
devices are in the dev_set. If not, then it fails at the first place. So in the
following code, looping the devices in the dev_set is equivalent to looping
devices with vfio_pci_for_each_slot_or_bus(). I need to loop dev_set as
dev_set has the vfio_device which is more convenient to get dev_id.

> 
> It looks like we also fail to set hdr.count except in the error path
> above, so the below copy_to_user() copies a user specified range beyond
> the end the allocated devices buffer out to userspace!  Thanks,

Oops, yes, it is. My test only has one device affected, so it does not hit
the problem. ☹ 

Thanks,
Yi Liu

> Alex
> 
> > > +
> > > +reset_info_exit:
> > > +	if (copy_to_user(arg, &hdr, minsz))
> > > +		ret = -EFAULT;
> > > +
> > > +	if (!ret) {
> > > +		if (copy_to_user(&arg->devices, devices,
> > > +				 hdr.count * sizeof(*devices)))
> > > +			ret = -EFAULT;
> > > +	}
> > > +
> > > +	kfree(devices);
> > > +out_unlock:
> > > +	mutex_unlock(&vdev->vdev.dev_set->lock);
> > > +	return ret;
> > > +}
> > > +
> > >  static int vfio_pci_ioctl_get_pci_hot_reset_info(
> > >  	struct vfio_pci_core_device *vdev,
> > >  	struct vfio_pci_hot_reset_info __user *arg)
> > > @@ -1404,6 +1500,8 @@ long vfio_pci_core_ioctl(struct vfio_device
> *core_vdev, unsigned int cmd,
> > >  		return vfio_pci_ioctl_get_irq_info(vdev, uarg);
> > >  	case VFIO_DEVICE_GET_PCI_HOT_RESET_INFO:
> > >  		return vfio_pci_ioctl_get_pci_hot_reset_info(vdev, uarg);
> > > +	case VFIO_DEVICE_GET_PCI_HOT_RESET_GROUP_INFO:
> > > +		return vfio_pci_ioctl_get_pci_hot_reset_group_info(vdev,
> uarg);
> > >  	case VFIO_DEVICE_GET_REGION_INFO:
> > >  		return vfio_pci_ioctl_get_region_info(vdev, uarg);
> > >  	case VFIO_DEVICE_IOEVENTFD:
> > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > index 25432ef213ee..61b801dfd40b 100644
> > > --- a/include/uapi/linux/vfio.h
> > > +++ b/include/uapi/linux/vfio.h
> > > @@ -669,6 +669,53 @@ struct vfio_pci_hot_reset_info {
> > >
> > >  #define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO	_IO(VFIO_TYPE,
> VFIO_BASE + 12)
> > >
> > > +/**
> > > + * VFIO_DEVICE_GET_PCI_HOT_RESET_GROUP_INFO -
> _IOWR(VFIO_TYPE, VFIO_BASE + 12,
> > > + *						    struct
> vfio_pci_hot_reset_group_info)
> > > + *
> > > + * This is used in the vfio device cdev mode.  It returns the list of
> > > + * affected devices (represented by iommufd dev_id) when hot reset is
> > > + * issued on the current device with which this ioctl is invoked.  It
> > > + * only includes the devices that are opened by the current user in the
> > > + * time of this command is invoked.  This list may change when user
> opens
> > > + * new device or close opened device, hence user should re-invoke it
> > > + * after open/close devices.  This command has no guarantee on the
> result
> > > + * of VFIO_DEVICE_PCI_HOT_RESET since the not-opened affected
> device can
> > > + * be by other users in the window between the two ioctls.  If the
> affected
> > > + * devices are opened by multiple users, the
> VFIO_DEVICE_PCI_HOT_RESET
> > > + * shall fail, detail can check the description of
> VFIO_DEVICE_PCI_HOT_RESET.
> > > + *
> > > + * For the users that open vfio group/container, this ioctl is useless as
> > > + * they have no idea about the iommufd dev_id returned by this ioctl.
> For
> > > + * the users of the traditional mode vfio group/container, this ioctl will
> > > + * fail as this mode does not use iommufd hence no dev_id to report
> back.
> > > + * For the users of the iommufd compat mode vfio group/container,
> this ioctl
> > > + * would succeed as this mode uses iommufd as container fd.  But such
> users
> > > + * still have no idea about the iommufd dev_id as the dev_id is only
> stored
> > > + * in kernel in this mode.  For the users of the vfio group/container, the
> > > + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO should be used to know
> the hot reset
> > > + * affected devices.
> > > + *
> > > + * Return: 0 on success, -errno on failure:
> > > + *	-enospc = insufficient buffer;
> > > + *	-enodev = unsupported for device;
> > > + *	-eperm = no permission for device, this error comes:
> > > + *		 - when there are affected devices that are opened but
> > > + *		   not bound to the same iommufd with the current device
> > > + *		   with which this ioctl is invoked,
> > > + *		 - there are affected devices that are not bound to vfio
> > > + *		   driver yet.
> > > + *		 - no valid iommufd is bound (e.g. noiommu mode)
> > > + */
> > > +struct vfio_pci_hot_reset_group_info {
> > > +	__u32	argsz;
> > > +	__u32	flags;
> > > +	__u32	count;
> > > +	__u32	devices[];
> > > +};
> > > +
> > > +#define VFIO_DEVICE_GET_PCI_HOT_RESET_GROUP_INFO
> 	_IO(VFIO_TYPE, VFIO_BASE + 18)
> > > +
> > >  /**
> > >   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
> > >   *				    struct vfio_pci_hot_reset)
> >



More information about the Intel-gfx mailing list