[Intel-gfx] [PATCH v5 16/19] vfio: Add VFIO_DEVICE_BIND_IOMMUFD

Liu, Yi L yi.l.liu at intel.com
Fri Mar 3 07:23:11 UTC 2023


> From: Liu, Yi L <yi.l.liu at intel.com>
> Sent: Friday, March 3, 2023 2:58 PM
> 
> > From: Jason Gunthorpe <jgg at nvidia.com>
> > Sent: Thursday, March 2, 2023 1:47 AM
> >
> > On Wed, Mar 01, 2023 at 09:19:07AM +0000, Liu, Yi L wrote:
> > > > From: Liu, Yi L <yi.l.liu at intel.com>
> > > > Sent: Monday, February 27, 2023 7:12 PM
> > > [...]
> > > > +long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
> > > > +				    unsigned long arg)
> > > > +{
> > > > +	struct vfio_device *device = df->device;
> > > > +	struct vfio_device_bind_iommufd bind;
> > > > +	struct iommufd_ctx *iommufd = NULL;
> > > > +	unsigned long minsz;
> > > > +	int ret;
> > > > +
> > > > +	minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid);
> > > > +
> > > > +	if (copy_from_user(&bind, (void __user *)arg, minsz))
> > > > +		return -EFAULT;
> > > > +
> > > > +	if (bind.argsz < minsz || bind.flags)
> > > > +		return -EINVAL;
> > > > +
> > > > +	if (!device->ops->bind_iommufd)
> > > > +		return -ENODEV;
> > >
> > > Hi Jason,
> > >
> > > Per the comment in vfio_iommufd_bind(), such device driver
> > > won't provide .bind_iommufd(). So shall we allow this ioctl
> > > to go longer to call .open_device() instead of failing it here?
> > > I think we need to allow it to go further. E.g. leave the check
> > > to be in vfio_iommufd_bind(). Otherwise, user may not able
> > > to use such devices. Is it?
> >
> > You are thinking about the crazy mdev samples?
> >
> > We should probably just change them to provide a 'no dma' set of ops.
> >
> > > > +struct vfio_device_bind_iommufd {
> > > > +	__u32		argsz;
> > > > +	__u32		flags;
> > > > +	__aligned_u64	dev_cookie;
> > > > +	__s32		iommufd;
> > > > +	__u32		out_devid;
> > >
> > > As above, for the devices that do not do DMA, there is
> no .bind_iommufd
> > > op, hence no iommufd_device generated. This means no good value
> > > can be filled in this out_devid field. So this field is optional. Only
> > > for the devices which do DMA, should this out_devid field return a
> > > valid ID otherwise an invalid ID would be filled (e.g. value #0 is an
> > > invalid value in the iommufd object id pool). Userspace needs to
> > > check if the out_devid is valid or not before use. This ID can be further
> > > used in iommufd uAPIs like IOMMU_HWPT_ALLOC,
> > IOMMU_DEVICE_GET_INFO
> > > and etc.
> >
> > I would say create an access and harmonize the no-DMA devices with the
> > emulated devices.
> 
> How about below change?
> 
> diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> index 4f82a6fa7c6c..e536515086d7 100644
> --- a/drivers/vfio/iommufd.c
> +++ b/drivers/vfio/iommufd.c
> @@ -18,12 +18,8 @@ int vfio_iommufd_bind(struct vfio_device *vdev,
> struct iommufd_ctx *ictx)
> 
>  	lockdep_assert_held(&vdev->dev_set->lock);
> 
> -	/*
> -	 * If the driver doesn't provide this op then it means the device does
> -	 * not do DMA at all. So nothing to do.
> -	 */
> -	if (!vdev->ops->bind_iommufd)
> -		return 0;
> +	if (WARN_ON(!vdev->ops->bind_iommufd))
> +		return -ENODEV;
> 
>  	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
>  	if (ret)
> @@ -102,7 +98,9 @@
> EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
>  /*
>   * The emulated standard ops mean that vfio_device is going to use the
>   * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using
> this
> - * ops set should call vfio_register_emulated_iommu_dev().
> + * ops set should call vfio_register_emulated_iommu_dev(). Drivers that
> do
> + * not call  vfio_pin_pages()/vfio_dma_rw() has no need to provide
> dma_unmap
> + * callback.
>   */
> 
>  static void vfio_emulated_unmap(void *data, unsigned long iova,
> @@ -110,7 +107,8 @@ static void vfio_emulated_unmap(void *data,
> unsigned long iova,
>  {
>  	struct vfio_device *vdev = data;
> 
> -	vdev->ops->dma_unmap(vdev, iova, length);
> +	if (vdev->ops->dma_unmap)
> +		vdev->ops->dma_unmap(vdev, iova, length);
>  }
> 
>  static const struct iommufd_access_ops vfio_user_ops = {
> diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
> index e54eb752e1ba..19391dda5fba 100644
> --- a/samples/vfio-mdev/mbochs.c
> +++ b/samples/vfio-mdev/mbochs.c
> @@ -1374,6 +1374,9 @@ static const struct vfio_device_ops
> mbochs_dev_ops = {
>  	.write = mbochs_write,
>  	.ioctl = mbochs_ioctl,
>  	.mmap = mbochs_mmap,
> +	.bind_iommufd	= vfio_iommufd_emulated_bind,
> +	.unbind_iommufd	= vfio_iommufd_emulated_unbind,
> +	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
>  };
> 
>  static struct mdev_driver mbochs_driver = {
> diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
> index e8400fdab71d..5f48aef36995 100644
> --- a/samples/vfio-mdev/mdpy.c
> +++ b/samples/vfio-mdev/mdpy.c
> @@ -663,6 +663,9 @@ static const struct vfio_device_ops mdpy_dev_ops =
> {
>  	.write = mdpy_write,
>  	.ioctl = mdpy_ioctl,
>  	.mmap = mdpy_mmap,
> +	.bind_iommufd	= vfio_iommufd_emulated_bind,
> +	.unbind_iommufd	= vfio_iommufd_emulated_unbind,
> +	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
>  };
> 
>  static struct mdev_driver mdpy_driver = {
> diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
> index e887de672c52..35460901b9f7 100644
> --- a/samples/vfio-mdev/mtty.c
> +++ b/samples/vfio-mdev/mtty.c
> @@ -1269,6 +1269,9 @@ static const struct vfio_device_ops mtty_dev_ops
> = {
>  	.read = mtty_read,
>  	.write = mtty_write,
>  	.ioctl = mtty_ioctl,
> +	.bind_iommufd	= vfio_iommufd_emulated_bind,
> +	.unbind_iommufd	= vfio_iommufd_emulated_unbind,
> +	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
>  };
> 
>  static struct mdev_driver mtty_driver = {
> 
> > What should we return here anyhow if an access was created?
> 
> iommufd_access->obj.id. should be fine. Is it?

btw. It requires creating iommufd_access in vfio_iommufd_emulated_bind()
instead of in the attach(). Seems like Nicolin's replace domain series has a patch
to move iommufd_access creation to the bind().

Regards,
Yi Liu


More information about the Intel-gfx mailing list