[Intel-gfx] [PATCH v6 10/10] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET
Liu, Yi L
yi.l.liu at intel.com
Tue May 30 04:23:12 UTC 2023
> From: Alex Williamson <alex.williamson at redhat.com>
> Sent: Thursday, May 25, 2023 4:20 AM
>
> On Mon, 22 May 2023 04:57:51 -0700
> Yi Liu <yi.l.liu at intel.com> wrote:
>
> > This is the way user to invoke hot-reset for the devices opened by cdev
> > interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED
> > in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> > hot-reset for cdev devices.
> >
> > Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
> > Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> > Reviewed-by: Jason Gunthorpe <jgg at nvidia.com>
> > Tested-by: Yanting Jiang <yanting.jiang at intel.com>
> > Signed-off-by: Yi Liu <yi.l.liu at intel.com>
> > ---
> > drivers/vfio/pci/vfio_pci_core.c | 56 +++++++++++++++++++++++++-------
> > include/uapi/linux/vfio.h | 14 ++++++++
> > 2 files changed, 59 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> > index 890065f846e4..67f1cb426505 100644
> > --- a/drivers/vfio/pci/vfio_pci_core.c
> > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > @@ -181,7 +181,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device
> *vdev)
> > struct vfio_pci_group_info;
> > static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
> > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > - struct vfio_pci_group_info *groups);
> > + struct vfio_pci_group_info *groups,
> > + struct iommufd_ctx *iommufd_ctx);
> >
> > /*
> > * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> > @@ -1301,8 +1302,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> > if (ret)
> > return ret;
> >
> > - /* Somewhere between 1 and count is OK */
> > - if (!array_count || array_count > count)
> > + if (array_count > count || vfio_device_cdev_opened(&vdev->vdev))
> > return -EINVAL;
> >
> > group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> > @@ -1351,7 +1351,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> > info.count = array_count;
> > info.files = files;
> >
> > - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> > + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
> >
> > hot_reset_release:
> > for (file_idx--; file_idx >= 0; file_idx--)
> > @@ -1380,7 +1380,11 @@ static int vfio_pci_ioctl_pci_hot_reset(struct
> vfio_pci_core_device *vdev,
> > else if (pci_probe_reset_bus(vdev->pdev->bus))
> > return -ENODEV;
> >
> > - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > + if (hdr.count)
> > + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > +
> > + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
> > + vfio_iommufd_device_ictx(&vdev->vdev));
> > }
> >
> > static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> > @@ -2347,13 +2351,16 @@ const struct pci_error_handlers
> vfio_pci_core_err_handlers = {
> > };
> > EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
> >
> > -static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
> > +static bool vfio_dev_in_groups(struct vfio_device *vdev,
> > struct vfio_pci_group_info *groups)
> > {
> > unsigned int i;
> >
> > + if (!groups)
> > + return false;
> > +
> > for (i = 0; i < groups->count; i++)
> > - if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> > + if (vfio_file_has_dev(groups->files[i], vdev))
> > return true;
> > return false;
> > }
> > @@ -2429,7 +2436,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct
> vfio_device_set *dev_set)
> > * get each memory_lock.
> > */
> > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > - struct vfio_pci_group_info *groups)
> > + struct vfio_pci_group_info *groups,
> > + struct iommufd_ctx *iommufd_ctx)
> > {
> > struct vfio_pci_core_device *cur_mem;
> > struct vfio_pci_core_device *cur_vma;
> > @@ -2459,11 +2467,37 @@ static int vfio_pci_dev_set_hot_reset(struct
> vfio_device_set *dev_set,
> > goto err_unlock;
> >
> > list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
> > + bool owned;
> > +
> > /*
> > - * Test whether all the affected devices are contained by the
> > - * set of groups provided by the user.
> > + * Test whether all the affected devices can be reset by the
> > + * user.
> > + *
> > + * If the user provides a set of groups, all the devices
> > + * in the dev_set should be contained by the set of groups
> > + * provided by the user.
>
> "If called from a group opened device and the user provides a set of
> groups,..."
>
> > + *
> > + * If the user provides a zero-length group fd array, then
>
> "If called from a cdev opened device and the user provides a
> zero-length array,..."
>
>
> > + * all the devices in the dev_set must be bound to the same
> > + * iommufd_ctx as the input iommufd_ctx. If there is any
> > + * device that has not been bound to iommufd_ctx yet, check
> > + * if its iommu_group has any device bound to the input
> > + * iommufd_ctx Such devices can be considered owned by
>
> "."...........................^
Thanks, above comments well received.
> > + * the input iommufd_ctx as the device cannot be owned
> > + * by another iommufd_ctx when its iommu_group is owned.
> > + *
> > + * Otherwise, reset is not allowed.
>
>
> In the case where a non-null array is provided,
> vfio_pci_ioctl_pci_hot_reset_groups() explicitly tests
> vfio_device_cdev_opened(), so we exclude cdev devices from providing a
> group list.
Yes.
> However, what prevents a compat opened group device from
> providing a null array?
I think I've asked this question before. What I got is seems no need
to prevent it[1].
[1] https://lore.kernel.org/kvm/BN9PR11MB5276ABE919C04E06A0326E8E8CBC9@BN9PR11MB5276.namprd11.prod.outlook.com/
Now, I intend to disallow it. If compat mode user binds the devices
to different containers, it shall be able to do hot reset as it can use
group fd to prove ownership. But if using the zero-length array, it
would be failed. So we may add below logic and remove the
vfio_device_cdev_opened() in vfio_pci_ioctl_pci_hot_reset_groups().
vfio_pci_ioctl_pci_hot_reset()
{
...
if (!!hdr.count == !!vfio_device_cdev_opened(&vdev->vdev))
return -EINVAL;
if (hdr.count)
vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
vfio_iommufd_device_ictx(&vdev->vdev));
}
>
> I thought it would be that this function is called with groups == NULL
> and therefore the vfio_dev_in_groups() test below fails, but I don't
> think that's true for a compat opened device. Thanks,
How about above logic?
Regards,
Yi Liu
>
> > */
> > - if (!vfio_dev_in_groups(cur_vma, groups)) {
> > + if (iommufd_ctx) {
> > + int devid = vfio_iommufd_device_hot_reset_devid(&cur_vma-
> >vdev,
> > + iommufd_ctx);
> > +
> > + owned = (devid != VFIO_PCI_DEVID_NOT_OWNED);
> > + } else {
> > + owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
> > + }
> > +
> > + if (!owned) {
> > ret = -EINVAL;
> > goto err_undo;
> > }
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 01203215251a..24858b650562 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -686,6 +686,9 @@ enum {
> > * Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the
> > * affected devices are owned by the user. This flag is available only
> > * when VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
> > + * When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
> > + * length fd array on the calling device as the ownership is validated
> > + * by iommufd_ctx.
> > *
> > * Return: 0 on success, -errno on failure:
> > * -enospc = insufficient buffer, -enodev = unsupported for device.
> > @@ -717,6 +720,17 @@ struct vfio_pci_hot_reset_info {
> > * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
> > * struct vfio_pci_hot_reset)
> > *
> > + * Userspace requests hot reset for the devices it operates. Due to the
> > + * underlying topology, multiple devices can be affected in the reset
> > + * while some might be opened by another user. To avoid interference
> > + * the calling user must ensure all affected devices are owned by itself.
> > + *
> > + * As the ownership described by VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, the
> > + * cdev opened devices must exclusively provide a zero-length fd array and
> > + * the group opened devices must exclusively use an array of group fds for
> > + * proof of ownership. Mixed access to devices between cdev and legacy
> > + * groups are not supported by this interface.
> > + *
> > * Return: 0 on success, -errno on failure.
> > */
> > struct vfio_pci_hot_reset {
More information about the Intel-gfx
mailing list