[Intel-gfx] [PATCH v3 05/12] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET
Eric Auger
eric.auger at redhat.com
Wed Apr 5 08:02:13 UTC 2023
On 4/4/23 22:18, Alex Williamson wrote:
> On Sat, 1 Apr 2023 07:44:22 -0700
> Yi Liu <yi.l.liu at intel.com> wrote:
>
>> as an alternative method for ownership check when iommufd is used. In
>> this case all opened devices in the affected dev_set are verified to
>> be bound to a same valid iommufd value to allow reset. It's simpler
>> and faster as user does not need to pass a set of fds and kernel no
>> need to search the device within the given fds.
>>
>> a device in noiommu mode doesn't have a valid iommufd, so this method
>> should not be used in a dev_set which contains multiple devices and one
>> of them is in noiommu. The only allowed noiommu scenario is that the
>> calling device is noiommu and it's in a singleton dev_set.
>>
>> Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
>> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
>> Reviewed-by: Jason Gunthorpe <jgg at nvidia.com>
>> Tested-by: Yanting Jiang <yanting.jiang at intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu at intel.com>
>> ---
>> drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++-----
>> include/uapi/linux/vfio.h | 9 ++++++-
>> 2 files changed, 44 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
>> index 3696b8e58445..b68fcba67a4b 100644
>> --- a/drivers/vfio/pci/vfio_pci_core.c
>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>> @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>> struct vfio_pci_group_info;
>> static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
>> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>> - struct vfio_pci_group_info *groups);
>> + struct vfio_pci_group_info *groups,
>> + struct iommufd_ctx *iommufd_ctx);
>>
>> /*
>> * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
>> @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>> return ret;
>>
>> /* Somewhere between 1 and count is OK */
>> - if (!hdr->count || hdr->count > count)
>> + if (hdr->count > count)
>> return -EINVAL;
>>
>> group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL);
>> @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>> info.count = hdr->count;
>> info.files = files;
>>
>> - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
>> + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
>>
>> hot_reset_release:
>> for (file_idx--; file_idx >= 0; file_idx--)
>> @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>> {
>> unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
>> struct vfio_pci_hot_reset hdr;
>> + struct iommufd_ctx *iommufd;
>> bool slot = false;
>>
>> if (copy_from_user(&hdr, arg, minsz))
>> @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>> else if (pci_probe_reset_bus(vdev->pdev->bus))
>> return -ENODEV;
>>
>> - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg);
>> + if (hdr.count)
>> + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg);
>> +
>> + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
>> +
>> + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
>> }
>>
>> static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
>> @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
>> {
>> unsigned int i;
>>
>> + if (!groups)
>> + return false;
>> +
>> for (i = 0; i < groups->count; i++)
>> if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
>> return true;
>> @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
>> return ret;
>> }
>>
>> +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
>> + struct iommufd_ctx *iommufd_ctx)
>> +{
>> + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
>> +
>> + if (!iommufd)
>> + return false;
>> +
>> + return iommufd == iommufd_ctx;
>> +}
>> +
>> /*
>> * We need to get memory_lock for each device, but devices can share mmap_lock,
>> * therefore we need to zap and hold the vma_lock for each device, and only then
>> * get each memory_lock.
>> */
>> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>> - struct vfio_pci_group_info *groups)
>> + struct vfio_pci_group_info *groups,
>> + struct iommufd_ctx *iommufd_ctx)
>> {
>> struct vfio_pci_core_device *cur_mem;
>> struct vfio_pci_core_device *cur_vma;
>> @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>> *
>> * Otherwise all opened devices in the dev_set must be
>> * contained by the set of groups provided by the user.
>> + *
>> + * If user provides a zero-length array, then all the
>> + * opened devices must be bound to a same iommufd_ctx.
>> + *
>> + * If all above checks are failed, reset is allowed only if
>> + * the calling device is in a singleton dev_set.
>> */
>> if (cur_vma->vdev.open_count &&
>> - !vfio_dev_in_groups(cur_vma, groups)) {
>> + !vfio_dev_in_groups(cur_vma, groups) &&
>> + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) &&
>> + (dev_set->device_count > 1)) {
> This last condition looks buggy to me, we need all conditions to be
> true to generate an error here, which means that for a singleton
> dev_set, it doesn't matter what group fds are passed, if any, or whether
> the iommufd context matches. I think in fact this means that the empty
> array path is equally available for group use cases with a singleton
> dev_set, but we don't enable it for multiple device dev_sets like we do
> iommufd.
>
> You pointed out a previous issue with hot-reset info and no-iommu where
> if other affected devices are not bound to vfio-pci the info ioctl
> returns error. That's handled in the hot-reset ioctl by the fact that
> all affected devices must be in the dev_set and therefore bound to
> vfio-pci drivers. So it seems to me that aside from the spurious error
> because we can't report an iommu group when none exists, and didn't
> spot it to invent an invalid group for debugging, hot-reset otherwise
> works with no-iommu just like it does for iommu backed devices. We
> don't currently require singleton no-iommu dev_sets afaict.
>
> I'll also note that if the dev_set is singleton, this suggests that
> pci_reset_function() can make use of bus reset, so a hot-reset is
> accessible via VFIO_DEVICE_RESET if the appropriate reset method is
> selected.
>
> Therefore, I think as written, the singleton dev_set hot-reset is
> enabled for iommufd and (unintentionally?) for the group path, while
> also negating a requirement for a group fd or that a provided group fd
> actually matches the device in this latter case. The null-array
> approach is not however extended to groups for more general use.
> Additionally, limiting no-iommu hot-reset to singleton dev_sets
> provides only a marginal functional difference vs VFIO_DEVICE_RESET.
> Thanks,
>
> Alex
What bout introducing a helper
static bool is_reset_ok(pdev, groups, ctx) {
if (!pdev->vdev.open_count)
return true;
if (groups && vfio_dev_in_groups(pdev, groups))
return true;
if (ctx && vfio_dev_in_iommufd_ctx(pdev, ctx)
return true;
return false;
}
Assuming the above logic is correct I think this would make the code
more readable
Thanks
Eric
>> ret = -EINVAL;
>> goto err_undo;
>> }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index f96e5689cffc..17aa5d09db41 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info {
>> * the calling user must ensure all affected devices, if opened, are
>> * owned by itself.
>> *
>> - * The ownership is proved by an array of group fds.
>> + * The ownership can be proved by:
>> + * - An array of group fds
>> + * - A zero-length array
>> + *
>> + * In the last case all affected devices which are opened by this user
>> + * must have been bound to a same iommufd. If the calling device is in
>> + * noiommu mode (no valid iommufd) then it can be reset only if the reset
>> + * doesn't affect other devices.
>> *
>> * Return: 0 on success, -errno on failure.
>> */
More information about the Intel-gfx
mailing list