[Intel-gfx] [PATCH 3/7] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET
Yi Liu
yi.l.liu at intel.com
Thu Mar 16 12:41:52 UTC 2023
as an alternative method for ownership check when iommufd is used. In
this case all opened devices in the affected dev_set are verified to
be bound to a same valid iommufd value to allow reset. It's simpler
and faster as user does not need to pass a set of fds and kernel no
need to search the device within the given fds.
a device in noiommu mode doesn't have a valid iommufd, so this method
should not be used in a dev_set which contains multiple devices and one
of them is in noiommu. The only allowed noiommu scenario is that the
calling device is noiommu and it's in a singleton dev_set.
Suggested-by: Jason Gunthorpe <jgg at nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu at intel.com>
---
drivers/iommu/iommufd/device.c | 6 ++
drivers/vfio/iommufd.c | 8 +++
drivers/vfio/pci/vfio_pci_core.c | 94 +++++++++++++++++++++++---------
include/linux/iommufd.h | 1 +
include/linux/vfio.h | 3 +
include/uapi/linux/vfio.h | 9 ++-
6 files changed, 93 insertions(+), 28 deletions(-)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 0295140dd384..2ca12716db98 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -131,6 +131,12 @@ void iommufd_device_unbind(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
+struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
+{
+ return idev->ictx;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);
+
static int iommufd_device_setup_msi(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt,
phys_addr_t sw_msi_start)
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 9aabd8b31c15..ca0c16bb747e 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -66,6 +66,14 @@ void vfio_iommufd_unbind(struct vfio_device *vdev)
vdev->ops->unbind_iommufd(vdev);
}
+struct iommufd_ctx *vfio_iommufd_physical_ictx(struct vfio_device *vdev)
+{
+ if (!vdev->iommufd_device)
+ return NULL;
+ return iommufd_device_to_ictx(vdev->iommufd_device);
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_ictx);
+
/*
* The physical standard ops mean that the iommufd_device is bound to the
* physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 5d745c9abf05..b68fcba67a4b 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
struct vfio_pci_group_info;
static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
- struct vfio_pci_group_info *groups);
+ struct vfio_pci_group_info *groups,
+ struct iommufd_ctx *iommufd_ctx);
/*
* INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -1255,29 +1256,17 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
return ret;
}
-static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
- struct vfio_pci_hot_reset __user *arg)
+static int
+vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset *hdr,
+ bool slot,
+ struct vfio_pci_hot_reset __user *arg)
{
- unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
- struct vfio_pci_hot_reset hdr;
int32_t *group_fds;
struct file **files;
struct vfio_pci_group_info info;
- bool slot = false;
int file_idx, count = 0, ret = 0;
- if (copy_from_user(&hdr, arg, minsz))
- return -EFAULT;
-
- if (hdr.argsz < minsz || hdr.flags)
- return -EINVAL;
-
- /* Can we do a slot or bus reset or neither? */
- if (!pci_probe_reset_slot(vdev->pdev->slot))
- slot = true;
- else if (pci_probe_reset_bus(vdev->pdev->bus))
- return -ENODEV;
-
/*
* We can't let userspace give us an arbitrarily large buffer to copy,
* so verify how many we think there could be. Note groups can have
@@ -1289,11 +1278,11 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
return ret;
/* Somewhere between 1 and count is OK */
- if (!hdr.count || hdr.count > count)
+ if (hdr->count > count)
return -EINVAL;
- group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
- files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
+ group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL);
+ files = kcalloc(hdr->count, sizeof(*files), GFP_KERNEL);
if (!group_fds || !files) {
kfree(group_fds);
kfree(files);
@@ -1301,7 +1290,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
}
if (copy_from_user(group_fds, arg->group_fds,
- hdr.count * sizeof(*group_fds))) {
+ hdr->count * sizeof(*group_fds))) {
kfree(group_fds);
kfree(files);
return -EFAULT;
@@ -1311,7 +1300,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
* Get the group file for each fd to ensure the group held across
* the reset
*/
- for (file_idx = 0; file_idx < hdr.count; file_idx++) {
+ for (file_idx = 0; file_idx < hdr->count; file_idx++) {
struct file *file = fget(group_fds[file_idx]);
if (!file) {
@@ -1335,10 +1324,10 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
if (ret)
goto hot_reset_release;
- info.count = hdr.count;
+ info.count = hdr->count;
info.files = files;
- ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+ ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
hot_reset_release:
for (file_idx--; file_idx >= 0; file_idx--)
@@ -1348,6 +1337,34 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
return ret;
}
+static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
+ struct vfio_pci_hot_reset hdr;
+ struct iommufd_ctx *iommufd;
+ bool slot = false;
+
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
+
+ if (hdr.argsz < minsz || hdr.flags)
+ return -EINVAL;
+
+ /* Can we do a slot or bus reset or neither? */
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return -ENODEV;
+
+ if (hdr.count)
+ return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg);
+
+ iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
+
+ return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
+}
+
static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
struct vfio_device_ioeventfd __user *arg)
{
@@ -2317,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
{
unsigned int i;
+ if (!groups)
+ return false;
+
for (i = 0; i < groups->count; i++)
if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
return true;
@@ -2392,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
return ret;
}
+static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
+ struct iommufd_ctx *iommufd_ctx)
+{
+ struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
+
+ if (!iommufd)
+ return false;
+
+ return iommufd == iommufd_ctx;
+}
+
/*
* We need to get memory_lock for each device, but devices can share mmap_lock,
* therefore we need to zap and hold the vma_lock for each device, and only then
* get each memory_lock.
*/
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
- struct vfio_pci_group_info *groups)
+ struct vfio_pci_group_info *groups,
+ struct iommufd_ctx *iommufd_ctx)
{
struct vfio_pci_core_device *cur_mem;
struct vfio_pci_core_device *cur_vma;
@@ -2438,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
*
* Otherwise all opened devices in the dev_set must be
* contained by the set of groups provided by the user.
+ *
+ * If user provides a zero-length array, then all the
+ * opened devices must be bound to a same iommufd_ctx.
+ *
+ * If all above checks are failed, reset is allowed only if
+ * the calling device is in a singleton dev_set.
*/
if (cur_vma->vdev.open_count &&
- !vfio_dev_in_groups(cur_vma, groups)) {
+ !vfio_dev_in_groups(cur_vma, groups) &&
+ !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) &&
+ (dev_set->device_count > 1)) {
ret = -EINVAL;
goto err_undo;
}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 1129a36a74c4..035d5d28e612 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -20,6 +20,7 @@ struct file;
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
struct device *dev, u32 *id);
void iommufd_device_unbind(struct iommufd_device *idev);
+struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev);
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
void iommufd_device_detach(struct iommufd_device *idev);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 3188d8a374bd..f0a5ff317b20 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -116,6 +116,7 @@ struct vfio_device_ops {
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+struct iommufd_ctx *vfio_iommufd_physical_ictx(struct vfio_device *vdev);
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
@@ -127,6 +128,8 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
u32 *out_device_id)) NULL)
#define vfio_iommufd_physical_unbind \
((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_ictx \
+ ((struct iommufd_ctx * (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_physical_attach_ioas \
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_emulated_bind \
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index f96e5689cffc..17aa5d09db41 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info {
* the calling user must ensure all affected devices, if opened, are
* owned by itself.
*
- * The ownership is proved by an array of group fds.
+ * The ownership can be proved by:
+ * - An array of group fds
+ * - A zero-length array
+ *
+ * In the last case all affected devices which are opened by this user
+ * must have been bound to a same iommufd. If the calling device is in
+ * noiommu mode (no valid iommufd) then it can be reset only if the reset
+ * doesn't affect other devices.
*
* Return: 0 on success, -errno on failure.
*/
--
2.34.1
More information about the Intel-gfx
mailing list