[PATCH 2/8] drm/panthor: Extend GROUP_CREATE for user submission

Mihail Atanassov mihail.atanassov at arm.com
Wed Aug 28 17:25:58 UTC 2024


From: Ketil Johnsen <ketil.johnsen at arm.com>

Allow userspace to mmap() the ring buffer, and the doorbell and user I/O
pages, so that it can submit work directly to queues.

Signed-off-by: Ketil Johnsen <ketil.johnsen at arm.com>
Co-developed-by: Mihail Atanassov <mihail.atanassov at arm.com>
Signed-off-by: Mihail Atanassov <mihail.atanassov at arm.com>
Co-developed-by: Akash Goel <akash.goel at arm.com>
Signed-off-by: Akash Goel <akash.goel at arm.com>
---
 drivers/gpu/drm/panthor/panthor_device.h |  24 ++++
 drivers/gpu/drm/panthor/panthor_drv.c    |  69 ++++++++++-
 drivers/gpu/drm/panthor/panthor_sched.c  | 151 ++++++++++++++++++-----
 drivers/gpu/drm/panthor/panthor_sched.h  |   4 +-
 4 files changed, 209 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index e388c0472ba7..7c27dbba8270 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -171,6 +171,9 @@ struct panthor_file {
 	/** @ptdev: Device attached to this file. */
 	struct panthor_device *ptdev;
 
+	/** @drm_file: Corresponding drm_file */
+	struct drm_file *drm_file;
+
 	/** @vms: VM pool attached to this file. */
 	struct panthor_vm_pool *vms;
 
@@ -353,6 +356,27 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
 					 pirq);							\
 }
 
+/*
+ * We currently set the maximum number of groups per file to an arbitrary low
+ * value. But this can be updated if we need more.
+ */
+#define MAX_GROUPS_PER_POOL 128
+
+/*
+ * The maximum page size supported by the driver. When running with a smaller
+ * page size, only the first page at this offset is valid.
+ */
+#define DRM_PANTHOR_MAX_PAGE_SHIFT		16
+
+/* Carve out private MMIO file offsets, starting 1 << 42 past DRM_PANTHOR_USER_MMIO_OFFSET */
+#define PANTHOR_PRIVATE_MMIO_OFFSET  (DRM_PANTHOR_USER_MMIO_OFFSET + (1ull << 42))
+
+/* Give out file offsets for doorbell pages: one max-page-sized slot per group index */
+#define PANTHOR_DOORBELL_OFFSET(group) (PANTHOR_PRIVATE_MMIO_OFFSET + \
+					((group) << DRM_PANTHOR_MAX_PAGE_SHIFT))
+#define PANTHOR_DOORBELL_OFFSET_START PANTHOR_DOORBELL_OFFSET(0)
+#define PANTHOR_DOORBELL_OFFSET_END PANTHOR_DOORBELL_OFFSET(MAX_GROUPS_PER_POOL)
+
 extern struct workqueue_struct *panthor_cleanup_wq;
 
 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 4f1efe616698..0bd600c464b8 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -67,6 +67,54 @@ panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const v
 	return 0;
 }
 
+/**
+ * panthor_set_uobj_array() - Copy a kernel object array into a user object array.
+ * @out: The object array to copy to.
+ * @min_stride: Minimum array stride.
+ * @obj_size: Kernel object size.
+ * @in: The kernel object array to copy from.
+ *
+ * Helper automating kernel -> user object copies, truncating or zero-padding each element to
+ * the user stride. Don't use this function directly, use PANTHOR_UOBJ_SET_ARRAY() instead.
+ *
+ * Return: 0 on success, -EINVAL if the stride is too small, -EFAULT on a failed user copy.
+ */
+static int
+panthor_set_uobj_array(const struct drm_panthor_obj_array *out, u32 min_stride, u32 obj_size,
+		       const void *in)
+{
+	if (out->stride < min_stride)
+		return -EINVAL;
+
+	if (!out->count)
+		return 0;
+
+	if (obj_size == out->stride) {
+		if (copy_to_user(u64_to_user_ptr(out->array), in,
+				 (unsigned long)obj_size * out->count))
+			return -EFAULT;
+	} else {
+		u32 cpy_elem_size = min_t(u32, out->stride, obj_size);
+		void __user *out_ptr = u64_to_user_ptr(out->array);
+		const void *in_ptr = in;
+
+		for (u32 i = 0; i < out->count; i++) {
+			if (copy_to_user(out_ptr, in_ptr, cpy_elem_size))
+				return -EFAULT;
+
+			if (out->stride > obj_size &&
+			    clear_user(out_ptr + cpy_elem_size, out->stride - obj_size)) {
+				return -EFAULT;
+			}
+
+			out_ptr += out->stride;
+			in_ptr += obj_size;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array.
  * @in: The object array to copy.
@@ -182,6 +230,20 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
 			 PANTHOR_UOBJ_MIN_SIZE(_src_obj), \
 			 sizeof(_src_obj), &(_src_obj))
 
+/**
+ * PANTHOR_UOBJ_SET_ARRAY() - Copies from _src_array to @_dest_drm_panthor_obj_array.array.
+ * @_dest_drm_panthor_obj_array: The &struct drm_panthor_obj_array containing a __u64 raw
+ * pointer to the destination C array in user space and the size of each array
+ * element in user space (the 'stride').
+ * @_src_array: The source C array object in kernel space.
+ *
+ * Return: Error code. See panthor_set_uobj_array().
+ */
+#define PANTHOR_UOBJ_SET_ARRAY(_dest_drm_panthor_obj_array, _src_array) \
+	panthor_set_uobj_array(_dest_drm_panthor_obj_array, \
+			       PANTHOR_UOBJ_MIN_SIZE((_src_array)[0]), \
+			       sizeof((_src_array)[0]), _src_array)
+
+
 /**
  * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible
  * object array.
@@ -1012,10 +1074,8 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
 		return ret;
 
 	ret = panthor_group_create(pfile, args, queue_args);
-	if (ret >= 0) {
-		args->group_handle = ret;
-		ret = 0;
-	}
+	if (!ret)
+		ret = PANTHOR_UOBJ_SET_ARRAY(&args->queues, queue_args);
 
 	kvfree(queue_args);
 	return ret;
@@ -1262,6 +1322,7 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
 	}
 
 	pfile->ptdev = ptdev;
+	pfile->drm_file = file;
 
 	ret = panthor_vm_pool_create(pfile);
 	if (ret)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index b2cf053b3601..ad160a821957 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -384,6 +384,21 @@ struct panthor_queue {
 	 */
 	u8 doorbell_id;
 
+	/** @doorbell_offset: file offset user space can use to map the doorbell page */
+	u64 doorbell_offset;
+
+	/** @ringbuf_offset: file offset user space can use to map the ring buffer
+	 *
+	 * Only valid when group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT
+	 */
+	u64 ringbuf_offset;
+
+	/** @user_io_offset: file offset user space can use to map the two user IO pages
+	 *
+	 * Only valid when group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT
+	 */
+	u64 user_io_offset;
+
 	/**
 	 * @priority: Priority of the queue inside the group.
 	 *
@@ -524,6 +539,12 @@ struct panthor_group {
 	/** @ptdev: Device. */
 	struct panthor_device *ptdev;
 
+	/** @pfile: associated panthor_file */
+	struct panthor_file *pfile;
+
+	/** @handle: integer value used by user to refer to this group */
+	u32 handle;
+
 	/** @vm: VM bound to the group. */
 	struct panthor_vm *vm;
 
@@ -548,6 +569,9 @@ struct panthor_group {
 	/** @priority: Group priority (check panthor_csg_priority). */
 	u8 priority;
 
+	/** @user_submit: true if user space controls submission */
+	bool user_submit;
+
 	/** @blocked_queues: Bitmask reflecting the blocked queues. */
 	u32 blocked_queues;
 
@@ -708,12 +732,6 @@ struct panthor_group {
 			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
 	} while (0)
 
-/*
- * We currently set the maximum of groups per file to an arbitrary low value.
- * But this can be updated if we need more.
- */
-#define MAX_GROUPS_PER_POOL 128
-
 /**
  * struct panthor_group_pool - Group pool
  *
@@ -836,6 +854,12 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
 
 	panthor_queue_put_syncwait_obj(queue);
 
+	if (queue->ringbuf_offset)
+		drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+
+	if (queue->user_io_offset)
+		drm_vma_node_revoke(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+
 	panthor_kernel_bo_destroy(queue->ringbuf);
 	panthor_kernel_bo_destroy(queue->iface.mem);
 
@@ -1552,7 +1576,7 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
 
 	lockdep_assert_held(&ptdev->scheduler->lock);
 
-	if (group)
+	if (group && !group->user_submit)
 		group_queue_work(group, sync_upd);
 
 	sched_queue_work(ptdev->scheduler, sync_upd);
@@ -2019,10 +2043,12 @@ group_term_post_processing(struct panthor_group *group)
 		}
 		spin_unlock(&queue->fence_ctx.lock);
 
-		/* Manually update the syncobj seqno to unblock waiters. */
-		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
-		syncobj->status = ~0;
-		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+		if (!group->user_submit) {
+			/* Manually update the syncobj seqno to unblock waiters. */
+			syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+			syncobj->status = ~0;
+			syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+		}
 		sched_queue_work(group->ptdev->scheduler, sync_upd);
 	}
 	dma_fence_end_signalling(cookie);
@@ -2785,6 +2811,9 @@ static void group_sync_upd_work(struct work_struct *work)
 	u32 queue_idx;
 	bool cookie;
 
+	if (drm_WARN_ON(&group->ptdev->base, group->user_submit))
+		return;
+
 	cookie = dma_fence_begin_signalling();
 	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
 		struct panthor_queue *queue = group->queues[queue_idx];
@@ -3021,6 +3050,9 @@ group_create_queue(struct panthor_group *group,
 	if (args->priority > CSF_MAX_QUEUE_PRIO)
 		return ERR_PTR(-EINVAL);
 
+	if (args->ringbuf_offset || args->user_io_offset || args->doorbell_offset)
+		return ERR_PTR(-EINVAL);
+
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
 	if (!queue)
 		return ERR_PTR(-ENOMEM);
@@ -3046,6 +3078,20 @@ group_create_queue(struct panthor_group *group,
 	if (ret)
 		goto err_free_queue;
 
+	if (group->user_submit) {
+		ret = drm_vma_node_allow(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+		if (ret)
+			goto err_free_queue;
+
+		ret = drm_gem_create_mmap_offset(queue->ringbuf->obj);
+		if (ret) {
+			drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+			goto err_free_queue;
+		}
+
+		queue->ringbuf_offset = drm_vma_node_offset_addr(&queue->ringbuf->obj->vma_node);
+	}
+
 	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
 							    &queue->iface.input,
 							    &queue->iface.output,
@@ -3056,6 +3102,21 @@ group_create_queue(struct panthor_group *group,
 		goto err_free_queue;
 	}
 
+	if (group->user_submit) {
+		ret = drm_vma_node_allow(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+		if (ret)
+			goto err_free_queue;
+
+		ret = drm_gem_create_mmap_offset(queue->iface.mem->obj);
+		if (ret) {
+			drm_vma_node_revoke(&queue->iface.mem->obj->vma_node,
+					    group->pfile->drm_file);
+			goto err_free_queue;
+		}
+
+		queue->user_io_offset = drm_vma_node_offset_addr(&queue->iface.mem->obj->vma_node);
+	}
+
 	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
 			     group->ptdev->scheduler->wq, 1,
 			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
@@ -3075,11 +3136,9 @@ group_create_queue(struct panthor_group *group,
 	return ERR_PTR(ret);
 }
 
-#define MAX_GROUPS_PER_POOL		128
-
 int panthor_group_create(struct panthor_file *pfile,
-			 const struct drm_panthor_group_create *group_args,
-			 const struct drm_panthor_queue_create *queue_args)
+			 struct drm_panthor_group_create *group_args,
+			 struct drm_panthor_queue_create *queue_args)
 {
 	struct panthor_device *ptdev = pfile->ptdev;
 	struct panthor_group_pool *gpool = pfile->groups;
@@ -3115,6 +3174,7 @@ int panthor_group_create(struct panthor_file *pfile,
 	group->csg_id = -1;
 
 	group->ptdev = ptdev;
+	group->pfile = pfile;
 	group->max_compute_cores = group_args->max_compute_cores;
 	group->compute_core_mask = group_args->compute_core_mask;
 	group->max_fragment_cores = group_args->max_fragment_cores;
@@ -3130,6 +3190,9 @@ int panthor_group_create(struct panthor_file *pfile,
 	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
 	INIT_WORK(&group->release_work, group_release_work);
 
+	if (group_args->flags & DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT)
+		group->user_submit = true;
+
 	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
 	if (!group->vm) {
 		ret = -EINVAL;
@@ -3152,25 +3215,27 @@ int panthor_group_create(struct panthor_file *pfile,
 		goto err_put_group;
 	}
 
-	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
-						   group_args->queues.count *
-						   sizeof(struct panthor_syncobj_64b),
-						   DRM_PANTHOR_BO_NO_MMAP,
-						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
-						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-						   PANTHOR_VM_KERNEL_AUTO_VA);
-	if (IS_ERR(group->syncobjs)) {
-		ret = PTR_ERR(group->syncobjs);
-		goto err_put_group;
+	if (!group->user_submit) {
+		group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
+							   group_args->queues.count *
+							   sizeof(struct panthor_syncobj_64b),
+							   DRM_PANTHOR_BO_NO_MMAP,
+							   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+							   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+							   PANTHOR_VM_KERNEL_AUTO_VA);
+		if (IS_ERR(group->syncobjs)) {
+			ret = PTR_ERR(group->syncobjs);
+			goto err_put_group;
+		}
+
+		ret = panthor_kernel_bo_vmap(group->syncobjs);
+		if (ret)
+			goto err_put_group;
+
+		memset(group->syncobjs->kmap, 0,
+		       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
 	}
 
-	ret = panthor_kernel_bo_vmap(group->syncobjs);
-	if (ret)
-		goto err_put_group;
-
-	memset(group->syncobjs->kmap, 0,
-	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
-
 	for (i = 0; i < group_args->queues.count; i++) {
 		group->queues[i] = group_create_queue(group, &queue_args[i]);
 		if (IS_ERR(group->queues[i])) {
@@ -3188,6 +3253,21 @@ int panthor_group_create(struct panthor_file *pfile,
 	if (ret)
 		goto err_put_group;
 
+	group->handle = gid;
+	group_args->group_handle = gid;
+
+	if (group->user_submit) {
+		for (i = 0; i < group_args->queues.count; i++) {
+			/* All queues in group use the same HW doorbell */
+			group->queues[i]->doorbell_offset = PANTHOR_DOORBELL_OFFSET(gid - 1);
+
+			/* copy to queue_args so these values can be returned to user */
+			queue_args[i].doorbell_offset = group->queues[i]->doorbell_offset;
+			queue_args[i].ringbuf_offset = group->queues[i]->ringbuf_offset;
+			queue_args[i].user_io_offset = group->queues[i]->user_io_offset;
+		}
+	}
+
 	mutex_lock(&sched->reset.lock);
 	if (atomic_read(&sched->reset.in_progress)) {
 		panthor_group_stop(group);
@@ -3199,7 +3279,7 @@ int panthor_group_create(struct panthor_file *pfile,
 	}
 	mutex_unlock(&sched->reset.lock);
 
-	return gid;
+	return 0;
 
 err_put_group:
 	group_put(group);
@@ -3390,6 +3470,11 @@ panthor_job_create(struct panthor_file *pfile,
 		goto err_put_job;
 	}
 
+	if (job->group->user_submit) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
 	if (job->queue_idx >= job->group->queue_count ||
 	    !job->group->queues[job->queue_idx]) {
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 3a30d2328b30..55b6534fa390 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -19,8 +19,8 @@ struct panthor_group_pool;
 struct panthor_job;
 
 int panthor_group_create(struct panthor_file *pfile,
-			 const struct drm_panthor_group_create *group_args,
-			 const struct drm_panthor_queue_create *queue_args);
+			 struct drm_panthor_group_create *group_args,
+			 struct drm_panthor_queue_create *queue_args);
 int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
 int panthor_group_get_state(struct panthor_file *pfile,
 			    struct drm_panthor_group_get_state *get_state);
-- 
2.45.0



More information about the dri-devel mailing list