[PATCH 2/8] drm/panthor: Extend GROUP_CREATE for user submission
Mihail Atanassov
mihail.atanassov at arm.com
Wed Aug 28 17:25:58 UTC 2024
From: Ketil Johnsen <ketil.johnsen at arm.com>
Allow userspace to mmap() the ring buffer, and the doorbell and user I/O
pages, so that it can submit work directly to queues.
Signed-off-by: Ketil Johnsen <ketil.johnsen at arm.com>
Co-developed-by: Mihail Atanassov <mihail.atanassov at arm.com>
Signed-off-by: Mihail Atanassov <mihail.atanassov at arm.com>
Co-developed-by: Akash Goel <akash.goel at arm.com>
Signed-off-by: Akash Goel <akash.goel at arm.com>
---
drivers/gpu/drm/panthor/panthor_device.h | 24 ++++
drivers/gpu/drm/panthor/panthor_drv.c | 69 ++++++++++-
drivers/gpu/drm/panthor/panthor_sched.c | 151 ++++++++++++++++++-----
drivers/gpu/drm/panthor/panthor_sched.h | 4 +-
4 files changed, 209 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index e388c0472ba7..7c27dbba8270 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -171,6 +171,9 @@ struct panthor_file {
/** @ptdev: Device attached to this file. */
struct panthor_device *ptdev;
+ /** @drm_file: Corresponding drm_file */
+ struct drm_file *drm_file;
+
/** @vms: VM pool attached to this file. */
struct panthor_vm_pool *vms;
@@ -353,6 +356,27 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
pirq); \
}
+/*
+ * Maximum number of groups per pool. This is currently set to an arbitrarily
+ * low value and can be raised if more groups are needed.
+ */
+#define MAX_GROUPS_PER_POOL 128
+
+/*
+ * log2 of the maximum page size supported by the driver (1 << 16 = 64 KiB).
+ * Doorbell file offsets are spaced by this amount. When running with a smaller
+ * page size, only the first page at each such offset is valid.
+ */
+#define DRM_PANTHOR_MAX_PAGE_SHIFT 16
+
+/*
+ * Carve out a private range of MMIO file offsets, starting (1ull << 42) bytes
+ * above DRM_PANTHOR_USER_MMIO_OFFSET.
+ */
+#define PANTHOR_PRIVATE_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET + (1ull << 42))
+
+/*
+ * Give out the file offset of a doorbell page based on the group handle.
+ * Each group index gets one (1 << DRM_PANTHOR_MAX_PAGE_SHIFT)-byte slot above
+ * PANTHOR_PRIVATE_MMIO_OFFSET; panthor_group_create() passes (handle - 1) as
+ * the index. PANTHOR_DOORBELL_OFFSET_START is the offset of slot 0 and
+ * PANTHOR_DOORBELL_OFFSET_END the offset of slot MAX_GROUPS_PER_POOL
+ * (presumably an exclusive upper bound -- confirm against the mmap handler).
+ */
+#define PANTHOR_DOORBELL_OFFSET(group) (PANTHOR_PRIVATE_MMIO_OFFSET + \
+ ((group) << DRM_PANTHOR_MAX_PAGE_SHIFT))
+#define PANTHOR_DOORBELL_OFFSET_START PANTHOR_DOORBELL_OFFSET(0)
+#define PANTHOR_DOORBELL_OFFSET_END PANTHOR_DOORBELL_OFFSET(MAX_GROUPS_PER_POOL)
+
extern struct workqueue_struct *panthor_cleanup_wq;
#endif
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 4f1efe616698..0bd600c464b8 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -67,6 +67,54 @@ panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const v
return 0;
}
+/**
+ * panthor_set_uobj_array() - Copy a kernel object array into a user object array.
+ * @out: The object array to copy to (user pointer, element count and stride).
+ * @min_stride: Minimum array stride accepted in @out.
+ * @obj_size: Kernel object size, i.e. the size of one element of @in.
+ * @in: Kernel array to copy from; @out->count elements of @obj_size bytes are read.
+ *
+ * Helper automating kernel -> user object copies.
+ *
+ * When the user stride equals @obj_size the whole array is copied with a
+ * single copy_to_user(). Otherwise the elements are copied one by one: the
+ * first min(@out->stride, @obj_size) bytes of each kernel object are copied,
+ * and when the user stride is larger than @obj_size the remaining bytes of the
+ * user element are zeroed with clear_user(). An array with a zero count is
+ * accepted and nothing is copied.
+ *
+ * Don't use this function directly, use PANTHOR_UOBJ_SET_ARRAY() instead.
+ *
+ * Return: 0 on success, -EINVAL if @out->stride is smaller than @min_stride,
+ * -EFAULT if writing to user memory fails.
+ */
+static int
+panthor_set_uobj_array(const struct drm_panthor_obj_array *out, u32 min_stride, u32 obj_size,
+ const void *in)
+{
+ if (out->stride < min_stride)
+ return -EINVAL;
+
+ if (!out->count)
+ return 0;
+
+ if (obj_size == out->stride) {
+ if (copy_to_user(u64_to_user_ptr(out->array), in,
+ (unsigned long)obj_size * out->count))
+ return -EFAULT;
+ } else {
+ u32 cpy_elem_size = min_t(u32, out->stride, obj_size);
+ void __user *out_ptr = u64_to_user_ptr(out->array);
+ const void *in_ptr = in;
+
+ for (u32 i = 0; i < out->count; i++) {
+ if (copy_to_user(out_ptr, in_ptr, cpy_elem_size))
+ return -EFAULT;
+
+ if (out->stride > obj_size &&
+ clear_user(out_ptr + cpy_elem_size, out->stride - obj_size)) {
+ return -EFAULT;
+ }
+
+ out_ptr += out->stride;
+ in_ptr += obj_size;
+ }
+ }
+
+ return 0;
+}
+
/**
* panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array.
* @in: The object array to copy.
@@ -182,6 +230,20 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
PANTHOR_UOBJ_MIN_SIZE(_src_obj), \
sizeof(_src_obj), &(_src_obj))
+/**
+ * PANTHOR_UOBJ_SET_ARRAY() - Copies from _src_array to @_dest_drm_panthor_obj_array.array.
+ * @_dest_drm_panthor_obj_array: Pointer to the &struct drm_panthor_obj_array containing
+ * a __u64 raw pointer to the destination C array in user space and the size of
+ * each array element in user space (the 'stride').
+ * @_src_array: The source C array object in kernel space. Its first element
+ * determines the kernel object size and the minimum accepted stride.
+ *
+ * Return: Error code. See panthor_set_uobj_array().
+ */
+#define PANTHOR_UOBJ_SET_ARRAY(_dest_drm_panthor_obj_array, _src_array) \
+ panthor_set_uobj_array(_dest_drm_panthor_obj_array, \
+ PANTHOR_UOBJ_MIN_SIZE((_src_array)[0]), \
+ sizeof((_src_array)[0]), _src_array)
+
/**
* PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible
* object array.
@@ -1012,10 +1074,8 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
return ret;
ret = panthor_group_create(pfile, args, queue_args);
- if (ret >= 0) {
- args->group_handle = ret;
- ret = 0;
- }
+ if (!ret)
+ ret = PANTHOR_UOBJ_SET_ARRAY(&args->queues, queue_args);
kvfree(queue_args);
return ret;
@@ -1262,6 +1322,7 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
}
pfile->ptdev = ptdev;
+ pfile->drm_file = file;
ret = panthor_vm_pool_create(pfile);
if (ret)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index b2cf053b3601..ad160a821957 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -384,6 +384,21 @@ struct panthor_queue {
*/
u8 doorbell_id;
+ /** @doorbell_offset: file offset user space can use to map the doorbell page */
+ u64 doorbell_offset;
+
+ /** @ringbuf_offset: file offset user space can use to map the ring buffer
+ *
+ * Only valid when group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT
+ */
+ u64 ringbuf_offset;
+
+ /** @user_io_offset: file offset user space can use to map the two user IO pages
+ *
+ * Only valid when group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT
+ */
+ u64 user_io_offset;
+
/**
* @priority: Priority of the queue inside the group.
*
@@ -524,6 +539,12 @@ struct panthor_group {
/** @ptdev: Device. */
struct panthor_device *ptdev;
+ /** @pfile: associated panthor_file */
+ struct panthor_file *pfile;
+
+ /** @handle: integer value used by user to refer to this group */
+ u32 handle;
+
/** @vm: VM bound to the group. */
struct panthor_vm *vm;
@@ -548,6 +569,9 @@ struct panthor_group {
/** @priority: Group priority (check panthor_csg_priority). */
u8 priority;
+ /** @user_submit: true if user space controls submission */
+ bool user_submit;
+
/** @blocked_queues: Bitmask reflecting the blocked queues. */
u32 blocked_queues;
@@ -708,12 +732,6 @@ struct panthor_group {
mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
} while (0)
-/*
- * We currently set the maximum of groups per file to an arbitrary low value.
- * But this can be updated if we need more.
- */
-#define MAX_GROUPS_PER_POOL 128
-
/**
* struct panthor_group_pool - Group pool
*
@@ -836,6 +854,12 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_queue_put_syncwait_obj(queue);
+ if (queue->ringbuf_offset)
+ drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+
+ if (queue->user_io_offset)
+ drm_vma_node_revoke(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+
panthor_kernel_bo_destroy(queue->ringbuf);
panthor_kernel_bo_destroy(queue->iface.mem);
@@ -1552,7 +1576,7 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
lockdep_assert_held(&ptdev->scheduler->lock);
- if (group)
+ if (group && !group->user_submit)
group_queue_work(group, sync_upd);
sched_queue_work(ptdev->scheduler, sync_upd);
@@ -2019,10 +2043,12 @@ group_term_post_processing(struct panthor_group *group)
}
spin_unlock(&queue->fence_ctx.lock);
- /* Manually update the syncobj seqno to unblock waiters. */
- syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
- syncobj->status = ~0;
- syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+ if (!group->user_submit) {
+ /* Manually update the syncobj seqno to unblock waiters. */
+ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+ syncobj->status = ~0;
+ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+ }
sched_queue_work(group->ptdev->scheduler, sync_upd);
}
dma_fence_end_signalling(cookie);
@@ -2785,6 +2811,9 @@ static void group_sync_upd_work(struct work_struct *work)
u32 queue_idx;
bool cookie;
+ if (drm_WARN_ON(&group->ptdev->base, group->user_submit))
+ return;
+
cookie = dma_fence_begin_signalling();
for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
struct panthor_queue *queue = group->queues[queue_idx];
@@ -3021,6 +3050,9 @@ group_create_queue(struct panthor_group *group,
if (args->priority > CSF_MAX_QUEUE_PRIO)
return ERR_PTR(-EINVAL);
+ if (args->ringbuf_offset || args->user_io_offset || args->doorbell_offset)
+ return ERR_PTR(-EINVAL);
+
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
if (!queue)
return ERR_PTR(-ENOMEM);
@@ -3046,6 +3078,20 @@ group_create_queue(struct panthor_group *group,
if (ret)
goto err_free_queue;
+ if (group->user_submit) {
+ ret = drm_vma_node_allow(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+ if (ret)
+ goto err_free_queue;
+
+ ret = drm_gem_create_mmap_offset(queue->ringbuf->obj);
+ if (ret) {
+ drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+ goto err_free_queue;
+ }
+
+ queue->ringbuf_offset = drm_vma_node_offset_addr(&queue->ringbuf->obj->vma_node);
+ }
+
queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
&queue->iface.input,
&queue->iface.output,
@@ -3056,6 +3102,21 @@ group_create_queue(struct panthor_group *group,
goto err_free_queue;
}
+ if (group->user_submit) {
+ ret = drm_vma_node_allow(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+ if (ret)
+ goto err_free_queue;
+
+ ret = drm_gem_create_mmap_offset(queue->iface.mem->obj);
+ if (ret) {
+ drm_vma_node_revoke(&queue->iface.mem->obj->vma_node,
+ group->pfile->drm_file);
+ goto err_free_queue;
+ }
+
+ queue->user_io_offset = drm_vma_node_offset_addr(&queue->iface.mem->obj->vma_node);
+ }
+
ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
group->ptdev->scheduler->wq, 1,
args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
@@ -3075,11 +3136,9 @@ group_create_queue(struct panthor_group *group,
return ERR_PTR(ret);
}
-#define MAX_GROUPS_PER_POOL 128
-
int panthor_group_create(struct panthor_file *pfile,
- const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args)
+ struct drm_panthor_group_create *group_args,
+ struct drm_panthor_queue_create *queue_args)
{
struct panthor_device *ptdev = pfile->ptdev;
struct panthor_group_pool *gpool = pfile->groups;
@@ -3115,6 +3174,7 @@ int panthor_group_create(struct panthor_file *pfile,
group->csg_id = -1;
group->ptdev = ptdev;
+ group->pfile = pfile;
group->max_compute_cores = group_args->max_compute_cores;
group->compute_core_mask = group_args->compute_core_mask;
group->max_fragment_cores = group_args->max_fragment_cores;
@@ -3130,6 +3190,9 @@ int panthor_group_create(struct panthor_file *pfile,
INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
INIT_WORK(&group->release_work, group_release_work);
+ if (group_args->flags & DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT)
+ group->user_submit = true;
+
group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
if (!group->vm) {
ret = -EINVAL;
@@ -3152,25 +3215,27 @@ int panthor_group_create(struct panthor_file *pfile,
goto err_put_group;
}
- group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
- group_args->queues.count *
- sizeof(struct panthor_syncobj_64b),
- DRM_PANTHOR_BO_NO_MMAP,
- DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
- DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
- PANTHOR_VM_KERNEL_AUTO_VA);
- if (IS_ERR(group->syncobjs)) {
- ret = PTR_ERR(group->syncobjs);
- goto err_put_group;
+ if (!group->user_submit) {
+ group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
+ group_args->queues.count *
+ sizeof(struct panthor_syncobj_64b),
+ DRM_PANTHOR_BO_NO_MMAP,
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+ PANTHOR_VM_KERNEL_AUTO_VA);
+ if (IS_ERR(group->syncobjs)) {
+ ret = PTR_ERR(group->syncobjs);
+ goto err_put_group;
+ }
+
+ ret = panthor_kernel_bo_vmap(group->syncobjs);
+ if (ret)
+ goto err_put_group;
+
+ memset(group->syncobjs->kmap, 0,
+ group_args->queues.count * sizeof(struct panthor_syncobj_64b));
}
- ret = panthor_kernel_bo_vmap(group->syncobjs);
- if (ret)
- goto err_put_group;
-
- memset(group->syncobjs->kmap, 0,
- group_args->queues.count * sizeof(struct panthor_syncobj_64b));
-
for (i = 0; i < group_args->queues.count; i++) {
group->queues[i] = group_create_queue(group, &queue_args[i]);
if (IS_ERR(group->queues[i])) {
@@ -3188,6 +3253,21 @@ int panthor_group_create(struct panthor_file *pfile,
if (ret)
goto err_put_group;
+ group->handle = gid;
+ group_args->group_handle = gid;
+
+ if (group->user_submit) {
+ for (i = 0; i < group_args->queues.count; i++) {
+ /* All queues in group use the same HW doorbell */
+ group->queues[i]->doorbell_offset = PANTHOR_DOORBELL_OFFSET(gid - 1);
+
+ /* copy to queue_args so these values can be returned to user */
+ queue_args[i].doorbell_offset = group->queues[i]->doorbell_offset;
+ queue_args[i].ringbuf_offset = group->queues[i]->ringbuf_offset;
+ queue_args[i].user_io_offset = group->queues[i]->user_io_offset;
+ }
+ }
+
mutex_lock(&sched->reset.lock);
if (atomic_read(&sched->reset.in_progress)) {
panthor_group_stop(group);
@@ -3199,7 +3279,7 @@ int panthor_group_create(struct panthor_file *pfile,
}
mutex_unlock(&sched->reset.lock);
- return gid;
+ return 0;
err_put_group:
group_put(group);
@@ -3390,6 +3470,11 @@ panthor_job_create(struct panthor_file *pfile,
goto err_put_job;
}
+ if (job->group->user_submit) {
+ ret = -EINVAL;
+ goto err_put_job;
+ }
+
if (job->queue_idx >= job->group->queue_count ||
!job->group->queues[job->queue_idx]) {
ret = -EINVAL;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 3a30d2328b30..55b6534fa390 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -19,8 +19,8 @@ struct panthor_group_pool;
struct panthor_job;
int panthor_group_create(struct panthor_file *pfile,
- const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args);
+ struct drm_panthor_group_create *group_args,
+ struct drm_panthor_queue_create *queue_args);
int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
int panthor_group_get_state(struct panthor_file *pfile,
struct drm_panthor_group_get_state *get_state);
--
2.45.0
More information about the dri-devel
mailing list