[PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions
Christian König
christian.koenig at amd.com
Mon Feb 27 12:59:47 UTC 2023
Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
> This patch introduces new IOCTL for userqueue secure semaphore.
>
> The signal IOCTL called from userspace application creates a drm
> syncobj and array of bo GEM handles and passed in as parameter to
> the driver to install the fence into it.
>
> The wait IOCTL gets an array of drm syncobjs, finds the fences
> attached to the drm syncobjs and obtain the array of
> memory_address/fence_value combintion which are returned to
> userspace.
>
> v2: Worked on review comments from Christian for the following
> modifications
>
> - Install fence into GEM BO object.
> - Lock all BO's using the dma resv subsystem
> - Reorder the sequence in signal IOCTL function.
> - Get write pointer from the shadow wptr
> - use userq_fence to fetch the va/value in wait IOCTL.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 258 ++++++++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 6 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 1 +
> 5 files changed, 270 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1c3eba2d0390..255d73795493 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -964,6 +964,8 @@ struct amdgpu_device {
> struct amdgpu_mes mes;
> struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
>
> + struct amdgpu_userq_mgr *userq_mgr;
> +
> /* df */
> struct amdgpu_df df;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6b7ac1ebd04c..66a7304fabe3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +
> };
>
> static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 609a7328e9a6..26fd1d4f758a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -249,3 +249,261 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
> .signaled = amdgpu_userq_fence_signaled,
> .release = amdgpu_userq_fence_release,
> };
> +
> +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
> + struct amdgpu_usermode_queue *queue,
> + u64 *wptr)
> +{
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> + struct amdgpu_bo_va_mapping *mapping;
> + struct amdgpu_vm *vm = &fpriv->vm;
> + struct amdgpu_bo *bo;
> + u64 *ptr;
> + int r;
> +
> + mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> PAGE_SHIFT);
> + if (!mapping)
> + return -EINVAL;
> +
> + bo = mapping->bo_va->base.bo;
> + r = amdgpu_bo_kmap(bo, (void **)&ptr);
Oh, that's not something you can do that easily.
The BO must be reserved (locked) first if you want to call
amdgpu_bo_kmap() on it.
> + if (r) {
> + DRM_ERROR("Failed mapping the userqueue wptr bo");
> + return r;
> + }
> +
> + *wptr = le64_to_cpu(*ptr);
> +
> + return 0;
> +}
> +
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct drm_amdgpu_userq_signal *args = data;
> + struct amdgpu_device *adev = drm_to_adev(dev);
> + struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
> + struct amdgpu_usermode_queue *queue;
> + struct drm_syncobj *syncobj = NULL;
> + struct drm_gem_object **gobj;
> + u64 num_bo_handles, wptr;
> + struct dma_fence *fence;
> + u32 *bo_handles;
> + bool shared;
> + int r, i;
> +
> + /* Retrieve the user queue */
> + queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
> + if (!queue)
> + return -ENOENT;
> +
> + r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
> + if (r)
> + return -EINVAL;
> +
> + /* Find Syncobj if any */
> + syncobj = drm_syncobj_find(filp, args->handle);
> +
> + /* Array of bo handles */
> + num_bo_handles = args->num_bo_handles;
> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
> + if (bo_handles == NULL)
> + return -ENOMEM;
> +
> + if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
> + sizeof(u32) * num_bo_handles)) {
> + r = -EFAULT;
> + goto err_free_handles;
> + }
> +
> + /* Array of GEM object handles */
> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> + if (gobj == NULL) {
> + r = -ENOMEM;
> + goto err_free_handles;
> + }
> +
> + for (i = 0; i < num_bo_handles; i++) {
> + /* Retrieve GEM object */
> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
> + if (!gobj[i]) {
> + r = -ENOENT;
> + goto err_put_gobj;
> + }
> +
> + dma_resv_lock(gobj[i]->resv, NULL);
> + r = dma_resv_reserve_fences(gobj[i]->resv, 1);
> + if (r) {
> + dma_resv_unlock(gobj[i]->resv);
> + goto err_put_gobj;
> + }
> + dma_resv_unlock(gobj[i]->resv);
> + }
> +
> + /* Create a new fence */
> + r = amdgpu_userq_fence_create(queue, wptr, &fence);
> + if (!fence)
> + goto err_put_gobj;
> +
> + /* Add the created fence to syncobj/BO's */
> + if (syncobj)
> + drm_syncobj_replace_fence(syncobj, fence);
> +
> + shared = args->bo_flags & AMDGPU_USERQ_BO_READ;
> + for (i = 0; i < num_bo_handles; i++) {
> + dma_resv_lock(gobj[i]->resv, NULL);
> + dma_resv_add_fence(gobj[i]->resv, fence, shared ?
> + DMA_RESV_USAGE_READ :
> + DMA_RESV_USAGE_WRITE);
> + dma_resv_unlock(gobj[i]->resv);
> + }
That will still not work correctly. You've forgotten to call
dma_resv_reserve_fences().
The tricky part is that you need to do this for all BOs at the same time.
I will work on my drm_exec() patch set once more. That one should make
this job much easier.
Similar applies to the _wait function, but let's get _signal working first.
Regards,
Christian.
> +
> + for (i = 0; i < num_bo_handles; i++)
> + drm_gem_object_put(gobj[i]);
> +
> + dma_fence_put(fence);
> +
> + /* Free all handles */
> + kfree(bo_handles);
> + kfree(gobj);
> +
> + return 0;
> +
> +err_put_gobj:
> + while (i-- > 0)
> + drm_gem_object_put(gobj[i]);
> + kfree(gobj);
> +err_free_handles:
> + kfree(bo_handles);
> +
> + return r;
> +}
> +
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + struct drm_amdgpu_userq_wait *args = data;
> + struct amdgpu_userq_fence *userq_fence;
> + u32 *syncobj_handles, *bo_handles;
> + u64 num_syncobj, num_bo_handles;
> + struct drm_gem_object **gobj;
> + struct dma_fence *fence;
> + bool wait_flag;
> + int r, i, tmp;
> +
> + /* Array of Syncobj handles */
> + num_syncobj = args->count_handles;
> + syncobj_handles = kmalloc_array(num_syncobj, sizeof(*syncobj_handles), GFP_KERNEL);
> + if (!syncobj_handles)
> + return -ENOMEM;
> +
> + if (copy_from_user(syncobj_handles, u64_to_user_ptr(args->handles),
> + sizeof(u32) * num_syncobj)) {
> + r = -EFAULT;
> + goto err_free_syncobj_handles;
> + }
> +
> + /* Array of bo handles */
> + num_bo_handles = args->num_bo_handles;
> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
> + if (!bo_handles)
> + goto err_free_syncobj_handles;
> +
> + if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
> + sizeof(u32) * num_bo_handles)) {
> + r = -EFAULT;
> + goto err_free_bo_handles;
> + }
> +
> + /* Array of fence gpu address */
> + fence_info = kmalloc_array(num_syncobj + num_bo_handles, sizeof(*fence_info), GFP_KERNEL);
> + if (!fence_info) {
> + r = -ENOMEM;
> + goto err_free_bo_handles;
> + }
> +
> + /* Retrieve syncobj's fence */
> + for (i = 0; i < num_syncobj; i++) {
> + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
> + if (r) {
> + DRM_ERROR("syncobj %u failed to find the fence!\n", syncobj_handles[i]);
> + r = -ENOENT;
> + goto err_free_fence_info;
> + }
> +
> + /* Store drm syncobj's gpu va address and value */
> + userq_fence = to_amdgpu_userq_fence(fence);
> + fence_info[i].va = userq_fence->fence_drv->gpu_addr;
> + fence_info[i].value = fence->seqno;
> + dma_fence_put(fence);
> + }
> +
> + tmp = i;
> +
> + /* Array of GEM object handles */
> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> + if (gobj == NULL) {
> + r = -ENOMEM;
> + goto err_free_fence_info;
> + }
> +
> + /* Retrieve GEM objects's fence */
> + wait_flag = args->bo_wait_flags & AMDGPU_USERQ_BO_READ;
> + for (i = 0; i < num_bo_handles; i++, tmp++) {
> + struct dma_fence *bo_fence;
> +
> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
> + if (!gobj[i]) {
> + r = -ENOENT;
> + goto err_put_gobj;
> + }
> +
> + dma_resv_lock(gobj[i]->resv, NULL);
> + r = dma_resv_get_singleton(gobj[i]->resv,
> + wait_flag ?
> + DMA_RESV_USAGE_READ :
> + DMA_RESV_USAGE_WRITE,
> + &bo_fence);
> + if (r) {
> + dma_resv_unlock(gobj[i]->resv);
> + goto err_put_gobj;
> + }
> + dma_resv_unlock(gobj[i]->resv);
> + drm_gem_object_put(gobj[i]);
> +
> + /* Store GEM objects's gpu va address and value */
> + userq_fence = to_amdgpu_userq_fence(bo_fence);
> + fence_info[tmp].va = userq_fence->fence_drv->gpu_addr;
> + fence_info[tmp].value = bo_fence->seqno;
> + dma_fence_put(bo_fence);
> + }
> +
> + if (copy_to_user(u64_to_user_ptr(args->userq_fence_info),
> + fence_info, sizeof(fence_info))) {
> + r = -EFAULT;
> + goto err_free_gobj;
> + }
> +
> + /* Free all handles */
> + kfree(syncobj_handles);
> + kfree(bo_handles);
> + kfree(fence_info);
> + kfree(gobj);
> +
> + return 0;
> +
> +err_put_gobj:
> + while (i-- > 0)
> + drm_gem_object_put(gobj[i]);
> +err_free_gobj:
> + kfree(gobj);
> +err_free_fence_info:
> + kfree(fence_info);
> +err_free_bo_handles:
> + kfree(bo_handles);
> +err_free_syncobj_handles:
> + kfree(syncobj_handles);
> +
> + return r;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index 230dd54b4cd3..999d1e2baff5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -27,6 +27,8 @@
>
> #include <linux/types.h>
>
> +#define AMDGPU_USERQ_BO_READ 0x1
> +
> struct amdgpu_userq_fence {
> struct dma_fence base;
> /* userq fence lock */
> @@ -57,5 +59,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
> u64 seq, struct dma_fence **f);
> bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver *fence_drv);
> void amdgpu_userq_fence_driver_destroy(struct kref *ref);
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index d4317b0f6487..4d3d6777a178 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -457,6 +457,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
> idr_init_base(&userq_mgr->userq_idr, 1);
> INIT_LIST_HEAD(&userq_mgr->userq_list);
> userq_mgr->adev = adev;
> + adev->userq_mgr = userq_mgr;
>
> r = amdgpu_userqueue_setup_mqd_funcs(userq_mgr);
> if (r) {
More information about the amd-gfx
mailing list