[PATCH v2 06/08] drm/amdgpu: Add few optimizations to userq fence driver
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Sep 26 12:28:45 UTC 2024
On 25.09.24 21:59, Arunpravin Paneer Selvam wrote:
> Add few optimizations to userq fence driver.
"Few optimization and fixes for userq fence driver".
>
> v1 (Christian):
> - Remove unnecessary comments.
> - In the drm_exec_init() call, pass num_bo_handles as the last parameter;
> this makes the allocation of the internal array more efficient.
> - Handle the return value of __xa_store() and improve the error handling
> of amdgpu_userq_fence_driver_alloc().
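
Side note for anyone following along: the last parameter of drm_exec_init()
is only a hint for how many GEM objects we expect to lock, so drm_exec can
size its internal object array up front instead of growing it while locking.
A minimal sketch of the pattern (the helper name and parameters are made up
for illustration, this is not the actual ioctl code):

#include <drm/drm_exec.h>
#include <drm/drm_gem.h>

static int lock_bos_example(struct drm_gem_object **gobj,
			    unsigned int num_bo_handles)
{
	struct drm_exec exec;
	int r;

	/* The third argument pre-sizes the internal object array. */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles);

	/* Lock all BOs, retrying on contention. */
	drm_exec_until_all_locked(&exec) {
		/* Reserve one fence slot per BO, like the signal path does. */
		r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r)
			goto out;
	}

	/* ... attach fences to the locked BOs here ... */

out:
	drm_exec_fini(&exec);
	return r;
}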
>
> v2 (Christian):
> - Revert the userq_xa xarray init to XA_FLAGS_LOCK_IRQ.
> - Move the xa_unlock before the error check of the xa_err(__xa_store())
> call, and split this change out into a separate patch since it adds
> missing error handling.
> - Remove the unnecessary comments.
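
And just to illustrate why the xarray flags matter here: with
XA_FLAGS_LOCK_IRQ the internal lock should be taken IRQ safe, e.g. with
xa_lock_irqsave(), and because __xa_store() returns either the previous
entry or an xa_err()-encoded pointer, the error value can be checked after
the lock is dropped. Rough sketch only, the xarray and helper below are
made up for illustration:

#include <linux/gfp.h>
#include <linux/xarray.h>

/* xarray whose internal lock must be IRQ safe, e.g. because it is
 * also touched from interrupt context.
 */
static DEFINE_XARRAY_FLAGS(example_xa, XA_FLAGS_LOCK_IRQ);

static int example_store(unsigned long index, void *entry)
{
	unsigned long flags;
	int r;

	xa_lock_irqsave(&example_xa, flags);
	/* __xa_store() returns the old entry or an xa_err() pointer. */
	r = xa_err(__xa_store(&example_xa, index, entry, GFP_ATOMIC));
	xa_unlock_irqrestore(&example_xa, flags);

	/* Check the error only after dropping the lock. */
	return r;
}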
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 44 ++++++++++++-------
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 +--
> .../gpu/drm/amd/include/amdgpu_userqueue.h | 2 +-
> 4 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 8d2a0331cd96..f3576c31428c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -76,7 +76,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
> if (!fence_drv) {
> DRM_ERROR("Failed to allocate memory for fence driver\n");
> - return -ENOMEM;
> + r = -ENOMEM;
> + goto free_fence_drv;
> }
>
> /* Acquire seq64 memory */
> @@ -84,7 +85,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> &fence_drv->cpu_addr);
> if (r) {
> kfree(fence_drv);
> - return -ENOMEM;
> + r = -ENOMEM;
> + goto free_seq64;
> }
>
> memset(fence_drv->cpu_addr, 0, sizeof(u64));
> @@ -94,7 +96,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> spin_lock_init(&fence_drv->fence_list_lock);
>
> fence_drv->adev = adev;
> - fence_drv->uq_fence_drv_xa_ref = &userq->uq_fence_drv_xa;
> + fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
> fence_drv->context = dma_fence_context_alloc(1);
> get_task_comm(fence_drv->timeline_name, current);
>
> @@ -106,6 +108,13 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> userq->fence_drv = fence_drv;
>
> return 0;
> +
> +free_seq64:
> + amdgpu_seq64_free(adev, fence_drv->gpu_addr);
> +free_fence_drv:
> + kfree(fence_drv);
> +
> + return r;
> }
>
> void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
> @@ -147,7 +156,7 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
> struct amdgpu_device *adev = fence_drv->adev;
> struct amdgpu_userq_fence *fence, *tmp;
> struct xarray *xa = &adev->userq_xa;
> - unsigned long index;
> + unsigned long index, flags;
> struct dma_fence *f;
>
> spin_lock(&fence_drv->fence_list_lock);
> @@ -164,11 +173,11 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
> }
> spin_unlock(&fence_drv->fence_list_lock);
>
> - xa_lock(xa);
> + xa_lock_irqsave(xa, flags);
> xa_for_each(xa, index, xa_fence_drv)
> if (xa_fence_drv == fence_drv)
> __xa_erase(xa, index);
> - xa_unlock(xa);
> + xa_unlock_irqrestore(xa, flags);
>
> /* Free seq64 memory */
> amdgpu_seq64_free(adev, fence_drv->gpu_addr);
> @@ -212,12 +221,12 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
> amdgpu_userq_fence_driver_get(fence_drv);
> dma_fence_get(fence);
>
> - if (!xa_empty(&userq->uq_fence_drv_xa)) {
> + if (!xa_empty(&userq->fence_drv_xa)) {
> struct amdgpu_userq_fence_driver *stored_fence_drv;
> unsigned long index, count = 0;
> int i = 0;
>
> - xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv)
> + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
> count++;
>
> userq_fence->fence_drv_array =
> @@ -226,9 +235,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
> GFP_KERNEL);
>
> if (userq_fence->fence_drv_array) {
> - xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) {
> + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
> userq_fence->fence_drv_array[i] = stored_fence_drv;
> - xa_erase(&userq->uq_fence_drv_xa, index);
> + xa_erase(&userq->fence_drv_xa, index);
> i++;
> }
> }
> @@ -378,7 +387,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> struct drm_exec exec;
> u64 wptr;
>
> - /* Array of syncobj handles */
> num_syncobj_handles = args->num_syncobj_handles;
> syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles_array),
> sizeof(u32) * num_syncobj_handles);
> @@ -400,7 +408,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> - /* Array of bo handles */
> num_bo_handles = args->num_bo_handles;
> bo_handles = memdup_user(u64_to_user_ptr(args->bo_handles_array),
> sizeof(u32) * num_bo_handles);
> @@ -422,7 +429,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles);
> +
> + /* Lock all BOs with retry handling */
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1);
> drm_exec_retry_on_contention(&exec);
> @@ -527,7 +536,6 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> goto free_timeline_handles;
> }
>
> - /* Array of GEM object handles */
> gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> if (!gobj) {
> r = -ENOMEM;
> @@ -542,7 +550,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles);
> +
> + /* Lock all BOs with retry handling */
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0);
> drm_exec_retry_on_contention(&exec);
> @@ -702,8 +712,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> * Otherwise, we would gather those references until we don't
> * have any more space left and crash.
> */
> - if (fence_drv->uq_fence_drv_xa_ref) {
> - r = xa_alloc(fence_drv->uq_fence_drv_xa_ref, &index, fence_drv,
> + if (fence_drv->fence_drv_xa_ptr) {
> + r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv,
> xa_limit_32b, GFP_KERNEL);
> if (r)
> goto free_fences;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index f72424248cc5..89c82ba38b50 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -54,7 +54,7 @@ struct amdgpu_userq_fence_driver {
> spinlock_t fence_list_lock;
> struct list_head fences;
> struct amdgpu_device *adev;
> - struct xarray *uq_fence_drv_xa_ref;
> + struct xarray *fence_drv_xa_ptr;
> char timeline_name[TASK_COMM_LEN];
> };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index e7f7354e0c0e..9b24218f7ad2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -48,8 +48,8 @@ static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
> static void
> amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
> {
> - amdgpu_userq_walk_and_drop_fence_drv(&userq->uq_fence_drv_xa);
> - xa_destroy(&userq->uq_fence_drv_xa);
> + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
> + xa_destroy(&userq->fence_drv_xa);
> /* Drop the fence_drv reference held by user queue */
> amdgpu_userq_fence_driver_put(userq->fence_drv);
> }
> @@ -348,7 +348,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> }
> queue->doorbell_index = index;
>
> - xa_init_flags(&queue->uq_fence_drv_xa, XA_FLAGS_ALLOC);
> + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
> r = amdgpu_userq_fence_driver_alloc(adev, queue);
> if (r) {
> DRM_ERROR("Failed to alloc fence driver\n");
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> index d035b5c2b14b..6eb094a54f4b 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> @@ -52,7 +52,7 @@ struct amdgpu_usermode_queue {
> struct amdgpu_userq_obj db_obj;
> struct amdgpu_userq_obj fw_obj;
> struct amdgpu_userq_obj wptr_obj;
> - struct xarray uq_fence_drv_xa;
> + struct xarray fence_drv_xa;
> struct amdgpu_userq_fence_driver *fence_drv;
> struct dma_fence *last_fence;
> };