[PATCH] drm/amdgpu: Fix TTM validation warnings in user queue doorbell handling

Christian König christian.koenig at amd.com
Fri May 9 11:21:54 UTC 2025


On 5/9/25 12:10, Jesse.Zhang wrote:
> This patch resolves a kernel warning that occurs during user queue initialization:
> 
> [  428.714241] WARNING: CPU: 23 PID: 1965 at drivers/gpu/drm/ttm/ttm_bo.c:823 ttm_bo_validate+0x15c/0x1a0 [ttm]
> [  428.714758] Call Trace:
> [  428.714758]  amdgpu_bo_pin+0x1b5/0x310 [amdgpu]
> [  428.714915]  amdgpu_userq_get_doorbell_index+0x71/0x270 [amdgpu]
> 
> The warning occurs due to improper buffer object state management when
> setting up doorbells for user queues. The key issues addressed are:
> 
> 1. Incorrect locking sequence - pinning before reservation
> 2. Inadequate error handling paths
> 3. Race conditions during BO validation
> 
> Changes made:
> 1. Reordered operations to reserve BO before pinning
> 2. Added proper cleanup path for reservation failures
> 3. Improved error handling with separate unpin/unreserve paths

Looks correct to me, but IIRC I've already seen the same patch from Arun.

It's just that Arun probably hasn't committed it yet because he's on vacation this week.

Regards,
Christian.

> 
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 33 +++++++++++++----------
>  1 file changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 0f1cb6bc63db..444a0f5d98ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -283,28 +283,31 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
>  	uint64_t index;
>  	struct drm_gem_object *gobj;
>  	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
> +	struct amdgpu_bo *bo;
>  	int r, db_size;
>  
>  	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
> -	if (gobj == NULL) {
> +	if (!gobj) {
>  		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
>  		return -EINVAL;
>  	}
>  
> -	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
> +	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>  	drm_gem_object_put(gobj);
> +	db_obj->obj = bo;
>  
> -	/* Pin the BO before generating the index, unpin in queue destroy */
> -	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
> +	/* First reserve the BO to ensure proper state */
> +	r = amdgpu_bo_reserve(bo, true);
>  	if (r) {
> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
> +		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell BO\n");
>  		goto unref_bo;
>  	}
>  
> -	r = amdgpu_bo_reserve(db_obj->obj, true);
> +	/* Validate and pin the BO */
> +	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_DOORBELL);
>  	if (r) {
> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
> -		goto unpin_bo;
> +		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell BO\n");
> +		goto unreserve_bo;
>  	}
>  
>  	switch (db_info->queue_type) {
> @@ -325,24 +328,26 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
>  		break;
>  
>  	default:
> -		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
> +		drm_file_err(uq_mgr->file, "IP %d not supported\n",
>  			     db_info->queue_type);
>  		r = -EINVAL;
>  		goto unpin_bo;
>  	}
>  
> -	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
> +	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, bo,
>  					     db_info->doorbell_offset, db_size);
>  	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
>  		       "[Usermode queues] doorbell index=%lld\n", index);
> -	amdgpu_bo_unreserve(db_obj->obj);
> +
> +	amdgpu_bo_unreserve(bo);
>  	return index;
>  
>  unpin_bo:
> -	amdgpu_bo_unpin(db_obj->obj);
> -
> +	amdgpu_bo_unpin(bo);
> +unreserve_bo:
> +	amdgpu_bo_unreserve(bo);
>  unref_bo:
> -	amdgpu_bo_unref(&db_obj->obj);
> +	amdgpu_bo_unref(&bo);
>  	return r;
>  }
>  



More information about the amd-gfx mailing list