[PATCH] drm/amdgpu: Fix TTM validation warnings in user queue doorbell handling

Paneer Selvam, Arunpravin arunpravin.paneerselvam at amd.com
Tue May 13 17:18:00 UTC 2025



On 5/9/2025 4:51 PM, Christian König wrote:
> On 5/9/25 12:10, Jesse.Zhang wrote:
>> This patch resolves a kernel warning that occurs during user queue initialization:
>>
>> [  428.714241] WARNING: CPU: 23 PID: 1965 at drivers/gpu/drm/ttm/ttm_bo.c:823 ttm_bo_validate+0x15c/0x1a0 [ttm]
>> [  428.714758] Call Trace:
>> [  428.714758]  amdgpu_bo_pin+0x1b5/0x310 [amdgpu]
>> [  428.714915]  amdgpu_userq_get_doorbell_index+0x71/0x270 [amdgpu]
>>
>> The warning occurs due to improper buffer object state management when
>> setting up doorbells for user queues. The key issues addressed are:
>>
>> 1. Incorrect locking sequence - pinning before reservation
>> 2. Inadequate error handling paths
>> 3. Race conditions during BO validation
>>
>> Changes made:
>> 1. Reordered operations to reserve BO before pinning
>> 2. Added proper cleanup path for reservation failures
>> 3. Improved error handling with separate unpin/unreserve paths
> Looks correct to me, but IIRC I've already seen the same patch from Arun.
>
> It's just that Arun probably hasn't committed it yet because he's on vacation this week.
I committed my patch into amd-staging-drm-next.

Thanks,
Arun.
>
> Regards,
> Christian.
>
>> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 33 +++++++++++++----------
>>   1 file changed, 19 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> index 0f1cb6bc63db..444a0f5d98ce 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> @@ -283,28 +283,31 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
>>   	uint64_t index;
>>   	struct drm_gem_object *gobj;
>>   	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
>> +	struct amdgpu_bo *bo;
>>   	int r, db_size;
>>   
>>   	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
>> -	if (gobj == NULL) {
>> +	if (!gobj) {
>>   		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
>>   		return -EINVAL;
>>   	}
>>   
>> -	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>> +	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>>   	drm_gem_object_put(gobj);
>> +	db_obj->obj = bo;
>>   
>> -	/* Pin the BO before generating the index, unpin in queue destroy */
>> -	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
>> +	/* First reserve the BO to ensure proper state */
>> +	r = amdgpu_bo_reserve(bo, true);
>>   	if (r) {
>> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
>> +		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell BO\n");
>>   		goto unref_bo;
>>   	}
>>   
>> -	r = amdgpu_bo_reserve(db_obj->obj, true);
>> +	/* Validate and pin the BO */
>> +	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_DOORBELL);
>>   	if (r) {
>> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
>> -		goto unpin_bo;
>> +		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell BO\n");
>> +		goto unreserve_bo;
>>   	}
>>   
>>   	switch (db_info->queue_type) {
>> @@ -325,24 +328,26 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
>>   		break;
>>   
>>   	default:
>> -		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
>> +		drm_file_err(uq_mgr->file, "IP %d not supported\n",
>>   			     db_info->queue_type);
>>   		r = -EINVAL;
>>   		goto unpin_bo;
>>   	}
>>   
>> -	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
>> +	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, bo,
>>   					     db_info->doorbell_offset, db_size);
>>   	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
>>   		       "[Usermode queues] doorbell index=%lld\n", index);
>> -	amdgpu_bo_unreserve(db_obj->obj);
>> +
>> +	amdgpu_bo_unreserve(bo);
>>   	return index;
>>   
>>   unpin_bo:
>> -	amdgpu_bo_unpin(db_obj->obj);
>> -
>> +	amdgpu_bo_unpin(bo);
>> +unreserve_bo:
>> +	amdgpu_bo_unreserve(bo);
>>   unref_bo:
>> -	amdgpu_bo_unref(&db_obj->obj);
>> +	amdgpu_bo_unref(&bo);
>>   	return r;
>>   }
>>   



More information about the amd-gfx mailing list