[PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init
Felix Kuehling
felix.kuehling at amd.com
Wed Nov 1 22:00:44 UTC 2023
On 2023-10-31 11:18, Alex Deucher wrote:
> On Tue, Oct 31, 2023 at 11:12 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> When clearing the root PD fails we need to properly release it again.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
> Acked-by: Alex Deucher <alexander.deucher at amd.com>
Has this been submitted? I see some intermittent failures in the PSDB
that may be related to this.
Regards,
Felix
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++-------------
>> 1 file changed, 16 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index d72daf15662f..5877f6e9b893 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>> * Returns:
>> * 0 for success, error for failure.
>> */
>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> + int32_t xcp_id)
>> {
>> struct amdgpu_bo *root_bo;
>> struct amdgpu_bo_vm *root;
>> @@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>> INIT_LIST_HEAD(&vm->done);
>> INIT_LIST_HEAD(&vm->pt_freed);
>> INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>> + INIT_KFIFO(vm->faults);
>>
>> /* create scheduler entities for page table updates */
>> r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
>> @@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>> false, &root, xcp_id);
>> if (r)
>> goto error_free_delayed;
>> - root_bo = &root->bo;
>> +
>> + root_bo = amdgpu_bo_ref(&root->bo);
>> r = amdgpu_bo_reserve(root_bo, true);
>> - if (r)
>> - goto error_free_root;
>> + if (r) {
>> + amdgpu_bo_unref(&root->shadow);
>> + amdgpu_bo_unref(&root_bo);
>> + goto error_free_delayed;
>> + }
>>
>> + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>> r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
>> if (r)
>> - goto error_unreserve;
>> -
>> - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>> + goto error_free_root;
>>
>> r = amdgpu_vm_pt_clear(adev, vm, root, false);
>> if (r)
>> - goto error_unreserve;
>> + goto error_free_root;
>>
>> amdgpu_bo_unreserve(vm->root.bo);
>> -
>> - INIT_KFIFO(vm->faults);
>> + amdgpu_bo_unref(&root_bo);
>>
>> return 0;
>>
>> -error_unreserve:
>> - amdgpu_bo_unreserve(vm->root.bo);
>> -
>> error_free_root:
>> - amdgpu_bo_unref(&root->shadow);
>> + amdgpu_vm_pt_free_root(adev, vm);
>> + amdgpu_bo_unreserve(vm->root.bo);
>> amdgpu_bo_unref(&root_bo);
>> - vm->root.bo = NULL;
>>
>> error_free_delayed:
>> dma_fence_put(vm->last_tlb_flush);
>> --
>> 2.34.1
>>
More information about the amd-gfx mailing list