[PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Nov 6 12:31:49 UTC 2023
Am 01.11.23 um 23:00 schrieb Felix Kuehling:
> On 2023-10-31 11:18, Alex Deucher wrote:
>> On Tue, Oct 31, 2023 at 11:12 AM Christian König
>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>> When clearing the root PD fails we need to properly release it again.
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Acked-by: Alex Deucher <alexander.deucher at amd.com>
> Has this been submitted? I see some intermittent failures in the PSDB
> that may be related to this.
Not yet, but I'm going to push it now.
This is just a fix for the error code path; fixing the underlying
problem was more important.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>
>>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31
>>> +++++++++++++-------------
>>> 1 file changed, 16 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index d72daf15662f..5877f6e9b893 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm,
>>> long timeout)
>>> * Returns:
>>> * 0 for success, error for failure.
>>> */
>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm
>>> *vm, int32_t xcp_id)
>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> + int32_t xcp_id)
>>> {
>>> struct amdgpu_bo *root_bo;
>>> struct amdgpu_bo_vm *root;
>>> @@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm, int32_t xcp
>>> INIT_LIST_HEAD(&vm->done);
>>> INIT_LIST_HEAD(&vm->pt_freed);
>>> INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>>> + INIT_KFIFO(vm->faults);
>>>
>>> /* create scheduler entities for page table updates */
>>> r = drm_sched_entity_init(&vm->immediate,
>>> DRM_SCHED_PRIORITY_NORMAL,
>>> @@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device
>>> *adev, struct amdgpu_vm *vm, int32_t xcp
>>> false, &root, xcp_id);
>>> if (r)
>>> goto error_free_delayed;
>>> - root_bo = &root->bo;
>>> +
>>> + root_bo = amdgpu_bo_ref(&root->bo);
>>> r = amdgpu_bo_reserve(root_bo, true);
>>> - if (r)
>>> - goto error_free_root;
>>> + if (r) {
>>> + amdgpu_bo_unref(&root->shadow);
>>> + amdgpu_bo_unref(&root_bo);
>>> + goto error_free_delayed;
>>> + }
>>>
>>> + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>>> r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
>>> if (r)
>>> - goto error_unreserve;
>>> -
>>> - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>>> + goto error_free_root;
>>>
>>> r = amdgpu_vm_pt_clear(adev, vm, root, false);
>>> if (r)
>>> - goto error_unreserve;
>>> + goto error_free_root;
>>>
>>> amdgpu_bo_unreserve(vm->root.bo);
>>> -
>>> - INIT_KFIFO(vm->faults);
>>> + amdgpu_bo_unref(&root_bo);
>>>
>>> return 0;
>>>
>>> -error_unreserve:
>>> - amdgpu_bo_unreserve(vm->root.bo);
>>> -
>>> error_free_root:
>>> - amdgpu_bo_unref(&root->shadow);
>>> + amdgpu_vm_pt_free_root(adev, vm);
>>> + amdgpu_bo_unreserve(vm->root.bo);
>>> amdgpu_bo_unref(&root_bo);
>>> - vm->root.bo = NULL;
>>>
>>> error_free_delayed:
>>> dma_fence_put(vm->last_tlb_flush);
>>> --
>>> 2.34.1
>>>
More information about the amd-gfx mailing list