[PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init

Felix Kuehling felix.kuehling at amd.com
Wed Nov 1 22:00:44 UTC 2023


On 2023-10-31 11:18, Alex Deucher wrote:
> On Tue, Oct 31, 2023 at 11:12 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> When clearing the root PD fails, we need to properly release it again.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
> Acked-by: Alex Deucher <alexander.deucher at amd.com>
Has this been submitted? I see some intermittent failures in the PSDB 
that may be related to this.

Regards,
   Felix



>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++-------------
>>   1 file changed, 16 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index d72daf15662f..5877f6e9b893 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>>    * Returns:
>>    * 0 for success, error for failure.
>>    */
>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> +                  int32_t xcp_id)
>>   {
>>          struct amdgpu_bo *root_bo;
>>          struct amdgpu_bo_vm *root;
>> @@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>          INIT_LIST_HEAD(&vm->done);
>>          INIT_LIST_HEAD(&vm->pt_freed);
>>          INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>> +       INIT_KFIFO(vm->faults);
>>
>>          /* create scheduler entities for page table updates */
>>          r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
>> @@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>                                  false, &root, xcp_id);
>>          if (r)
>>                  goto error_free_delayed;
>> -       root_bo = &root->bo;
>> +
>> +       root_bo = amdgpu_bo_ref(&root->bo);
>>          r = amdgpu_bo_reserve(root_bo, true);
>> -       if (r)
>> -               goto error_free_root;
>> +       if (r) {
>> +               amdgpu_bo_unref(&root->shadow);
>> +               amdgpu_bo_unref(&root_bo);
>> +               goto error_free_delayed;
>> +       }
>>
>> +       amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>>          r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
>>          if (r)
>> -               goto error_unreserve;
>> -
>> -       amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
>> +               goto error_free_root;
>>
>>          r = amdgpu_vm_pt_clear(adev, vm, root, false);
>>          if (r)
>> -               goto error_unreserve;
>> +               goto error_free_root;
>>
>>          amdgpu_bo_unreserve(vm->root.bo);
>> -
>> -       INIT_KFIFO(vm->faults);
>> +       amdgpu_bo_unref(&root_bo);
>>
>>          return 0;
>>
>> -error_unreserve:
>> -       amdgpu_bo_unreserve(vm->root.bo);
>> -
>>   error_free_root:
>> -       amdgpu_bo_unref(&root->shadow);
>> +       amdgpu_vm_pt_free_root(adev, vm);
>> +       amdgpu_bo_unreserve(vm->root.bo);
>>          amdgpu_bo_unref(&root_bo);
>> -       vm->root.bo = NULL;
>>
>>   error_free_delayed:
>>          dma_fence_put(vm->last_tlb_flush);
>> --
>> 2.34.1
>>


More information about the amd-gfx mailing list