[PATCH 1/1] drm/amdgpu: recover gart table at resume

Christian König ckoenig.leichtzumerken at gmail.com
Tue Oct 19 15:54:28 UTC 2021


Am 19.10.21 um 17:41 schrieb Andrey Grodzovsky:
>
> On 2021-10-19 9:22 a.m., Nirmoy Das wrote:
>> Get rid of pin/unpin and evict and swap back gart
>> page table which should make things less likely to break.
>
> +Christian
>
> Could you guys also clarify what exactly are the stability issues this 
> fixes ?

When we evict the GART table during suspend it is theoretically possible 
that we run into an OOM situation.

But since the OOM killer and the buffer move functions are already 
disabled, that basically cannot be handled gracefully.

When we just keep the GART pinned all the time and restore its content 
during resume from the metadata we should be able to avoid any memory 
allocation for the move.

Christian.

>
> Andrey
>
>
>>
>> Also remove 2nd call to amdgpu_device_evict_resources()
>> as we don't need it.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 -----
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c     | 16 ++++++++++++----
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      | 17 +++++++++++++----
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 16 ++++++++++++----
>>   4 files changed, 37 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 41ce86244144..22ff229ab981 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3941,11 +3941,6 @@ int amdgpu_device_suspend(struct drm_device 
>> *dev, bool fbcon)
>>       amdgpu_fence_driver_hw_fini(adev);
>>         amdgpu_device_ip_suspend_phase2(adev);
>> -    /* This second call to evict device resources is to evict
>> -     * the gart page table using the CPU.
>> -     */
>> -    amdgpu_device_evict_resources(adev);
>> -
>>       return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 3ec5ff5a6dbe..18e3f3c5aae6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -992,9 +992,16 @@ static int gmc_v10_0_gart_enable(struct 
>> amdgpu_device *adev)
>>           return -EINVAL;
>>       }
>>   -    r = amdgpu_gart_table_vram_pin(adev);
>> -    if (r)
>> -        return r;
>> +    if (!adev->in_suspend) {
>> +        r = amdgpu_gart_table_vram_pin(adev);
>> +        if (r)
>> +            return r;
>> +    } else {
>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>> +                                TTM_PL_TT));
>> +        if (r)
>> +            return r;
>> +    }
>>         r = adev->gfxhub.funcs->gart_enable(adev);
>>       if (r)
>> @@ -1062,7 +1069,8 @@ static void gmc_v10_0_gart_disable(struct 
>> amdgpu_device *adev)
>>   {
>>       adev->gfxhub.funcs->gart_disable(adev);
>>       adev->mmhub.funcs->gart_disable(adev);
>> -    amdgpu_gart_table_vram_unpin(adev);
>> +    if (!adev->in_suspend)
>> +        amdgpu_gart_table_vram_unpin(adev);
>>   }
>>     static int gmc_v10_0_hw_fini(void *handle)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 492ebed2915b..0ef50ad3d7d5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -837,9 +837,17 @@ static int gmc_v8_0_gart_enable(struct 
>> amdgpu_device *adev)
>>           dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>>           return -EINVAL;
>>       }
>> -    r = amdgpu_gart_table_vram_pin(adev);
>> -    if (r)
>> -        return r;
>> +
>> +    if (!adev->in_suspend) {
>> +        r = amdgpu_gart_table_vram_pin(adev);
>> +        if (r)
>> +            return r;
>> +    } else {
>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>> +                                TTM_PL_TT));
>> +        if (r)
>> +            return r;
>> +    }
>>         table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>>   @@ -992,7 +1000,8 @@ static void gmc_v8_0_gart_disable(struct 
>> amdgpu_device *adev)
>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>>       WREG32(mmVM_L2_CNTL, tmp);
>>       WREG32(mmVM_L2_CNTL2, 0);
>> -    amdgpu_gart_table_vram_unpin(adev);
>> +    if (!adev->in_suspend)
>> +        amdgpu_gart_table_vram_unpin(adev);
>>   }
>>     /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index cb82404df534..1bbcefd53974 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -1714,9 +1714,16 @@ static int gmc_v9_0_gart_enable(struct 
>> amdgpu_device *adev)
>>           return -EINVAL;
>>       }
>>   -    r = amdgpu_gart_table_vram_pin(adev);
>> -    if (r)
>> -        return r;
>> +    if (!adev->in_suspend) {
>> +        r = amdgpu_gart_table_vram_pin(adev);
>> +        if (r)
>> +            return r;
>> +    } else {
>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>> +                                TTM_PL_TT));
>> +        if (r)
>> +            return r;
>> +    }
>>         r = adev->gfxhub.funcs->gart_enable(adev);
>>       if (r)
>> @@ -1793,7 +1800,8 @@ static void gmc_v9_0_gart_disable(struct 
>> amdgpu_device *adev)
>>   {
>>       adev->gfxhub.funcs->gart_disable(adev);
>>       adev->mmhub.funcs->gart_disable(adev);
>> -    amdgpu_gart_table_vram_unpin(adev);
>> +    if (!adev->in_suspend)
>> +        amdgpu_gart_table_vram_unpin(adev);
>>   }
>>     static int gmc_v9_0_hw_fini(void *handle)



More information about the amd-gfx mailing list