[PATCH 1/1] drm/amdgpu: recover gart table at resume
Andrey Grodzovsky
andrey.grodzovsky at amd.com
Tue Oct 19 16:01:55 UTC 2021
On 2021-10-19 11:54 a.m., Christian König wrote:
> Am 19.10.21 um 17:41 schrieb Andrey Grodzovsky:
>>
>> On 2021-10-19 9:22 a.m., Nirmoy Das wrote:
>>> Get rid of pin/unpin and of evicting and swapping back the gart
>>> page table, which should make things less likely to break.
>>
>> +Christian
>>
>> Could you guys also clarify what exactly the stability issues are
>> that this fixes?
>
> When we evict the GART table during suspend it is theoretically
> possible that we run into an OOM situation.
>
> But since the OOM killer and the buffer move functions are already
> disabled, that is basically not gracefully handleable.
>
> When we just keep the GART pinned all the time and restore its
> content during resume from the metadata we should be able to avoid any
> memory allocation for the move.
>
> Christian.
Got it.
Andrey
>
>>
>> Andrey
>>
>>
>>>
>>> Also remove 2nd call to amdgpu_device_evict_resources()
>>> as we don't need it.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 -----
>>> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 ++++++++++++----
>>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 17 +++++++++++++----
>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++++++++++++----
>>> 4 files changed, 37 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 41ce86244144..22ff229ab981 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3941,11 +3941,6 @@ int amdgpu_device_suspend(struct drm_device
>>> *dev, bool fbcon)
>>> amdgpu_fence_driver_hw_fini(adev);
>>> amdgpu_device_ip_suspend_phase2(adev);
>>> - /* This second call to evict device resources is to evict
>>> - * the gart page table using the CPU.
>>> - */
>>> - amdgpu_device_evict_resources(adev);
>>> -
>>> return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> index 3ec5ff5a6dbe..18e3f3c5aae6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> @@ -992,9 +992,16 @@ static int gmc_v10_0_gart_enable(struct
>>> amdgpu_device *adev)
>>> return -EINVAL;
>>> }
>>> - r = amdgpu_gart_table_vram_pin(adev);
>>> - if (r)
>>> - return r;
>>> + if (!adev->in_suspend) {
>>> + r = amdgpu_gart_table_vram_pin(adev);
>>> + if (r)
>>> + return r;
>>> + } else {
>>> + r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> + TTM_PL_TT));
>>> + if (r)
>>> + return r;
>>> + }
>>> r = adev->gfxhub.funcs->gart_enable(adev);
>>> if (r)
>>> @@ -1062,7 +1069,8 @@ static void gmc_v10_0_gart_disable(struct
>>> amdgpu_device *adev)
>>> {
>>> adev->gfxhub.funcs->gart_disable(adev);
>>> adev->mmhub.funcs->gart_disable(adev);
>>> - amdgpu_gart_table_vram_unpin(adev);
>>> + if (!adev->in_suspend)
>>> + amdgpu_gart_table_vram_unpin(adev);
>>> }
>>> static int gmc_v10_0_hw_fini(void *handle)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> index 492ebed2915b..0ef50ad3d7d5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> @@ -837,9 +837,17 @@ static int gmc_v8_0_gart_enable(struct
>>> amdgpu_device *adev)
>>> dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>>> return -EINVAL;
>>> }
>>> - r = amdgpu_gart_table_vram_pin(adev);
>>> - if (r)
>>> - return r;
>>> +
>>> + if (!adev->in_suspend) {
>>> + r = amdgpu_gart_table_vram_pin(adev);
>>> + if (r)
>>> + return r;
>>> + } else {
>>> + r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> + TTM_PL_TT));
>>> + if (r)
>>> + return r;
>>> + }
>>> table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>>> @@ -992,7 +1000,8 @@ static void gmc_v8_0_gart_disable(struct
>>> amdgpu_device *adev)
>>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>>> WREG32(mmVM_L2_CNTL, tmp);
>>> WREG32(mmVM_L2_CNTL2, 0);
>>> - amdgpu_gart_table_vram_unpin(adev);
>>> + if (!adev->in_suspend)
>>> + amdgpu_gart_table_vram_unpin(adev);
>>> }
>>> /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index cb82404df534..1bbcefd53974 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -1714,9 +1714,16 @@ static int gmc_v9_0_gart_enable(struct
>>> amdgpu_device *adev)
>>> return -EINVAL;
>>> }
>>> - r = amdgpu_gart_table_vram_pin(adev);
>>> - if (r)
>>> - return r;
>>> + if (!adev->in_suspend) {
>>> + r = amdgpu_gart_table_vram_pin(adev);
>>> + if (r)
>>> + return r;
>>> + } else {
>>> + r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> + TTM_PL_TT));
>>> + if (r)
>>> + return r;
>>> + }
>>> r = adev->gfxhub.funcs->gart_enable(adev);
>>> if (r)
>>> @@ -1793,7 +1800,8 @@ static void gmc_v9_0_gart_disable(struct
>>> amdgpu_device *adev)
>>> {
>>> adev->gfxhub.funcs->gart_disable(adev);
>>> adev->mmhub.funcs->gart_disable(adev);
>>> - amdgpu_gart_table_vram_unpin(adev);
>>> + if (!adev->in_suspend)
>>> + amdgpu_gart_table_vram_unpin(adev);
>>> }
>>> static int gmc_v9_0_hw_fini(void *handle)
>
More information about the amd-gfx
mailing list