[PATCH 1/1] drm/amdgpu: recover gart table at resume

Andrey Grodzovsky andrey.grodzovsky at amd.com
Tue Oct 19 16:01:55 UTC 2021


On 2021-10-19 11:54 a.m., Christian König wrote:
> Am 19.10.21 um 17:41 schrieb Andrey Grodzovsky:
>>
>> On 2021-10-19 9:22 a.m., Nirmoy Das wrote:
>>> Get rid of pin/unpin and evict and swap back gart
>>> page table which should make things less likely to break.
>>
>> +Christian
>>
>> Could you guys also clarify what exactly the stability issues are 
>> that this fixes?
>
> When we evict the GART table during suspend it is theoretically 
> possible that we run into an OOM situation.
>
> But since the OOM killer and the buffer move functions are already 
> disabled, that basically cannot be handled gracefully.
>
> When we just keep the GART pinned all the time and restore its 
> content during resume from the metadata we should be able to avoid any 
> memory allocation for the move.
>
> Christian.


Got it.

Andrey


>
>>
>> Andrey
>>
>>
>>>
>>> Also remove 2nd call to amdgpu_device_evict_resources()
>>> as we don't need it.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 -----
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c     | 16 ++++++++++++----
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      | 17 +++++++++++++----
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 16 ++++++++++++----
>>>   4 files changed, 37 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 41ce86244144..22ff229ab981 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3941,11 +3941,6 @@ int amdgpu_device_suspend(struct drm_device 
>>> *dev, bool fbcon)
>>>       amdgpu_fence_driver_hw_fini(adev);
>>>         amdgpu_device_ip_suspend_phase2(adev);
>>> -    /* This second call to evict device resources is to evict
>>> -     * the gart page table using the CPU.
>>> -     */
>>> -    amdgpu_device_evict_resources(adev);
>>> -
>>>       return 0;
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> index 3ec5ff5a6dbe..18e3f3c5aae6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> @@ -992,9 +992,16 @@ static int gmc_v10_0_gart_enable(struct 
>>> amdgpu_device *adev)
>>>           return -EINVAL;
>>>       }
>>>   -    r = amdgpu_gart_table_vram_pin(adev);
>>> -    if (r)
>>> -        return r;
>>> +    if (!adev->in_suspend) {
>>> +        r = amdgpu_gart_table_vram_pin(adev);
>>> +        if (r)
>>> +            return r;
>>> +    } else {
>>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> +                                TTM_PL_TT));
>>> +        if (r)
>>> +            return r;
>>> +    }
>>>         r = adev->gfxhub.funcs->gart_enable(adev);
>>>       if (r)
>>> @@ -1062,7 +1069,8 @@ static void gmc_v10_0_gart_disable(struct 
>>> amdgpu_device *adev)
>>>   {
>>>       adev->gfxhub.funcs->gart_disable(adev);
>>>       adev->mmhub.funcs->gart_disable(adev);
>>> -    amdgpu_gart_table_vram_unpin(adev);
>>> +    if (!adev->in_suspend)
>>> +        amdgpu_gart_table_vram_unpin(adev);
>>>   }
>>>     static int gmc_v10_0_hw_fini(void *handle)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> index 492ebed2915b..0ef50ad3d7d5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> @@ -837,9 +837,17 @@ static int gmc_v8_0_gart_enable(struct 
>>> amdgpu_device *adev)
>>>           dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>>>           return -EINVAL;
>>>       }
>>> -    r = amdgpu_gart_table_vram_pin(adev);
>>> -    if (r)
>>> -        return r;
>>> +
>>> +    if (!adev->in_suspend) {
>>> +        r = amdgpu_gart_table_vram_pin(adev);
>>> +        if (r)
>>> +            return r;
>>> +    } else {
>>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> +                                TTM_PL_TT));
>>> +        if (r)
>>> +            return r;
>>> +    }
>>>         table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>>>   @@ -992,7 +1000,8 @@ static void gmc_v8_0_gart_disable(struct 
>>> amdgpu_device *adev)
>>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>>>       WREG32(mmVM_L2_CNTL, tmp);
>>>       WREG32(mmVM_L2_CNTL2, 0);
>>> -    amdgpu_gart_table_vram_unpin(adev);
>>> +    if (!adev->in_suspend)
>>> +        amdgpu_gart_table_vram_unpin(adev);
>>>   }
>>>     /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index cb82404df534..1bbcefd53974 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -1714,9 +1714,16 @@ static int gmc_v9_0_gart_enable(struct 
>>> amdgpu_device *adev)
>>>           return -EINVAL;
>>>       }
>>>   -    r = amdgpu_gart_table_vram_pin(adev);
>>> -    if (r)
>>> -        return r;
>>> +    if (!adev->in_suspend) {
>>> +        r = amdgpu_gart_table_vram_pin(adev);
>>> +        if (r)
>>> +            return r;
>>> +    } else {
>>> +        r = amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev,
>>> +                                TTM_PL_TT));
>>> +        if (r)
>>> +            return r;
>>> +    }
>>>         r = adev->gfxhub.funcs->gart_enable(adev);
>>>       if (r)
>>> @@ -1793,7 +1800,8 @@ static void gmc_v9_0_gart_disable(struct 
>>> amdgpu_device *adev)
>>>   {
>>>       adev->gfxhub.funcs->gart_disable(adev);
>>>       adev->mmhub.funcs->gart_disable(adev);
>>> -    amdgpu_gart_table_vram_unpin(adev);
>>> +    if (!adev->in_suspend)
>>> +        amdgpu_gart_table_vram_unpin(adev);
>>>   }
>>>     static int gmc_v9_0_hw_fini(void *handle)
>


More information about the amd-gfx mailing list