[PATCH v2 3/3] drm/amdgpu: recover gart table at resume
Das, Nirmoy
nirmoy.das at amd.com
Wed Oct 20 08:41:22 UTC 2021
On 10/20/2021 8:52 AM, Christian König wrote:
> Am 19.10.21 um 20:14 schrieb Nirmoy Das:
>> Get rid of pin/unpin of the gart BO at resume/suspend and
>> instead pin only once and try to recover the gart content
>> at resume time. This is much more stable in case there
>> is an OOM situation at the 2nd call to amdgpu_device_evict_resources()
>> while evicting the GART table.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 42 ++++++++++++----------
>> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 ++---
>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 10 +++---
>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 +++---
>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++---
>> 6 files changed, 45 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 5807df52031c..f69e613805db 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3941,10 +3941,6 @@ int amdgpu_device_suspend(struct drm_device
>> *dev, bool fbcon)
>> amdgpu_fence_driver_hw_fini(adev);
>>
>> amdgpu_device_ip_suspend_phase2(adev);
>> - /* This second call to evict device resources is to evict
>> - * the gart page table using the CPU.
>> - */
>> - amdgpu_device_evict_resources(adev);
>>
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index d3e4203f6217..97a9f61fa106 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -107,33 +107,37 @@ void amdgpu_gart_dummy_page_fini(struct
>> amdgpu_device *adev)
>> *
>> * @adev: amdgpu_device pointer
>> *
>> - * Allocate video memory for GART page table
>> + * Allocate and pin video memory for GART page table
>> * (pcie r4xx, r5xx+). These asics require the
>> * gart table to be in video memory.
>> * Returns 0 for success, error for failure.
>> */
>> int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
>> {
>> + struct amdgpu_bo_param bp;
>> int r;
>>
>> - if (adev->gart.bo == NULL) {
>> - struct amdgpu_bo_param bp;
>> -
>> - memset(&bp, 0, sizeof(bp));
>> - bp.size = adev->gart.table_size;
>> - bp.byte_align = PAGE_SIZE;
>> - bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>> - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
>> - bp.type = ttm_bo_type_kernel;
>> - bp.resv = NULL;
>> - bp.bo_ptr_size = sizeof(struct amdgpu_bo);
>> -
>> - r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
>> - if (r) {
>> - return r;
>> - }
>> - }
>> + if (adev->gart.bo != NULL)
>> + return 0;
>> +
>> + memset(&bp, 0, sizeof(bp));
>> + bp.size = adev->gart.table_size;
>> + bp.byte_align = PAGE_SIZE;
>> + bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>> + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
>> + bp.type = ttm_bo_type_kernel;
>> + bp.resv = NULL;
>> + bp.bo_ptr_size = sizeof(struct amdgpu_bo);
>> +
>> + r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
>> + if (r)
>> + return r;
>> +
>> + r = amdgpu_gart_table_vram_pin(adev);
>> + if (r)
>> + return r;
>> +
>
> Instead of all this you should be able to use amdgpu_bo_create_kernel().
OK, with that we can remove amdgpu_gart_table_vram_pin() completely.
>
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 3ec5ff5a6dbe..75d584e1b0e9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -992,9 +992,11 @@ static int gmc_v10_0_gart_enable(struct
>> amdgpu_device *adev)
>> return -EINVAL;
>> }
>>
>> - r = amdgpu_gart_table_vram_pin(adev);
>> - if (r)
>> - return r;
>> + if (adev->in_suspend) {
>> + r = amdgpu_gtt_mgr_recover(adev);
>> + if (r)
>> + return r;
>> + }
>
> Please drop the in_suspend check here.
>
> If I'm not completely mistaken, the GTT domain should already be
> initialized here, and if it's not, then we can easily check for that in
> amdgpu_gtt_mgr_recover().
Yes it is. I will remove that.
Thanks,
Nirmoy
>
> Christian.
>
>>
>> r = adev->gfxhub.funcs->gart_enable(adev);
>> if (r)
>> @@ -1062,7 +1064,6 @@ static void gmc_v10_0_gart_disable(struct
>> amdgpu_device *adev)
>> {
>> adev->gfxhub.funcs->gart_disable(adev);
>> adev->mmhub.funcs->gart_disable(adev);
>> - amdgpu_gart_table_vram_unpin(adev);
>> }
>>
>> static int gmc_v10_0_hw_fini(void *handle)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 0a50fdaced7e..02e90d9443c1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -620,9 +620,12 @@ static int gmc_v7_0_gart_enable(struct
>> amdgpu_device *adev)
>> dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>> return -EINVAL;
>> }
>> - r = amdgpu_gart_table_vram_pin(adev);
>> - if (r)
>> - return r;
>> +
>> + if (adev->in_suspend) {
>> + r = amdgpu_gtt_mgr_recover(adev);
>> + if (r)
>> + return r;
>> + }
>>
>> table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>>
>> @@ -758,7 +761,6 @@ static void gmc_v7_0_gart_disable(struct
>> amdgpu_device *adev)
>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>> WREG32(mmVM_L2_CNTL, tmp);
>> WREG32(mmVM_L2_CNTL2, 0);
>> - amdgpu_gart_table_vram_unpin(adev);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 492ebed2915b..dc2577e37688 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -837,9 +837,12 @@ static int gmc_v8_0_gart_enable(struct
>> amdgpu_device *adev)
>> dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>> return -EINVAL;
>> }
>> - r = amdgpu_gart_table_vram_pin(adev);
>> - if (r)
>> - return r;
>> +
>> + if (adev->in_suspend) {
>> + r = amdgpu_gtt_mgr_recover(adev);
>> + if (r)
>> + return r;
>> + }
>>
>> table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>>
>> @@ -992,7 +995,6 @@ static void gmc_v8_0_gart_disable(struct
>> amdgpu_device *adev)
>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>> WREG32(mmVM_L2_CNTL, tmp);
>> WREG32(mmVM_L2_CNTL2, 0);
>> - amdgpu_gart_table_vram_unpin(adev);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index cb82404df534..732d91dbf449 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -1714,9 +1714,11 @@ static int gmc_v9_0_gart_enable(struct
>> amdgpu_device *adev)
>> return -EINVAL;
>> }
>>
>> - r = amdgpu_gart_table_vram_pin(adev);
>> - if (r)
>> - return r;
>> + if (adev->in_suspend) {
>> + r = amdgpu_gtt_mgr_recover(adev);
>> + if (r)
>> + return r;
>> + }
>>
>> r = adev->gfxhub.funcs->gart_enable(adev);
>> if (r)
>> @@ -1793,7 +1795,6 @@ static void gmc_v9_0_gart_disable(struct
>> amdgpu_device *adev)
>> {
>> adev->gfxhub.funcs->gart_disable(adev);
>> adev->mmhub.funcs->gart_disable(adev);
>> - amdgpu_gart_table_vram_unpin(adev);
>> }
>>
>> static int gmc_v9_0_hw_fini(void *handle)
>> --
>> 2.32.0
>>
>
More information about the amd-gfx
mailing list