[PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload

Liu, Monk Monk.Liu at amd.com
Fri Nov 29 01:58:35 UTC 2019


The content of CSIB is always static, I submitted a patch to use the re-init and get rid of pin/unpin CSIB in hw_ini/fini,  (my purpose is to fix the double unpin warning during unload )
_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: Christian König <ckoenig.leichtzumerken at gmail.com> 
Sent: Thursday, November 28, 2019 7:51 PM
To: Liu, Monk <Monk.Liu at amd.com>; Yuan, Xiaojie <Xiaojie.Yuan at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload

Hi Monk,

if the content of the CSIB is constant then it is certainly better to just re-initialize it.

This also prevents from corruption because of VRAM lost.

Christian.

Am 28.11.19 um 03:53 schrieb Liu, Monk:
> Hi Xiaojie
>
> For SRIOV we don't use suspend so I didn't think to that part, thanks for the remind !
> But we still need to fix this call trace issue anyway (our jenkins 
> testing  system consider such call trace as an error )
>
> How about we do "  adev->gfx.rlc.funcs->get_csb_buffer(adev, 
> dst_ptr);" in the hw_init() ? this way You don't need to evict the CSIB during suspend and the CSIB always will be re-initialized after S3 resume ?
>
> @Deucher, Alexander @Koenig, Christian what's your opinion ?
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>
> -----Original Message-----
> From: Yuan, Xiaojie <Xiaojie.Yuan at amd.com>
> Sent: Tuesday, November 26, 2019 9:10 PM
> To: Liu, Monk <Monk.Liu at amd.com>
> Cc: amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload
>
> Hi Monk,
>
> hw_fini() is called in suspend code path as well. I'm wondering how csb can be evicted if it's not unpined before suspend.
>
> BR,
> Xiaojie
>
>> On Nov 26, 2019, at 7:50 PM, Monk Liu <Monk.Liu at amd.com> wrote:
>>
>> kernel would report a warning on double unpin on the csb BO because 
>> we unpin it during hw_fini but actually we don't need to pin/unpin it 
>> during hw_init/fini since it is created with kernel pinned
>>
>> remove all those useless code for gfx9/10
>>
>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c |  1 - 
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 38 --------------------------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 ---------------------------------
>> 3 files changed, 78 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>> index c8793e6..289fada 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>> @@ -145,7 +145,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
>>     dst_ptr = adev->gfx.rlc.cs_ptr;
>>     adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
>>     amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
>> -    amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>>     amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>
>>     return 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index a56cba9..5ee7467 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -996,39 +996,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
>>     return 0;
>> }
>>
>> -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{
>> -    int r;
>> -
>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
>> -    if (unlikely(r != 0))
>> -        return r;
>> -
>> -    r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
>> -            AMDGPU_GEM_DOMAIN_VRAM);
>> -    if (!r)
>> -        adev->gfx.rlc.clear_state_gpu_addr =
>> -            amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
>> -
>> -    amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>> -
>> -    return r;
>> -}
>> -
>> -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{
>> -    int r;
>> -
>> -    if (!adev->gfx.rlc.clear_state_obj)
>> -        return;
>> -
>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
>> -    if (likely(r == 0)) {
>> -        amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>> -        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>> -    }
>> -}
>> -
>> static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) {
>>     amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@
>> -3780,10 +3747,6 @@ static int gfx_v10_0_hw_init(void *handle)
>>     int r;
>>     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>
>> -    r = gfx_v10_0_csb_vram_pin(adev);
>> -    if (r)
>> -        return r;
>> -
>>     if (!amdgpu_emu_mode)
>>         gfx_v10_0_init_golden_registers(adev);
>>
>> @@ -3871,7 +3834,6 @@ static int gfx_v10_0_hw_fini(void *handle)
>>     }
>>     gfx_v10_0_cp_enable(adev, false);
>>     gfx_v10_0_enable_gui_idle_interrupt(adev, false);
>> -    gfx_v10_0_csb_vram_unpin(adev);
>>
>>     return 0;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 4cc2e50..524a7ba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1683,39 +1683,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
>>     return 0;
>> }
>>
>> -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{
>> -    int r;
>> -
>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
>> -    if (unlikely(r != 0))
>> -        return r;
>> -
>> -    r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
>> -            AMDGPU_GEM_DOMAIN_VRAM);
>> -    if (!r)
>> -        adev->gfx.rlc.clear_state_gpu_addr =
>> -            amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
>> -
>> -    amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>> -
>> -    return r;
>> -}
>> -
>> -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{
>> -    int r;
>> -
>> -    if (!adev->gfx.rlc.clear_state_obj)
>> -        return;
>> -
>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
>> -    if (likely(r == 0)) {
>> -        amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>> -        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>> -    }
>> -}
>> -
>> static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) {
>>     amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@
>> -3694,10 +3661,6 @@ static int gfx_v9_0_hw_init(void *handle)
>>
>>     gfx_v9_0_constants_init(adev);
>>
>> -    r = gfx_v9_0_csb_vram_pin(adev);
>> -    if (r)
>> -        return r;
>> -
>>     r = adev->gfx.rlc.funcs->resume(adev);
>>     if (r)
>>         return r;
>> @@ -3779,8 +3742,6 @@ static int gfx_v9_0_hw_fini(void *handle)
>>     gfx_v9_0_cp_enable(adev, false);
>>     adev->gfx.rlc.funcs->stop(adev);
>>
>> -    gfx_v9_0_csb_vram_unpin(adev);
>> -
>>     return 0;
>> }
>>
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>> t 
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CX
>> i
>> aojie.Yuan%40amd.com%7C65e162e509ea4a90f79308d77266de65%7C3dd8961fe48
>> 8 
>> 4e608e11a82d994e183d%7C0%7C0%7C637103658464512751&sdata=r5fpid5Is
>> P
>> 8anzg%2FZIYHn0N8xceBvG7rtRG80%2B7868o%3D&reserved=0
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CMo
> nk.Liu%40amd.com%7Ccadef01b84ab45f90f1908d773f932cc%7C3dd8961fe4884e60
> 8e11a82d994e183d%7C0%7C0%7C637105386456299680&sdata=hYsvNtzUb%2BTb
> iANKAx2x9dmYW1ikC66r%2B6Hbk3244PE%3D&reserved=0



More information about the amd-gfx mailing list