[PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload

Liu, Monk Monk.Liu at amd.com
Fri Nov 29 07:53:41 UTC 2019


It is tested/verified by Xiaojie  on nv14 against S3 issues.

Thanks 

_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: Koenig, Christian <Christian.Koenig at amd.com> 
Sent: Friday, November 29, 2019 3:46 PM
To: Liu, Monk <Monk.Liu at amd.com>; Yuan, Xiaojie <Xiaojie.Yuan at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload

In this case feel free to add an Acked-by: Christian König <christian.koenig at amd.com> to your patch.

But I would recommend to get an rb from somebody which knows that stuff better than I do.

Regards,
Christian.

Am 29.11.19 um 02:58 schrieb Liu, Monk:
> The content of CSIB is always static, I submitted a patch to use the 
> re-init and get rid of pin/unpin CSIB in hw_ini/fini,  (my purpose is 
> to fix the double unpin warning during unload ) 
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken at gmail.com>
> Sent: Thursday, November 28, 2019 7:51 PM
> To: Liu, Monk <Monk.Liu at amd.com>; Yuan, Xiaojie 
> <Xiaojie.Yuan at amd.com>; Deucher, Alexander 
> <Alexander.Deucher at amd.com>; Koenig, Christian 
> <Christian.Koenig at amd.com>
> Cc: amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload
>
> Hi Monk,
>
> if the content of the CSIB is constant then it is certainly better to just re-initialize it.
>
> This also prevents from corruption because of VRAM lost.
>
> Christian.
>
> Am 28.11.19 um 03:53 schrieb Liu, Monk:
>> Hi Xiaojie
>>
>> For SRIOV we don't use suspend so I didn't think to that part, thanks for the remind !
>> But we still need to fix this call trace issue anyway (our jenkins 
>> testing  system consider such call trace as an error )
>>
>> How about we do "  adev->gfx.rlc.funcs->get_csb_buffer(adev,
>> dst_ptr);" in the hw_init() ? this way You don't need to evict the CSIB during suspend and the CSIB always will be re-initialized after S3 resume ?
>>
>> @Deucher, Alexander @Koenig, Christian what's your opinion ?
>> _____________________________________
>> Monk Liu|GPU Virtualization Team |AMD
>>
>>
>> -----Original Message-----
>> From: Yuan, Xiaojie <Xiaojie.Yuan at amd.com>
>> Sent: Tuesday, November 26, 2019 9:10 PM
>> To: Liu, Monk <Monk.Liu at amd.com>
>> Cc: amd-gfx at lists.freedesktop.org
>> Subject: Re: [PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload
>>
>> Hi Monk,
>>
>> hw_fini() is called in suspend code path as well. I'm wondering how csb can be evicted if it's not unpined before suspend.
>>
>> BR,
>> Xiaojie
>>
>>> On Nov 26, 2019, at 7:50 PM, Monk Liu <Monk.Liu at amd.com> wrote:
>>>
>>> kernel would report a warning on double unpin on the csb BO because 
>>> we unpin it during hw_fini but actually we don't need to pin/unpin 
>>> it during hw_init/fini since it is created with kernel pinned
>>>
>>> remove all those useless code for gfx9/10
>>>
>>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c |  1 - 
>>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 38 --------------------------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 ---------------------------------
>>> 3 files changed, 78 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>>> index c8793e6..289fada 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
>>> @@ -145,7 +145,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
>>>      dst_ptr = adev->gfx.rlc.cs_ptr;
>>>      adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
>>>      amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
>>> -    amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>>>      amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>>
>>>      return 0;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index a56cba9..5ee7467 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -996,39 +996,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
>>>      return 0;
>>> }
>>>
>>> -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{
>>> -    int r;
>>> -
>>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
>>> -    if (unlikely(r != 0))
>>> -        return r;
>>> -
>>> -    r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
>>> -            AMDGPU_GEM_DOMAIN_VRAM);
>>> -    if (!r)
>>> -        adev->gfx.rlc.clear_state_gpu_addr =
>>> -            amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
>>> -
>>> -    amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>> -
>>> -    return r;
>>> -}
>>> -
>>> -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{
>>> -    int r;
>>> -
>>> -    if (!adev->gfx.rlc.clear_state_obj)
>>> -        return;
>>> -
>>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
>>> -    if (likely(r == 0)) {
>>> -        amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>>> -        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>> -    }
>>> -}
>>> -
>>> static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) {
>>>      amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 
>>> @@
>>> -3780,10 +3747,6 @@ static int gfx_v10_0_hw_init(void *handle)
>>>      int r;
>>>      struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>>
>>> -    r = gfx_v10_0_csb_vram_pin(adev);
>>> -    if (r)
>>> -        return r;
>>> -
>>>      if (!amdgpu_emu_mode)
>>>          gfx_v10_0_init_golden_registers(adev);
>>>
>>> @@ -3871,7 +3834,6 @@ static int gfx_v10_0_hw_fini(void *handle)
>>>      }
>>>      gfx_v10_0_cp_enable(adev, false);
>>>      gfx_v10_0_enable_gui_idle_interrupt(adev, false);
>>> -    gfx_v10_0_csb_vram_unpin(adev);
>>>
>>>      return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 4cc2e50..524a7ba 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -1683,39 +1683,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
>>>      return 0;
>>> }
>>>
>>> -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{
>>> -    int r;
>>> -
>>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
>>> -    if (unlikely(r != 0))
>>> -        return r;
>>> -
>>> -    r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
>>> -            AMDGPU_GEM_DOMAIN_VRAM);
>>> -    if (!r)
>>> -        adev->gfx.rlc.clear_state_gpu_addr =
>>> -            amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
>>> -
>>> -    amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>> -
>>> -    return r;
>>> -}
>>> -
>>> -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{
>>> -    int r;
>>> -
>>> -    if (!adev->gfx.rlc.clear_state_obj)
>>> -        return;
>>> -
>>> -    r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
>>> -    if (likely(r == 0)) {
>>> -        amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
>>> -        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
>>> -    }
>>> -}
>>> -
>>> static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) {
>>>      amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 
>>> @@
>>> -3694,10 +3661,6 @@ static int gfx_v9_0_hw_init(void *handle)
>>>
>>>      gfx_v9_0_constants_init(adev);
>>>
>>> -    r = gfx_v9_0_csb_vram_pin(adev);
>>> -    if (r)
>>> -        return r;
>>> -
>>>      r = adev->gfx.rlc.funcs->resume(adev);
>>>      if (r)
>>>          return r;
>>> @@ -3779,8 +3742,6 @@ static int gfx_v9_0_hw_fini(void *handle)
>>>      gfx_v9_0_cp_enable(adev, false);
>>>      adev->gfx.rlc.funcs->stop(adev);
>>>
>>> -    gfx_v9_0_csb_vram_unpin(adev);
>>> -
>>>      return 0;
>>> }
>>>
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fli
>>> s
>>> t
>>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7C
>>> X
>>> i
>>> aojie.Yuan%40amd.com%7C65e162e509ea4a90f79308d77266de65%7C3dd8961fe4
>>> 8
>>> 8
>>> 4e608e11a82d994e183d%7C0%7C0%7C637103658464512751&sdata=r5fpid5I
>>> s
>>> P
>>> 8anzg%2FZIYHn0N8xceBvG7rtRG80%2B7868o%3D&reserved=0
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>> t 
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CM
>> o
>> nk.Liu%40amd.com%7Ccadef01b84ab45f90f1908d773f932cc%7C3dd8961fe4884e6
>> 0 
>> 8e11a82d994e183d%7C0%7C0%7C637105386456299680&sdata=hYsvNtzUb%2BT
>> b
>> iANKAx2x9dmYW1ikC66r%2B6Hbk3244PE%3D&reserved=0



More information about the amd-gfx mailing list