[PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot

Quan, Evan Evan.Quan at amd.com
Fri Nov 5 08:14:28 UTC 2021


[AMD Official Use Only]



> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> James Zhu
> Sent: Friday, November 5, 2021 10:03 AM
> To: amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
> reboot
> 
> 
> On 2021-11-04 4:19 a.m., Evan Quan wrote:
> > It's confirmed that on some APUs the interaction with SMU about DPM
> > disablement will power off the UVD completely. Thus the succeeding
> > interactions with UVD during the reboot will trigger hard hang. To
> > workaround this issue, we will skip the dpm disablement on APUs.
> >
> > Signed-off-by: Evan Quan <evan.quan at amd.com>
> > Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> > ---
> >   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
> +++++++++++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> >   4 files changed, 52 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > index c108b8381795..67ec13622e51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> 
> [JZ] Hi Evan, VCN code put amdgpu_dpm_enable_uvd(false) at the end of
> stop.  Can we do the same for uvd/vce?
[Quan, Evan] Sounds reasonable to me. Actually Lijo provided some insights which enlightened me.
I will drop this patch and provide a new one.

BR
Evan
> 
> Here, it is possible that some dec/enc jobs are still running when dpm is
> called. I am not sure if this situation caused hard hang during reboot.
> 
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v4_2_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > index 2d558c2f417d..60d05ec8c953 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	/*
> > +	 * It's confirmed that on some APUs the interaction with SMU(about
> DPM disablement)
> > +	 * will power off the UVD. That will make the succeeding interactions
> with UVD on the
> > +	 * suspend path impossible. And the system will hang due to that. To
> workaround the
> > +	 * issue, we will skip the dpm disablement on APUs.
> > +	 *
> > +	 * TODO: a better solution is to reorg the action chains performed on
> suspend and make
> > +	 * the dpm disablement the last one. But that will involve a lot and
> needs MM team's
> > +	 * help.
> > +	 */
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v6_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > index 67eb01fef789..8aa9d8c07053 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v2_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > index 142e291983b4..b177cd442838 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v3_0_hw_fini(adev);


More information about the amd-gfx mailing list