[PATCH] drm/amdgpu: fix the hang observed on Carrizo due to UVD suspend failure

Lazar, Lijo lijo.lazar at amd.com
Mon Oct 18 10:47:21 UTC 2021



On 10/18/2021 3:21 PM, Quan, Evan wrote:
> [AMD Official Use Only]
> 
> 
> 
>> -----Original Message-----
>> From: Lazar, Lijo <Lijo.Lazar at amd.com>
>> Sent: Monday, October 18, 2021 3:58 PM
>> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; bp at alien8.de
>> Subject: Re: [PATCH] drm/amdgpu: fix the hang observed on Carrizo due to
>> UVD suspend failure
>>
>>
>>
>> On 10/18/2021 1:04 PM, Evan Quan wrote:
>>> It's confirmed that on some APUs the interaction with SMU(about DPM
>>> disablement) will power off the UVD. That will make the succeeding
>>> interactions with UVD on the suspend path impossible. And the system
>>> will hang due to that. To workaround the issue, we will skip the
>>> UVD(or VCE) power off during interaction with SMU for suspend scenario.
>>>
>>
>> The original issue reported by Boris is related to sytem reboot and hw_fini
>> calls (SMU is hw_fini before UVD/VCE). Boris also mentioned that it got
>> solved by reverting the disable DPM calls during hw_fini. So, I'm still not sure
>> how this is related to suspend path.
> [Quan, Evan] hw_fini() was not on the path of system reboot as I confirmed. It's different from the issue Andrey found(during driver unload).
> The call flow for system reboot is: amdgpu_pci_shutdown() -> amdgpu_device_ip_suspend() -> ...
> 

Sorry then I misunderstood. I confused it with pci_remove and the 
hw_fini sequence. It was suspend() calling hw_fini() then.

BTW, is this unrelated to the reboot issue then? in_suspend is not set 
during amdgpu_pci_shutdown(). Wouldn't it be better to skip uvd/vce 
poweroff when their DPM is disabled?

Thanks,
Lijo

> BR
> Evan
>>
>> Thanks,
>> Lijo
>>
>>> Signed-off-by: Evan Quan <evan.quan at amd.com>
>>> Change-Id: I7804d3835aadbc7cf4b686da4994e83333461748
>>> ---
>>>    .../powerplay/hwmgr/smu7_clockpowergating.c   | 20
>> +++++++++++++++++--
>>>    .../drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c   | 16
>> +++++++++++++--
>>>    drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c     |  4 ++--
>>>    3 files changed, 34 insertions(+), 6 deletions(-)
>>>
>>> diff --git
>>> a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c
>>> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c
>>> index f2bda3bcbbde..d2c6fe8fe473 100644
>>> ---
>> a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c
>>> +++
>> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c
>>> @@ -57,7 +57,17 @@ static int smu7_update_vce_dpm(struct pp_hwmgr
>>> *hwmgr, bool bgate)
>>>
>>>    int smu7_powerdown_uvd(struct pp_hwmgr *hwmgr)
>>>    {
>>> -	if (phm_cf_want_uvd_power_gating(hwmgr))
>>> +	struct amdgpu_device *adev = (struct amdgpu_device *)hwmgr-
>>> adev;
>>> +
>>> +	/*
>>> +	 * Further inactions with UVD are still needed on the suspend path.
>> Thus
>>> +	 * the power off for UVD here should be avoided.
>>> +	 *
>>> +	 * TODO: a better solution is to reorg the action chains performed on
>> suspend
>>> +	 * and make the action here the last one. But that will involve a lot
>> and needs
>>> +	 * MM team's help.
>>> +	 */
>>> +	if (phm_cf_want_uvd_power_gating(hwmgr) && !adev-
>>> in_suspend)
>>>    		return smum_send_msg_to_smc(hwmgr,
>>>    				PPSMC_MSG_UVDPowerOFF,
>>>    				NULL);
>>> @@ -82,7 +92,13 @@ static int smu7_powerup_uvd(struct pp_hwmgr
>> *hwmgr)
>>>
>>>    static int smu7_powerdown_vce(struct pp_hwmgr *hwmgr)
>>>    {
>>> -	if (phm_cf_want_vce_power_gating(hwmgr))
>>> +	struct amdgpu_device *adev = (struct amdgpu_device *)hwmgr-
>>> adev;
>>> +
>>> +	/*
>>> +	 * Further inactions with VCE are still needed on the suspend path.
>> Thus
>>> +	 * the power off for VCE here should be avoided.
>>> +	 */
>>> +	if (phm_cf_want_vce_power_gating(hwmgr) && !adev->in_suspend)
>>>    		return smum_send_msg_to_smc(hwmgr,
>>>    				PPSMC_MSG_VCEPowerOFF,
>>>    				NULL);
>>> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
>>> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
>>> index b94a77e4e714..09e755980c42 100644
>>> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
>>> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
>>> @@ -1247,7 +1247,13 @@ static int smu8_dpm_force_dpm_level(struct
>>> pp_hwmgr *hwmgr,
>>>
>>>    static int smu8_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr)
>>>    {
>>> -	if (PP_CAP(PHM_PlatformCaps_UVDPowerGating))
>>> +	struct amdgpu_device *adev = (struct amdgpu_device *)hwmgr-
>>> adev;
>>> +
>>> +	/*
>>> +	 * Further inactions with UVD are still needed on the suspend path.
>> Thus
>>> +	 * the power off for UVD here should be avoided.
>>> +	 */
>>> +	if (PP_CAP(PHM_PlatformCaps_UVDPowerGating) && !adev-
>>> in_suspend)
>>>    		return smum_send_msg_to_smc(hwmgr,
>> PPSMC_MSG_UVDPowerOFF, NULL);
>>>    	return 0;
>>>    }
>>> @@ -1301,7 +1307,13 @@ static int  smu8_dpm_update_vce_dpm(struct
>>> pp_hwmgr *hwmgr)
>>>
>>>    static int smu8_dpm_powerdown_vce(struct pp_hwmgr *hwmgr)
>>>    {
>>> -	if (PP_CAP(PHM_PlatformCaps_VCEPowerGating))
>>> +	struct amdgpu_device *adev = (struct amdgpu_device *)hwmgr-
>>> adev;
>>> +
>>> +	/*
>>> +	 * Further inactions with VCE are still needed on the suspend path.
>> Thus
>>> +	 * the power off for VCE here should be avoided.
>>> +	 */
>>> +	if (PP_CAP(PHM_PlatformCaps_VCEPowerGating) && !adev-
>>> in_suspend)
>>>    		return smum_send_msg_to_smc(hwmgr,
>>>    					    PPSMC_MSG_VCEPowerOFF,
>>>    					    NULL);
>>> diff --git a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
>>> b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
>>> index bcae42cef374..4e9c9da255a7 100644
>>> --- a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
>>> @@ -1683,7 +1683,7 @@ static void kv_dpm_powergate_uvd(void *handle,
>> bool gate)
>>>    		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>>    						       AMD_PG_STATE_GATE);
>>>    		kv_update_uvd_dpm(adev, gate);
>>> -		if (pi->caps_uvd_pg)
>>> +		if (pi->caps_uvd_pg && !adev->in_suspend)
>>>    			/* power off the UVD block */
>>>    			amdgpu_kv_notify_message_to_smu(adev,
>> PPSMC_MSG_UVDPowerOFF);
>>>    	} else {
>>> @@ -1710,7 +1710,7 @@ static void kv_dpm_powergate_vce(void *handle,
>> bool gate)
>>>    		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>>    						       AMD_PG_STATE_GATE);
>>>    		kv_enable_vce_dpm(adev, false);
>>> -		if (pi->caps_vce_pg) /* power off the VCE block */
>>> +		if (pi->caps_vce_pg && !adev->in_suspend) /* power off the
>> VCE
>>> +block */
>>>    			amdgpu_kv_notify_message_to_smu(adev,
>> PPSMC_MSG_VCEPowerOFF);
>>>    	} else {
>>>    		if (pi->caps_vce_pg) /* power on the VCE block */
>>>


More information about the amd-gfx mailing list