[PATCH] drm/amd/pm: fix driver reload SMC firmware fail issue for smu13

Quan, Evan Evan.Quan at amd.com
Fri Jun 10 02:55:08 UTC 2022


[AMD Official Use Only - General]

Will this break GPU reset? PPSMC_MSG_PrepareMp1ForUnload may put the SMC into an out-of-service state, which could leave it unable to serve subsequent reset messages.
Can you confirm?

BR
Evan
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Yang
> Wang
> Sent: Thursday, June 9, 2022 7:33 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Feng, Kenneth <Kenneth.Feng at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH] drm/amd/pm: fix driver reload SMC firmware fail issue for
> smu13
> 
> issue calltrace:
> [  402.773695] [drm] failed to load ucode SMC(0x2C)
> [  402.773754] [drm] psp gfx command LOAD_IP_FW(0x6) failed and
> response status is (0x0)
> [  402.773762] [drm:psp_load_smu_fw [amdgpu]] *ERROR* PSP load smu
> failed!
> [  402.966758] [drm:psp_v13_0_ring_destroy [amdgpu]] *ERROR* Fail to stop
> psp ring
> [  402.966949] [drm:psp_hw_init [amdgpu]] *ERROR* PSP firmware loading
> failed
> [  402.967116] [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR*
> hw_init of IP block <psp> failed -22
> [  402.967252] amdgpu 0000:03:00.0: amdgpu: amdgpu_device_ip_init failed
> [  402.967255] amdgpu 0000:03:00.0: amdgpu: Fatal error during GPU init
> 
> If the MP1 state is not reset during kernel driver unload, the PSP will
> fail to load the PMFW the second time the driver is loaded.
> 
> add PPSMC_MSG_PrepareMp1ForUnload support for
> smu_v13_0_0/smu_v13_0_7
> 
> Signed-off-by: Yang Wang <KevinYang.Wang at amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c            | 8 ++++++++
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++
>  3 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index f57710790b8c..14ebc35d9cf0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -66,6 +66,8 @@ static int smu_set_fan_control_mode(void *handle, u32
> value);
>  static int smu_set_power_limit(void *handle, uint32_t limit);
>  static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
>  static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
> +static int smu_set_mp1_state(void *handle, enum pp_mp1_state
> mp1_state);
> +
> 
>  static int smu_sys_get_pp_feature_mask(void *handle,
>  				       char *buf)
> @@ -1414,6 +1416,12 @@ static int smu_disable_dpms(struct smu_context
> *smu)
>  	switch (adev->ip_versions[MP1_HWIP][0]) {
>  	case IP_VERSION(13, 0, 0):
>  	case IP_VERSION(13, 0, 7):
> +		ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
> +		if (ret) {
> +			dev_err(adev->dev, "Fail set mp1 state to
> UNLOAD !\n");
> +			return ret;
> +		}
> +
>  		return 0;
>  	default:
>  		break;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 26fb72a588e7..fda89e309b07 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
>  	MSG_MAP(GetPptLimit,
> 	PPSMC_MSG_GetPptLimit,                 0),
>  	MSG_MAP(NotifyPowerSource,
> 	PPSMC_MSG_NotifyPowerSource,           0),
>  	MSG_MAP(Mode1Reset,
> 	PPSMC_MSG_Mode1Reset,                  0),
> +	MSG_MAP(PrepareMp1ForUnload,
> 	PPSMC_MSG_PrepareMp1ForUnload,         0),
>  };
> 
>  static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT]
> = {
> @@ -1662,6 +1663,7 @@ static const struct pptable_funcs
> smu_v13_0_0_ppt_funcs = {
>  	.baco_exit = smu_v13_0_baco_exit,
>  	.mode1_reset_is_support =
> smu_v13_0_0_is_mode1_reset_supported,
>  	.mode1_reset = smu_v13_0_mode1_reset,
> +	.set_mp1_state = smu_cmn_set_mp1_state,
>  };
> 
>  void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> index a92ab3266091..185058637f7d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> @@ -116,6 +116,7 @@ static struct cmn2asic_msg_mapping
> smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
>  	MSG_MAP(DramLogSetDramSize,
> 	PPSMC_MSG_DramLogSetDramSize,          0),
>  	MSG_MAP(AllowGfxOff,
> 	PPSMC_MSG_AllowGfxOff,                 0),
>  	MSG_MAP(DisallowGfxOff,
> 	PPSMC_MSG_DisallowGfxOff,              0),
> +	MSG_MAP(PrepareMp1ForUnload,
> 	PPSMC_MSG_PrepareMp1ForUnload,         0),
>  };
> 
>  static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT]
> = {
> @@ -1647,6 +1648,7 @@ static const struct pptable_funcs
> smu_v13_0_7_ppt_funcs = {
>  	.baco_set_state = smu_v13_0_7_baco_set_state,
>  	.baco_enter = smu_v13_0_7_baco_enter,
>  	.baco_exit = smu_v13_0_baco_exit,
> +	.set_mp1_state = smu_cmn_set_mp1_state,
>  };
> 
>  void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
> --
> 2.25.1


More information about the amd-gfx mailing list