[PATCH] drm/amd/pm: fix driver reload SMC firmware fail issue for smu13

Wang, Yang(Kevin) KevinYang.Wang at amd.com
Fri Jun 10 03:15:19 UTC 2022


[AMD Official Use Only - General]



________________________________
From: Quan, Evan <Evan.Quan at amd.com>
Sent: Friday, June 10, 2022 10:55 AM
To: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
Cc: Feng, Kenneth <Kenneth.Feng at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: RE: [PATCH] drm/amd/pm: fix driver reload SMC firmware fail issue for smu13

[AMD Official Use Only - General]

Will this break GPU reset? PPSMC_MSG_PrepareMp1ForUnload may put the SMC into an out-of-service state, which could make it unable to serve subsequent reset messages.
Can you confirm?

BR
Evan

[kevin]:

I missed this case; it will cause GPU reset to fail when the SMC is unloaded.
I will submit a new patch to fix this issue.

thanks.

Best Regards,
Kevin
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Yang
> Wang
> Sent: Thursday, June 9, 2022 7:33 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Feng, Kenneth <Kenneth.Feng at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH] drm/amd/pm: fix driver reload SMC firmware fail issue for
> smu13
>
> issue calltrace:
> [  402.773695] [drm] failed to load ucode SMC(0x2C)
> [  402.773754] [drm] psp gfx command LOAD_IP_FW(0x6) failed and
> response status is (0x0)
> [  402.773762] [drm:psp_load_smu_fw [amdgpu]] *ERROR* PSP load smu
> failed!
> [  402.966758] [drm:psp_v13_0_ring_destroy [amdgpu]] *ERROR* Fail to stop
> psp ring
> [  402.966949] [drm:psp_hw_init [amdgpu]] *ERROR* PSP firmware loading
> failed
> [  402.967116] [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR*
> hw_init of IP block <psp> failed -22
> [  402.967252] amdgpu 0000:03:00.0: amdgpu: amdgpu_device_ip_init failed
> [  402.967255] amdgpu 0000:03:00.0: amdgpu: Fatal error during GPU init
>
> If the MP1 state is not reset during kernel driver unload, the PSP will
> fail to load the PMFW the second time the driver is loaded.
>
> Add PPSMC_MSG_PrepareMp1ForUnload support for
> smu_v13_0_0/smu_v13_0_7.
>
> Signed-off-by: Yang Wang <KevinYang.Wang at amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c            | 8 ++++++++
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++
>  3 files changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index f57710790b8c..14ebc35d9cf0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -66,6 +66,8 @@ static int smu_set_fan_control_mode(void *handle, u32
> value);
>  static int smu_set_power_limit(void *handle, uint32_t limit);
>  static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
>  static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
> +static int smu_set_mp1_state(void *handle, enum pp_mp1_state
> mp1_state);
> +
>
>  static int smu_sys_get_pp_feature_mask(void *handle,
>                                       char *buf)
> @@ -1414,6 +1416,12 @@ static int smu_disable_dpms(struct smu_context
> *smu)
>        switch (adev->ip_versions[MP1_HWIP][0]) {
>        case IP_VERSION(13, 0, 0):
>        case IP_VERSION(13, 0, 7):
> +             ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
> +             if (ret) {
> +                     dev_err(adev->dev, "Fail set mp1 state to
> UNLOAD !\n");
> +                     return ret;
> +             }
> +
>                return 0;
>        default:
>                break;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 26fb72a588e7..fda89e309b07 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
>        MSG_MAP(GetPptLimit,
>        PPSMC_MSG_GetPptLimit,                 0),
>        MSG_MAP(NotifyPowerSource,
>        PPSMC_MSG_NotifyPowerSource,           0),
>        MSG_MAP(Mode1Reset,
>        PPSMC_MSG_Mode1Reset,                  0),
> +     MSG_MAP(PrepareMp1ForUnload,
>        PPSMC_MSG_PrepareMp1ForUnload,         0),
>  };
>
>  static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT]
> = {
> @@ -1662,6 +1663,7 @@ static const struct pptable_funcs
> smu_v13_0_0_ppt_funcs = {
>        .baco_exit = smu_v13_0_baco_exit,
>        .mode1_reset_is_support =
> smu_v13_0_0_is_mode1_reset_supported,
>        .mode1_reset = smu_v13_0_mode1_reset,
> +     .set_mp1_state = smu_cmn_set_mp1_state,
>  };
>
>  void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> index a92ab3266091..185058637f7d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> @@ -116,6 +116,7 @@ static struct cmn2asic_msg_mapping
> smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
>        MSG_MAP(DramLogSetDramSize,
>        PPSMC_MSG_DramLogSetDramSize,          0),
>        MSG_MAP(AllowGfxOff,
>        PPSMC_MSG_AllowGfxOff,                 0),
>        MSG_MAP(DisallowGfxOff,
>        PPSMC_MSG_DisallowGfxOff,              0),
> +     MSG_MAP(PrepareMp1ForUnload,
>        PPSMC_MSG_PrepareMp1ForUnload,         0),
>  };
>
>  static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT]
> = {
> @@ -1647,6 +1648,7 @@ static const struct pptable_funcs
> smu_v13_0_7_ppt_funcs = {
>        .baco_set_state = smu_v13_0_7_baco_set_state,
>        .baco_enter = smu_v13_0_7_baco_enter,
>        .baco_exit = smu_v13_0_baco_exit,
> +     .set_mp1_state = smu_cmn_set_mp1_state,
>  };
>
>  void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
> --
> 2.25.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20220610/621af8f4/attachment.htm>


More information about the amd-gfx mailing list