[PATCH 2/2] drm/amd/pm: enable smu_v13_0_6 mca debug mode when UMC RAS feature is enabled
Yang, Stanley
Stanley.Yang at amd.com
Fri Sep 8 07:51:06 UTC 2023
[AMD Official Use Only - General]
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Yang
> Wang
> Sent: Friday, September 8, 2023 2:34 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>
> Subject: [PATCH 2/2] drm/amd/pm: enable smu_v13_0_6 mca debug mode
> when UMC RAS feature is enabled
>
> enable smu_v13_0_6 mca debug mode when UMC RAS feature is enabled.
>
> Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
> ---
> drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 ++-
> .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 26
> +++++++++++++++++++
> 2 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
> index ebc789e7a289..f762c01b98a5 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
> @@ -247,7 +247,8 @@
> __SMU_DUMMY_MAP(Mode2Reset), \
> __SMU_DUMMY_MAP(RequestI2cTransaction), \
> __SMU_DUMMY_MAP(GetMetricsTable), \
> - __SMU_DUMMY_MAP(DALNotPresent),
> + __SMU_DUMMY_MAP(DALNotPresent), \
> + __SMU_DUMMY_MAP(ClearMcaOnRead),
>
> #undef __SMU_DUMMY_MAP
> #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index ff58ee14a68f..5ecc90e6af10 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -133,6 +133,7 @@ static const struct cmn2asic_msg_mapping
> smu_v13_0_6_message_map[SMU_MSG_MAX_COU
> MSG_MAP(SetSoftMaxGfxClk,
> PPSMC_MSG_SetSoftMaxGfxClk, 0),
> MSG_MAP(PrepareMp1ForUnload,
> PPSMC_MSG_PrepareForDriverUnload, 0),
> MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit,
> 0),
> + MSG_MAP(ClearMcaOnRead,
> PPSMC_MSG_ClearMcaOnRead, 0),
> };
>
> static const struct cmn2asic_mapping
> smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { @@ -1393,6 +1394,20 @@
> static int smu_v13_0_6_notify_unload(struct smu_context *smu)
> return 0;
> }
>
> +static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu,
> bool
> +enable) {
> + uint32_t smu_version;
> +
> + /* NOTE: this ClearMcaOnRead message is only supported for smu
> version 85.72.0 or higher */
> + smu_cmn_get_smc_version(smu, NULL, &smu_version);
> + if (smu_version < 0x554800)
> + return 0;
> +
> + return smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_ClearMcaOnRead,
> + enable ? 0 :
> ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
> + NULL);
> +}
> +
> static int smu_v13_0_6_system_features_control(struct smu_context *smu,
> bool enable)
> {
> @@ -2182,6 +2197,16 @@ static int
> smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
> return ret;
> }
>
> +static int smu_v13_0_6_post_init(struct smu_context *smu) {
> + struct amdgpu_device *adev = smu->adev;
> +
> + if (!amdgpu_sriov_vf(adev) && (adev->ras_enabled &
> BIT(AMDGPU_RAS_BLOCK__UMC)))
[Stanley]: Is there any reason to check only the AMDGPU_RAS_BLOCK__UMC bit? If HBM ECC is not active but SRAM ECC is active,
the AMDGPU_RAS_BLOCK__UMC bit is not set. Is it necessary to set debug mode in that scenario as well?
Regards,
Stanley
> + return smu_v13_0_6_mca_set_debug_mode(smu, true);
> +
> + return 0;
> +}
> +
> static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
> /* init dpm */
> .get_allowed_feature_mask =
> smu_v13_0_6_get_allowed_feature_mask,
> @@ -2235,6 +2260,7 @@ static const struct pptable_funcs
> smu_v13_0_6_ppt_funcs = {
> .i2c_init = smu_v13_0_6_i2c_control_init,
> .i2c_fini = smu_v13_0_6_i2c_control_fini,
> .send_hbm_bad_pages_num =
> smu_v13_0_6_smu_send_hbm_bad_page_num,
> + .post_init = smu_v13_0_6_post_init,
> };
>
> void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
> --
> 2.34.1
More information about the amd-gfx
mailing list