[PATCH] drm/amd/pm: Enable bad memory page/channel recording support for smu v13_0_0

Zhang, Hawking Hawking.Zhang at amd.com
Mon Nov 21 02:02:48 UTC 2022


[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Candice Li
Sent: Saturday, November 19, 2022 12:42
To: amd-gfx at lists.freedesktop.org
Cc: Li, Candice <Candice.Li at amd.com>; Quan, Evan <Evan.Quan at amd.com>
Subject: [PATCH] drm/amd/pm: Enable bad memory page/channel recording support for smu v13_0_0

Send message to SMU to update bad memory page and bad channel info.

Signed-off-by: Candice Li <candice.li at amd.com>
Reviewed-by: Evan Quan <evan.quan at amd.com>
---
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h  |  8 +++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  4 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 39 +++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index 9ebb8f39732a0e..8b8266890a1002 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -131,7 +131,13 @@
 #define PPSMC_MSG_EnableAudioStutterWA           0x44
 #define PPSMC_MSG_PowerUpUmsch                   0x45
 #define PPSMC_MSG_PowerDownUmsch                 0x46
-#define PPSMC_Message_Count                      0x47
+#define PPSMC_MSG_SetDcsArch                     0x47
+#define PPSMC_MSG_TriggerVFFLR                   0x48
+#define PPSMC_MSG_SetNumBadMemoryPagesRetired    0x49
+#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
+#define PPSMC_MSG_SetPriorityDeltaGain           0x4B
+#define PPSMC_MSG_AllowIHHostInterrupt           0x4C
+#define PPSMC_Message_Count                      0x4D

 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 58098b82df660c..a4e3425b1027c2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -239,7 +239,9 @@
        __SMU_DUMMY_MAP(DriverMode2Reset), \
        __SMU_DUMMY_MAP(GetGfxOffStatus),                \
        __SMU_DUMMY_MAP(GetGfxOffEntryCount),            \
-       __SMU_DUMMY_MAP(LogGfxOffResidency),
+       __SMU_DUMMY_MAP(LogGfxOffResidency),                    \
+       __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired),           \
+       __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel),

 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 5bcb61f77e4193..87d7c66e49ef28 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -141,6 +141,9 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
        MSG_MAP(PrepareMp1ForUnload,            PPSMC_MSG_PrepareMp1ForUnload,         0),
        MSG_MAP(DFCstateControl,                PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
        MSG_MAP(ArmD3,                          PPSMC_MSG_ArmD3,                       0),
+       MSG_MAP(SetNumBadMemoryPagesRetired,    PPSMC_MSG_SetNumBadMemoryPagesRetired,   0),
+       MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel,
+                           PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,   0),
 };

 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -1838,6 +1841,40 @@ static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu)
        smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54);
 }

+static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu,
+               uint32_t size)
+{
+       int ret = 0;
+
+       /* message SMU to update the bad page number on SMUBUS */
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+                                         SMU_MSG_SetNumBadMemoryPagesRetired,
+                                         size, NULL);
+       if (ret)
+               dev_err(smu->adev->dev,
+                         "[%s] failed to message SMU to update bad memory pages number\n",
+                         __func__);
+
+       return ret;
+}
+
+static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
+               uint32_t size)
+{
+       int ret = 0;
+
+       /* message SMU to update the bad channel info on SMUBUS */
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+                                 SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,
+                                 size, NULL);
+       if (ret)
+               dev_err(smu->adev->dev,
+                         "[%s] failed to message SMU to update bad memory pages channel info\n",
+                         __func__);
+
+       return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
        .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -1908,6 +1945,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .mode1_reset = smu_v13_0_0_mode1_reset,
        .set_mp1_state = smu_v13_0_0_set_mp1_state,
        .set_df_cstate = smu_v13_0_0_set_df_cstate,
+       .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num,
+       .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
 };

 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
--
2.25.1



More information about the amd-gfx mailing list