[PATCH] drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10

Zhang, Hawking Hawking.Zhang at amd.com
Fri Jan 13 03:11:21 UTC 2023


[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Candice Li
Sent: Friday, January 13, 2023 10:40
To: amd-gfx at lists.freedesktop.org
Cc: Li, Candice <Candice.Li at amd.com>
Subject: [PATCH] drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10

Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10.

Signed-off-by: Candice Li <candice.li at amd.com>
Reviewed-by: Evan Quan <evan.quan at amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 42 +++++++++++++++++--
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        |  6 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  3 ++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 969e5f96554015..d0cdc578344d8d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1904,15 +1904,51 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu,
                                               NULL);
 }

+static void smu_v13_0_0_set_mode1_reset_param(struct smu_context *smu,
+                                               uint32_t supported_version,
+                                               uint32_t *param)
+{
+       uint32_t smu_version;
+       struct amdgpu_device *adev = smu->adev;
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       smu_cmn_get_smc_version(smu, NULL, &smu_version);
+
+       if ((smu_version >= supported_version) &&
+                       ras && atomic_read(&ras->in_recovery))
+               /* Set RAS fatal error reset flag */
+               *param = 1 << 16;
+       else
+               *param = 0;
+}
+
 static int smu_v13_0_0_mode1_reset(struct smu_context *smu)
 {
        int ret;
+       uint32_t param;
        struct amdgpu_device *adev = smu->adev;

-       if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10))
-               ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
-       else
+       switch (adev->ip_versions[MP1_HWIP][0]) {
+       case IP_VERSION(13, 0, 0):
+               /* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */
+               smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, &param);
+
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                               SMU_MSG_Mode1Reset, param, NULL);
+               break;
+
+       case IP_VERSION(13, 0, 10):
+               /* SMU 13_0_10 PMFW supports RAS fatal error reset from 80.28 */
+               smu_v13_0_0_set_mode1_reset_param(smu, 0x00501c00, &param);
+
+               ret = smu_cmn_send_debug_smc_msg_with_param(smu,
+                                               DEBUGSMC_MSG_Mode1Reset, param);
+               break;
+
+       default:
                ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+               break;
+       }

        if (!ret)
                msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 768b6e7dbd7719..d5abafc5a68201 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -404,6 +404,12 @@ int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
        return __smu_cmn_send_debug_msg(smu, msg, 0);
 }

+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+                        uint32_t msg, uint32_t param)
+{
+       return __smu_cmn_send_debug_msg(smu, msg, param);
+}
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
                                   enum smu_cmn2asic_mapping_type type,
                                   uint32_t index)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index f82cf76dd3a474..d7cd358a53bdcd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -45,6 +45,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu,
 int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
                         uint32_t msg);

+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+                        uint32_t msg, uint32_t param);
+
 int smu_cmn_wait_for_response(struct smu_context *smu);

 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
--
2.17.1



More information about the amd-gfx mailing list