[PATCH] drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10
Zhang, Hawking
Hawking.Zhang at amd.com
Fri Jan 13 03:11:21 UTC 2023
[AMD Official Use Only - General]
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Candice Li
Sent: Friday, January 13, 2023 10:40
To: amd-gfx at lists.freedesktop.org
Cc: Li, Candice <Candice.Li at amd.com>
Subject: [PATCH] drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10
Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10.
Signed-off-by: Candice Li <candice.li at amd.com>
Reviewed-by: Evan Quan <evan.quan at amd.com>
---
.../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 42 +++++++++++++++++--
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 6 +++
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 3 ++
3 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 969e5f96554015..d0cdc578344d8d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1904,15 +1904,51 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu,
NULL);
}
+static void smu_v13_0_0_set_mode1_reset_param(struct smu_context *smu,
+ uint32_t supported_version,
+ uint32_t *param)
+{
+ uint32_t smu_version;
+ struct amdgpu_device *adev = smu->adev;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+
+ if ((smu_version >= supported_version) &&
+ ras && atomic_read(&ras->in_recovery))
+ /* Set RAS fatal error reset flag */
+ *param = 1 << 16;
+ else
+ *param = 0;
+}
+
static int smu_v13_0_0_mode1_reset(struct smu_context *smu) {
int ret;
+ uint32_t param;
struct amdgpu_device *adev = smu->adev;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10))
- ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
- else
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ /* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */
+ smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, ¶m);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_Mode1Reset, param, NULL);
+ break;
+
+ case IP_VERSION(13, 0, 10):
+ /* SMU 13_0_10 PMFW supports RAS fatal error reset from 80.28 */
+ smu_v13_0_0_set_mode1_reset_param(smu, 0x00501c00, ¶m);
+
+ ret = smu_cmn_send_debug_smc_msg_with_param(smu,
+ DEBUGSMC_MSG_Mode1Reset, param);
+ break;
+
+ default:
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+ break;
+ }
if (!ret)
msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 768b6e7dbd7719..d5abafc5a68201 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -404,6 +404,12 @@ int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
return __smu_cmn_send_debug_msg(smu, msg, 0); }
+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+ uint32_t msg, uint32_t param)
+{
+ return __smu_cmn_send_debug_msg(smu, msg, param); }
+
int smu_cmn_to_asic_specific_index(struct smu_context *smu,
enum smu_cmn2asic_mapping_type type,
uint32_t index)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index f82cf76dd3a474..d7cd358a53bdcd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -45,6 +45,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
uint32_t msg);
+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+ uint32_t msg, uint32_t param);
+
int smu_cmn_wait_for_response(struct smu_context *smu);
int smu_cmn_to_asic_specific_index(struct smu_context *smu,
--
2.17.1
More information about the amd-gfx
mailing list