[PATCH v2] drm/amd/pm: fix the high voltage and temperature issue
Wang, Yang(Kevin)
KevinYang.Wang at amd.com
Thu Oct 26 03:53:43 UTC 2023
-----Original Message-----
From: Kenneth Feng <kenneth.feng at amd.com>
Sent: Thursday, October 26, 2023 11:34 AM
To: amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Feng, Kenneth <Kenneth.Feng at amd.com>
Subject: [PATCH v2] drm/amd/pm: fix the high voltage and temperature issue
fix the high voltage and temperature issue after the driver is unloaded on smu 13.0.0, smu 13.0.7 and smu 13.0.10
v2 - fix the code format and make sure it is used on the unload case only.
Signed-off-by: Kenneth Feng <kenneth.feng at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 25 ++++++++++----
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 33 +++++++++++++++++--
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 ++
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 13 ++++++++
.../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 18 ++++++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 19 +++++++++--
7 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 31f8c3ead161..1ad0fc3f3861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3986,13 +3986,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ r = psp_gpu_reset(adev);
+ break;
+ default:
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ break;
+ }
+
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -5945,6 +5955,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
return -ENOTSUPP;
ret = amdgpu_dpm_baco_exit(adev);
+
[kevin]:
This blank line is not needed.
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7c3356d6da5e..2e82172ba250 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -733,7 +733,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.state = SMU_BACO_STATE_NONE;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -1740,10 +1740,31 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
return 0;
}
+static int smu_reset_mp1_state(struct smu_context *smu) {
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if ((!adev->in_runpm) && (!adev->in_suspend) &&
+ (!amdgpu_in_reset(adev)))
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
[kevin]:
Hi Kenneth,
I prefer to add a callback function in _ppt.c file to avoid IP version check in SMU common file.
But either way is okay for me.
Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
Thanks.
Best Regards,
Kevin
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -1761,7 +1782,15 @@ static int smu_hw_fini(void *handle)
adev->pm.dpm_enabled = false;
- return smu_smc_hw_cleanup(smu);
+ ret = smu_smc_hw_cleanup(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_reset_mp1_state(smu);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void smu_late_fini(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 1454eed76604..9f2dbc90b606 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -419,6 +419,7 @@ enum smu_reset_mode { enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
+ SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index cc02f979e9e9..43c7ba68eb50 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -299,5 +299,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
uint8_t pcie_gen_cap,
uint8_t pcie_width_cap);
+int smu_v13_0_disable_pmfw_state(struct smu_context* smu);
+
#endif
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index bcb7ab9d2221..0724441e53ef 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2473,3 +2473,16 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
return 0;
}
+
+int smu_v13_0_disable_pmfw_state(struct smu_context* smu) {
+ int ret;
+ struct amdgpu_device *adev = smu->adev;
+
+ WREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff), 0);
+
+ ret = RREG32_PCIE(MP1_Public |
+ (smnMP1_FIRMWARE_FLAGS & 0xffffffff));
+
+ return ret == 0 ? 0 : -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 47d008cbc186..02c5e7b1e43b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2570,14 +2570,20 @@ static int smu_v13_0_0_baco_enter(struct smu_context *smu) static int smu_v13_0_0_baco_exit(struct smu_context *smu) {
struct amdgpu_device *adev = smu->adev;
+ int ret;
if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
usleep_range(10000, 11000);
- return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+ ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
} else {
- return smu_v13_0_baco_exit(smu);
+ ret = smu_v13_0_baco_exit(smu);
}
+
+ if (!ret)
+ adev->gfx.is_poweron = false;
+
+ return ret;
}
static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) @@ -2758,7 +2764,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index b8a7a1d853df..40e8d1767b71 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2429,7 +2429,13 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
@@ -2455,14 +2461,21 @@ static int smu_v13_0_7_baco_enter(struct smu_context *smu) static int smu_v13_0_7_baco_exit(struct smu_context *smu) {
struct amdgpu_device *adev = smu->adev;
+ int ret;
if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
usleep_range(10000, 11000);
- return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+ ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
} else {
- return smu_v13_0_baco_exit(smu);
+ ret = smu_v13_0_baco_exit(smu);
}
+
+ if (!ret)
+ adev->gfx.is_poweron = false;
+
+ return ret;
+
[kevin]:
Please remove this blank line too.
}
static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
--
2.34.1
More information about the amd-gfx
mailing list