[PATCH 2/2] update check condition of query for ras page retire
Tao Zhou
tao.zhou1 at amd.com
Wed Jan 17 09:14:30 UTC 2024
Support page retirement handling in debug mode.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 9 +++++++--
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 41139bac7643..6df32f0afd89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -90,12 +90,16 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ unsigned int error_query_mode;
int ret = 0;
+ amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
+
mutex_lock(&con->page_retirement_lock);
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
- if (ret == -EOPNOTSUPP) {
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
@@ -119,7 +123,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
*/
adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
- } else if (!ret) {
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_count)
adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index c560f4af214d..d86c9e7fc64b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2909,8 +2909,8 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,
static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu,
void *table)
{
- /* Support ecc info by default */
- return 0;
+ /* we use debug mode flag instead of this interface */
+ return -EOPNOTSUPP;
}
static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
--
2.35.1
More information about the amd-gfx
mailing list