[PATCH] drm/amdgpu: Modify the count method of defer error
Ce Sun
cesun102 at amd.com
Mon May 12 14:33:57 UTC 2025
The number of newly added de counts and the number of
newly added error addresses remain consistent
Signed-off-by: Ce Sun <cesun102 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 +
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 8 ++++++--
3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 8adceeee298b..c19e40d095a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -533,6 +533,7 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
pfns[i] = err_data.err_addr[i].retired_page;
}
ret = i;
+ adev->umc.err_addr_cnt = err_data.err_addr_cnt;
out:
kfree(err_data.err_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 857693bcd8d4..52fb71c4ce9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -130,6 +130,7 @@ struct amdgpu_umc {
/* active mask for umc node instance */
unsigned long active_mask;
+ unsigned long err_addr_cnt;
};
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 0e404c074975..0a4ab63287c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -428,8 +428,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
bank->regs[ACA_REG_IDX_ADDR]);
ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
- count = ext_error_code == 0 ?
- ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
+ if (umc_v12_0_is_deferred_error(adev, status))
+ count = ext_error_code == 0 ?
+ adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
+ else
+ count = ext_error_code == 0 ?
+ ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
return aca_error_cache_log_bank_error(handle, &info, err_type, count);
}
--
2.34.1
More information about the amd-gfx
mailing list