[PATCH] drm/amdgpu: record correct sram correctable error count on arcturus
Guchun Chen
guchun.chen at amd.com
Sun Mar 22 08:32:49 UTC 2020
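On Arcturus, only the SRAM correctable error status is stable after
correctable error injection, so skip the ErrorCodeExt check on that ASIC
and count the error based on the Val and CECC fields of MCUMC_STATUS alone.
Other ASICs keep the existing ErrorCodeExt == 6 check.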
Signed-off-by: Guchun Chen <guchun.chen at amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 77122a7282e7..57e8f72cc96d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -138,10 +138,21 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
- *error_count += 1;
+ if (REG_GET_FIELD(mc_umc_status,
+ MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status,
+ MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ /*
+ * On arcturus, only sram correctable error is stable after
+ * error injection, so bypass the check of ErrorCodeExt bit
+ * field in MCUMC_STATUS register to record correct status.
+ */
+ if (adev->asic_type == CHIP_ARCTURUS)
+ *error_count += 1;
+ else if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0,
+ ErrorCodeExt) == 6)
+ *error_count += 1;
+ }
}
static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
--
2.17.1
More information about the amd-gfx
mailing list