[PATCH 03/11] drm/amdgpu: add helper funtion to query umc ras error
Hawking Zhang
Hawking.Zhang at amd.com
Fri Apr 2 06:43:37 UTC 2021
Add helper functions to query correctable and
uncorrectable umc ras error.
Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>
Reviewed-by: John Clements <John.Clements at amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 70 +++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 7 +++
2 files changed, 77 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 37aa1cf6b2ef..05fec10b1ed9 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -24,6 +24,76 @@
#include "amdgpu_ras.h"
#include "amdgpu.h"
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
const struct amdgpu_umc_funcs umc_v6_7_funcs = {
.ras_late_init = amdgpu_umc_ras_late_init,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index 8c2ce694ec89..6b881226b4f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -23,6 +23,13 @@
#ifndef __UMC_V6_7_H__
#define __UMC_V6_7_H__
+/* EccErrCnt max value */
+#define UMC_V6_7_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_7_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V6_7_CE_CNT_INIT (UMC_V6_7_CE_CNT_MAX - UMC_V6_7_CE_INT_THRESHOLD)
+
extern const struct amdgpu_umc_funcs umc_v6_7_funcs;
#endif
--
2.17.1
More information about the amd-gfx
mailing list