[PATCH] drm/amdgpu: add umc_convert_error_address to simplify code

From: Tao Zhou <tao.zhou1@amd.com>
Date: Wed Jan 26 11:04:47 UTC 2022


Factor the UMC error address translation and bad-page retirement logic into
shared umc_v6_7_convert_error_address()/umc_v8_7_convert_error_address()
helpers, so the eccinfo-table and register query paths can reuse the same
code. This removes the duplicated logic and makes the code simpler.
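
For reference, a rough sketch of the resulting call pattern (illustrative
only, taken from the hunks below): each query path now just fetches the MCA
status/address (from the registers or from the eccinfo table) and hands them
to the shared helper, which does the validity checks and address conversion.

	/* register query path: read status/address, then convert */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);

	umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
			err_addr, mc_umc_status);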

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 94 +++++++++------------------
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 82 +++++++++--------------
 2 files changed, 61 insertions(+), 115 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 47452b61b615..4abcdda42ac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -114,21 +114,13 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
-					 struct ras_err_data *err_data,
-					 uint32_t ch_inst,
-					 uint32_t umc_inst)
+static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+					struct ras_err_data *err_data, uint32_t ch_inst,
+					uint32_t umc_inst, uint64_t err_addr,
+					uint64_t mc_umc_status)
 {
-	uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
 	uint32_t channel_index;
-	uint32_t eccinfo_table_idx;
-	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
-	channel_index =
-		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
-	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+	uint64_t soc_pa, retired_page, column;
 
 	if (mc_umc_status == 0)
 		return;
@@ -136,12 +128,13 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 	if (!err_data->err_addr)
 		return;
 
+	channel_index =
+			adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
 	/* calculate error address if ue/ce error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
-		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 
 		/* translate umc channel address to soc pa, 3 parts are included */
@@ -173,6 +166,23 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 	}
 }
 
+static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+					 struct ras_err_data *err_data,
+					 uint32_t ch_inst,
+					 uint32_t umc_inst)
+{
+	uint64_t mc_umc_status, err_addr;
+	uint32_t eccinfo_table_idx;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+	err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+
+	umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+			err_addr, mc_umc_status);
+}
+
 static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
@@ -348,9 +358,7 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 					 uint32_t umc_inst)
 {
 	uint32_t mc_umc_status_addr;
-	uint32_t channel_index;
-	uint64_t mc_umc_status, mc_umc_addrt0;
-	uint64_t err_addr, soc_pa, retired_page, column;
+	uint64_t mc_umc_status, mc_umc_addrt0, err_addr;
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -358,54 +366,10 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
 
 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+	err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
 
-	if (mc_umc_status == 0)
-		return;
-
-	if (!err_data->err_addr) {
-		/* clear umc status */
-		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
-		return;
-	}
-
-	channel_index =
-		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
-	/* calculate error address if ue/ce error is detected */
-	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
-	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
-		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
-		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
-
-		/* translate umc channel address to soc pa, 3 parts are included */
-		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
-				ADDR_OF_256B_BLOCK(channel_index) |
-				OFFSET_IN_256B_BLOCK(err_addr);
-
-		/* The umc channel bits are not original values, they are hashed */
-		SET_CHANNEL_HASH(channel_index, soc_pa);
-
-		/* clear [C4 C3 C2] in soc physical address */
-		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
-
-		/* we only save ue error information currently, ce is skipped */
-		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-				== 1) {
-			/* loop for all possibilities of [C4 C3 C2] */
-			for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
-				retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
-				amdgpu_umc_fill_error_record(err_data, err_addr,
-					retired_page, channel_index, umc_inst);
-
-				/* shift R14 bit */
-				retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
-				amdgpu_umc_fill_error_record(err_data, err_addr,
-					retired_page, channel_index, umc_inst);
-			}
-		}
-	}
+	umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+				err_addr, mc_umc_status);
 
 	/* clear umc status */
 	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index de85a998ef99..df15b87ae12b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -115,21 +115,13 @@ static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
-					struct ras_err_data *err_data,
-					uint32_t ch_inst,
-					uint32_t umc_inst)
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+					struct ras_err_data *err_data, uint32_t ch_inst,
+					uint32_t umc_inst, uint64_t err_addr,
+					uint64_t mc_umc_status)
 {
-	uint64_t mc_umc_status, err_addr, retired_page;
-	uint32_t channel_index;
-	uint32_t eccinfo_table_idx;
-	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
-	channel_index =
-		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
-	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+	uint32_t lsb, channel_index;
+	uint64_t retired_page;
 
 	if (mc_umc_status == 0)
 		return;
@@ -137,13 +129,16 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 	if (!err_data->err_addr)
 		return;
 
+	channel_index =
+			adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
 	/* calculate error address if ue/ce error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
-		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+		err_addr &= ~((0x1ULL << lsb) - 1);
 
 		/* translate umc channel address to soc pa, 3 parts are included */
 		retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
@@ -157,6 +152,22 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 					retired_page, channel_index, umc_inst);
 	}
 }
+static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+					struct ras_err_data *err_data,
+					uint32_t ch_inst,
+					uint32_t umc_inst)
+{
+	uint64_t mc_umc_status, err_addr;
+	uint32_t eccinfo_table_idx;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+	err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+
+	umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+				err_addr, mc_umc_status);
+}
 
 static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
 					void *ras_error_status)
@@ -330,9 +341,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
 					 uint32_t ch_inst,
 					 uint32_t umc_inst)
 {
-	uint32_t lsb, mc_umc_status_addr;
-	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
-	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+	uint32_t mc_umc_status_addr;
+	uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -340,38 +350,10 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
 		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
 
 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+	err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
 
-	if (mc_umc_status == 0)
-		return;
-
-	if (!err_data->err_addr) {
-		/* clear umc status */
-		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
-		return;
-	}
-
-	/* calculate error address if ue/ce error is detected */
-	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
-	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
-		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
-		/* the lowest lsb bits should be ignored */
-		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
-		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
-		err_addr &= ~((0x1ULL << lsb) - 1);
-
-		/* translate umc channel address to soc pa, 3 parts are included */
-		retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
-				ADDR_OF_256B_BLOCK(channel_index) |
-				OFFSET_IN_256B_BLOCK(err_addr);
-
-		/* we only save ue error information currently, ce is skipped */
-		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-				== 1)
-			amdgpu_umc_fill_error_record(err_data, err_addr,
-					retired_page, channel_index, umc_inst);
-	}
+	umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+					err_addr, mc_umc_status);
 
 	/* clear umc status */
 	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
-- 
2.17.1
