[PATCH 06/23] drm/amdgpu: make convert_ras_err_addr visible outside UMC block

Tao Zhou tao.zhou1 at amd.com
Fri Nov 8 11:14:06 UTC 2024


And change some UMC v12 specific functions to common version, so the
code can be shared.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 63 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 11 +++++
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 62 ++----------------------
 3 files changed, 78 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 9e70a7b3aa64..75ded3904ee7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -445,3 +445,66 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
 
 	return ret;
 }
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+			uint64_t pa_addr, uint64_t *pfns, int len)
+{
+	uint32_t i, ret = 0, pos = 0;
+	struct ta_ras_query_address_output addr_out;
+	struct ras_err_data err_data;
+
+	err_data.err_addr =
+		kcalloc(adev->umc.max_ras_err_cnt_per_query,
+				sizeof(struct eeprom_table_record), GFP_KERNEL);
+	if(!err_data.err_addr) {
+		dev_warn(adev->dev, "Failed to alloc memory in "
+				"bad page lookup!\n");
+		return 0;
+	}
+
+	addr_out.pa.pa = pa_addr;
+
+	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+		adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
+				&addr_out, false);
+	else
+		goto out;
+
+	for (i = 0; i < adev->umc.max_ras_err_cnt_per_query; i++) {
+		if (pos >= len)
+			goto out;
+
+		pfns[pos++] = err_data.err_addr[pos].retired_page;
+	}
+	ret = pos;
+
+out:
+	kfree(err_data.err_addr);
+	return ret;
+}
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+			uint64_t err_addr, uint32_t ch, uint32_t umc,
+			uint32_t node, uint32_t socket,
+			uint64_t *addr, bool dump_addr)
+{
+	struct ta_ras_query_address_input addr_in;
+	struct ta_ras_query_address_output addr_out;
+
+	memset(&addr_in, 0, sizeof(addr_in));
+	addr_in.ma.err_addr = err_addr;
+	addr_in.ma.ch_inst = ch;
+	addr_in.ma.umc_inst = umc;
+	addr_in.ma.node_inst = node;
+	addr_in.ma.socket_id = socket;
+
+	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+		adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+				&addr_out, dump_addr);
+	else
+		return 0;
+
+	*addr = addr_out.pa.pa;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index ce4179db2a6d..abde7597bda8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -70,6 +70,11 @@ struct amdgpu_umc_ras {
 			enum amdgpu_mca_error_type type, void *ras_error_status);
 	int (*update_ecc_status)(struct amdgpu_device *adev,
 			uint64_t status, uint64_t ipid, uint64_t addr);
+	void (*convert_ras_err_addr)(struct amdgpu_device *adev,
+			struct ras_err_data *err_data,
+			struct ta_ras_query_address_input *addr_in,
+			struct ta_ras_query_address_output *addr_out,
+			bool dump_addr);
 };
 
 struct amdgpu_umc_funcs {
@@ -134,4 +139,10 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
 
 void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
 			void *ras_error_status);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+			uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+			uint64_t err_addr, uint32_t ch, uint32_t umc,
+			uint32_t node, uint32_t socket,
+			uint64_t *addr, bool dump_addr);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 65336ae12585..9b93ff769b86 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -243,61 +243,6 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 	}
 }
 
-static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
-			uint64_t pa_addr, uint64_t *pfns, int len)
-{
-	uint32_t i, ret = 0, pos = 0;
-	struct ta_ras_query_address_output addr_out;
-	struct ras_err_data err_data;
-
-	err_data.err_addr =
-		kcalloc(adev->umc.max_ras_err_cnt_per_query,
-				sizeof(struct eeprom_table_record), GFP_KERNEL);
-	if(!err_data.err_addr) {
-		dev_warn(adev->dev, "Failed to alloc memory in "
-				"bad page lookup!\n");
-		return 0;
-	}
-
-	addr_out.pa.pa = pa_addr;
-	umc_v12_0_convert_error_address(adev, &err_data, NULL, &addr_out, false);
-
-	for (i = 0; i < adev->umc.max_ras_err_cnt_per_query; i++) {
-		if (pos >= len)
-			goto out;
-
-		pfns[pos++] = err_data.err_addr[pos].retired_page;
-	}
-	ret = pos;
-
-out:
-	kfree(err_data.err_addr);
-	return ret;
-}
-
-static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev,
-			uint64_t err_addr, uint32_t ch, uint32_t umc,
-			uint32_t node, uint32_t socket,
-			uint64_t *addr, bool dump_addr)
-{
-	struct ta_ras_query_address_input addr_in;
-	struct ta_ras_query_address_output addr_out;
-
-	memset(&addr_in, 0, sizeof(addr_in));
-	addr_in.ma.err_addr = err_addr;
-	addr_in.ma.ch_inst = ch;
-	addr_in.ma.umc_inst = umc;
-	addr_in.ma.node_inst = node;
-	addr_in.ma.socket_id = socket;
-	addr_in.addr_type = TA_RAS_MCA_TO_PA;
-
-	umc_v12_0_convert_error_address(adev, NULL, &addr_in, &addr_out, dump_addr);
-
-	*addr = addr_out.pa.pa;
-
-	return 0;
-}
-
 static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 					uint32_t node_inst, uint32_t umc_inst,
 					uint32_t ch_inst, void *data)
@@ -525,7 +470,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
 		MCA_IPID_2_UMC_CH(ipid),
 		err_addr);
 
-	ret = umc_v12_0_convert_mca_to_addr(adev,
+	ret = amdgpu_umc_mca_to_addr(adev,
 			err_addr, MCA_IPID_2_UMC_CH(ipid),
 			MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
 			MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true);
@@ -559,7 +504,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
 	con->umc_ecc_log.de_queried_count++;
 
 	memset(page_pfn, 0, sizeof(page_pfn));
-	count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+	count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
 				pa_addr,
 				page_pfn, ARRAY_SIZE(page_pfn));
 	if (count <= 0) {
@@ -602,7 +547,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	memset(page_pfn, 0, sizeof(page_pfn));
-	count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+	count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
 				ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
 				page_pfn, ARRAY_SIZE(page_pfn));
 
@@ -659,5 +604,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	.ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
 	.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
 	.update_ecc_status = umc_v12_0_update_ecc_status,
+	.convert_ras_err_addr = umc_v12_0_convert_error_address,
 };
 
-- 
2.34.1



More information about the amd-gfx mailing list