[PATCH 08/23] drm/amdgpu: add return value for convert_ras_err_addr

Tao Zhou tao.zhou1 at amd.com
Fri Nov 8 11:14:08 UTC 2024


Make convert_ras_err_addr return an error code so that the upper layer can
return failure directly if address conversion fails.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 19 +++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 12 ++++++++----
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index c0433e6471f5..3199dca8f1ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -464,11 +464,14 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
 
 	addr_out.pa.pa = pa_addr;
 
-	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
-		adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
+	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+		ret = adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
 				&addr_out, false);
-	else
+		if (ret)
+			goto out;
+	} else {
 		goto out;
+	}
 
 	for (i = 0; i < adev->umc.retire_unit; i++) {
 		if (pos >= len)
@@ -490,6 +493,7 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
 {
 	struct ta_ras_query_address_input addr_in;
 	struct ta_ras_query_address_output addr_out;
+	int ret;
 
 	memset(&addr_in, 0, sizeof(addr_in));
 	addr_in.ma.err_addr = err_addr;
@@ -498,11 +502,14 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
 	addr_in.ma.node_inst = node;
 	addr_in.ma.socket_id = socket;
 
-	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
-		adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+		ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
 				&addr_out, dump_addr);
-	else
+		if (ret)
+			return ret;
+	} else {
 		return 0;
+	}
 
 	*addr = addr_out.pa.pa;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index abde7597bda8..f45408a6ff03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -70,7 +70,7 @@ struct amdgpu_umc_ras {
 			enum amdgpu_mca_error_type type, void *ras_error_status);
 	int (*update_ecc_status)(struct amdgpu_device *adev,
 			uint64_t status, uint64_t ipid, uint64_t addr);
-	void (*convert_ras_err_addr)(struct amdgpu_device *adev,
+	int (*convert_ras_err_addr)(struct amdgpu_device *adev,
 			struct ras_err_data *err_data,
 			struct ta_ras_query_address_input *addr_in,
 			struct ta_ras_query_address_output *addr_out,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 9b93ff769b86..ce60fd6675ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -173,7 +173,7 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
 	umc_v12_0_reset_error_count(adev);
 }
 
-static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 					struct ras_err_data *err_data,
 					struct ta_ras_query_address_input *addr_in,
 					struct ta_ras_query_address_output *addr_out,
@@ -183,6 +183,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 	uint64_t soc_pa, retired_page, column, err_addr;
 	struct ta_ras_query_address_output addr_out_tmp;
 	struct ta_ras_query_address_output *paddr_out;
+	int ret = 0;
 
 	if (!addr_out)
 		paddr_out = &addr_out_tmp;
@@ -193,11 +194,12 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 	if (addr_in) {
 		err_addr = addr_in->ma.err_addr;
 		addr_in->addr_type = TA_RAS_MCA_TO_PA;
-		if (psp_ras_query_address(&adev->psp, addr_in, paddr_out)) {
+		ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+		if (ret) {
 			dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
 				err_addr);
 
-			return;
+			return ret;
 		}
 
 		bank = paddr_out->pa.bank;
@@ -209,7 +211,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 	soc_pa = paddr_out->pa.pa;
 
 	if (!err_data && !dump_addr)
-		return;
+		return ret;
 
 	col = (err_addr >> 1) & 0x1fULL;
 	/* clear [C3 C2] in soc physical address */
@@ -241,6 +243,8 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 			amdgpu_umc_fill_error_record(err_data, err_addr,
 				retired_page, channel_index, umc_inst);
 	}
+
+	return ret;
 }
 
 static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
-- 
2.34.1



More information about the amd-gfx mailing list