[PATCH 14/23] drm/amdgpu: add function to find all memory pages in one physical row

Tao Zhou tao.zhou1 at amd.com
Fri Nov 8 11:14:14 UTC 2024


And the function can be reused across amdgpu driver.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 37 +++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  2 ++
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 3199dca8f1ea..11f7299f773f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -446,11 +446,27 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+			struct ras_err_data *err_data, uint64_t pa_addr)
+{
+	struct ta_ras_query_address_output addr_out;
+
+	/* reinit err_data */
+	err_data->err_addr_cnt = 0;
+	err_data->err_addr_len = adev->umc.retire_unit;
+
+	addr_out.pa.pa = pa_addr;
+	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+		return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+				&addr_out, false);
+	else
+		return -EINVAL;
+}
+
 int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
 			uint64_t pa_addr, uint64_t *pfns, int len)
 {
-	uint32_t i, ret = 0, pos = 0;
-	struct ta_ras_query_address_output addr_out;
+	int i, ret;
 	struct ras_err_data err_data;
 
 	err_data.err_addr =
@@ -462,24 +478,17 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
 		return 0;
 	}
 
-	addr_out.pa.pa = pa_addr;
-
-	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
-		ret = adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
-				&addr_out, false);
-		if (ret)
-			goto out;
-	} else {
+	ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+	if (ret)
 		goto out;
-	}
 
 	for (i = 0; i < adev->umc.retire_unit; i++) {
-		if (pos >= len)
+		if (i >= len)
 			goto out;
 
-		pfns[pos++] = err_data.err_addr[pos].retired_page;
+		pfns[i] = err_data.err_addr[i].retired_page;
 	}
-	ret = pos;
+	ret = i;
 
 out:
 	kfree(err_data.err_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index f45408a6ff03..ce1e4fb385b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -139,6 +139,8 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
 
 void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
 			void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+			struct ras_err_data *err_data, uint64_t pa_addr);
 int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
 			uint64_t pa_addr, uint64_t *pfns, int len);
 int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
-- 
2.34.1



More information about the amd-gfx mailing list