[PATCH] drm/amdgpu: handle old RAS eeprom data in non-nps1 mode
Tao Zhou
tao.zhou1 at amd.com
Wed Apr 30 08:38:34 UTC 2025
Get MCA address from PA in nps1, then convert MCA address to PA in specific nps
mode.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 22 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++
3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a5a853894ab0..a9d2e7fb3e47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2889,8 +2889,20 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
} else {
- if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
- return -EINVAL;
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
}
return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
adev->umc.retire_unit);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 8adceeee298b..6337b6406006 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -565,3 +565,25 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
return 0;
}
+
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+ struct ta_ras_query_address_input addr_in;
+ struct ta_ras_query_address_output addr_out;
+ int ret;
+
+ /* nps: the pa belongs to */
+ addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+ addr_in.addr_type = TA_RAS_PA_TO_MCA;
+ ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx", pa);
+
+ return ret;
+ }
+
+ *mca = addr_out.ma.err_addr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 78a8b8654573..d6929d6f64f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -187,4 +187,6 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
uint64_t err_addr, uint32_t ch, uint32_t umc,
uint32_t node, uint32_t socket,
struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif
--
2.34.1
More information about the amd-gfx
mailing list