[PATCH 18/23] drm/amdgpu: add interface to get die id from memory address

Tao Zhou tao.zhou1 at amd.com
Fri Nov 8 11:14:18 UTC 2024


And implement it for UMC v12_0. The die id is calculated from the IPID
register during bad page retirement, but we don't store it in the EEPROM;
it can also be derived from the physical address.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 22 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.h  |  7 +++++++
 3 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index f97c45b4eeb8..c4e3062008b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -91,6 +91,8 @@ struct amdgpu_umc_ras {
 			struct ta_ras_query_address_input *addr_in,
 			struct ta_ras_query_address_output *addr_out,
 			bool dump_addr);
+	uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+			uint64_t retired_page);
 };
 
 struct amdgpu_umc_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index cce93b4ffb58..bcce7a304d6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -619,6 +619,27 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
 	mutex_unlock(&con->umc_ecc_log.lock);
 }
 
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+		uint64_t retired_page)
+{
+	uint32_t die = 0;	/* die id, built one bit at a time below */
+
+	/* NPS1 mode only for now: each die id bit is an XOR of address bits */
+	die += ((((retired_page >> 12) & 0x1ULL)^
+	    (((retired_page >> 20) & 0x1ULL) & HashIntlvCtl64K) ^
+	    (((retired_page >> 27) & 0x1ULL) & HashIntlvCtl2M) ^
+	    (((retired_page >> 34) & 0x1ULL) & HashIntlvCtl1G) ^
+	    (((retired_page >> 41) & 0x1ULL) & HashIntlvCtl1T)) << 0); /* die bit 0 */
+	die += ((((retired_page >> 13) & 0x1ULL) ^
+	    (((retired_page >> 21) & 0x1ULL) & HashIntlvCtl64K) ^
+	    (((retired_page >> 28) & 0x1ULL) & HashIntlvCtl2M) ^
+	    (((retired_page >> 35) & 0x1ULL) & HashIntlvCtl1G) ^
+	    (((retired_page >> 42) & 0x1ULL) & HashIntlvCtl1T)) << 1); /* die bit 1 */
+	die &= 3;	/* result is limited to two bits (0..3) */
+
+	return die;
+}
+
 struct amdgpu_umc_ras umc_v12_0_ras = {
 	.ras_block = {
 		.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -630,5 +651,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
 	.update_ecc_status = umc_v12_0_update_ecc_status,
 	.convert_ras_err_addr = umc_v12_0_convert_error_address,
+	.get_die_id_from_pa = umc_v12_0_get_die_id,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 9298018d938f..8154a5d6bfaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -77,6 +77,13 @@
 #define MCA_UMC_HWID_V12_0     0x96
 #define MCA_UMC_MCATYPE_V12_0  0x0
 
+/* interleave setting: hash enable flags (1 = on), see umc_v12_0_get_die_id() */
+#define HashIntlvCtl4K 0	/* not referenced by umc_v12_0_get_die_id() */
+#define HashIntlvCtl64K 1
+#define HashIntlvCtl2M 1
+#define HashIntlvCtl1G 1
+#define HashIntlvCtl1T 1
+
 #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
 			(((_ipid_lo) >> 12) & 0xF))
 #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
-- 
2.34.1



More information about the amd-gfx mailing list