[PATCH 16/21] drm/amdgpu: add interface to get die id from memory address

Tao Zhou tao.zhou1 at amd.com
Tue Nov 19 06:35:59 UTC 2024


And implement it for UMC v12_0. The die id is calculated from IPID
register in bad page retirement flow, but we don't store it on eeprom
and it can be also gotten from physical address.

v2: get PA_C4 and PA_R13 from MCA address since they may be cleared in
retired page.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 26 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index f97c45b4eeb8..a4a7e61817aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -91,6 +91,8 @@ struct amdgpu_umc_ras {
 			struct ta_ras_query_address_input *addr_in,
 			struct ta_ras_query_address_output *addr_out,
 			bool dump_addr);
+	uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+			uint64_t mca_addr, uint64_t retired_page);
 };
 
 struct amdgpu_umc_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index cce93b4ffb58..30ee4cb9aaab 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -619,6 +619,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
 	mutex_unlock(&con->umc_ecc_log.lock);
 }
 
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+		uint64_t mca_addr, uint64_t retired_page)
+{
+	uint32_t die = 0;
+
+	/* we only calculate die id for nps1 mode right now */
+	die += ((((retired_page >> 12) & 0x1ULL)^
+	    ((retired_page >> 20) & 0x1ULL) ^
+	    ((retired_page >> 27) & 0x1ULL) ^
+	    ((retired_page >> 34) & 0x1ULL) ^
+	    ((retired_page >> 41) & 0x1ULL)) << 0);
+
+	/* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+	 * get them from mca_addr.
+	 */
+	die += ((((retired_page >> 13) & 0x1ULL) ^
+	    ((mca_addr >> 5) & 0x1ULL) ^
+	    ((retired_page >> 28) & 0x1ULL) ^
+	    ((mca_addr >> 23) & 0x1ULL) ^
+	    ((retired_page >> 42) & 0x1ULL)) << 1);
+	die &= 3;
+
+	return die;
+}
+
 struct amdgpu_umc_ras umc_v12_0_ras = {
 	.ras_block = {
 		.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -630,5 +655,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
 	.update_ecc_status = umc_v12_0_update_ecc_status,
 	.convert_ras_err_addr = umc_v12_0_convert_error_address,
+	.get_die_id_from_pa = umc_v12_0_get_die_id,
 };
 
-- 
2.34.1



More information about the amd-gfx mailing list