[PATCH 06/20] x86/mce/amd: Use helper for GPU UMC bank type checks

Yazen Ghannam yazen.ghannam at amd.com
Sat Nov 18 19:32:34 UTC 2023


The type of a Scalable MCA bank should be determined solely using the
values in its MCA_IPID register.

Define a helper function to determine if a bank represents a GPU
Unified Memory Controller (UMC), and use it where the exact bank type is
not needed.

Use bitops for the new MCATYPE mask, and rename the old mask until it is
removed.

Signed-off-by: Yazen Ghannam <yazen.ghannam at amd.com>
---
 arch/x86/include/asm/mce.h              |  4 +++-
 arch/x86/kernel/cpu/mce/amd.c           | 12 +++++++++++-
 drivers/edac/amd64_edac.c               |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |  9 ++++-----
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c43b41677a3e..012caf68dcbb 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -59,8 +59,9 @@
  *  - TCC bit is present in MCx_STATUS.
  */
 #define MCI_CONFIG_MCAX		0x1
-#define MCI_IPID_MCATYPE	0xFFFF0000
+#define MCI_IPID_MCATYPE_OLD	0xFFFF0000
 #define MCI_IPID_HWID_OLD	0xFFF
+#define MCI_IPID_MCATYPE	GENMASK_ULL(63, 48)
 #define MCI_IPID_HWID		GENMASK_ULL(43, 32)
 
 /*
@@ -341,6 +342,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
+bool smca_gpu_umc_bank_type(u64 ipid);
 #else
 
 static inline int mce_threshold_create_device(unsigned int cpu)		{ return 0; };
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index c8fb6c24170f..6fc35967b11b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -150,6 +150,16 @@ static bool smca_umc_bank_type(u64 ipid)
 	return FIELD_GET(MCI_IPID_HWID, ipid) == 0x96;
 }
 
+/* GPU UMCs have MCATYPE=0x1. */
+bool smca_gpu_umc_bank_type(u64 ipid)
+{
+	if (!smca_umc_bank_type(ipid))
+		return false;
+
+	return FIELD_GET(MCI_IPID_MCATYPE, ipid) == 0x1;
+}
+EXPORT_SYMBOL_GPL(smca_gpu_umc_bank_type);
+
 static const struct smca_hwid smca_hwid_mcatypes[] = {
 	/* { bank_type, hwid_mcatype } */
 
@@ -312,7 +322,7 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 	}
 
 	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID_OLD,
-				    (high & MCI_IPID_MCATYPE) >> 16);
+				    (high & MCI_IPID_MCATYPE_OLD) >> 16);
 
 	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
 		s_hwid = &smca_hwid_mcatypes[i];
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9b6642d00871..b593795e1e6b 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1032,7 +1032,7 @@ static int fixup_node_id(int node_id, struct mce *m)
 	/* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */
 	u8 nid = (m->ipid >> 44) & 0xF;
 
-	if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+	if (!smca_gpu_umc_bank_type(m->ipid))
 		return node_id;
 
 	/* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 84e5987b14e0..7235668b3cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3279,12 +3279,11 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
 	uint32_t umc_inst = 0, ch_inst = 0;
 
 	/*
-	 * If the error was generated in UMC_V2, which belongs to GPU UMCs,
-	 * and error occurred in DramECC (Extended error code = 0) then only
-	 * process the error, else bail out.
+	 * If the error was generated in a GPU UMC and error occurred in
+	 * DramECC (Extended error code = 0) then only process the error,
+	 * else bail out.
 	 */
-	if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
-		    (XEC(m->status, 0x3f) == 0x0)))
+	if (!m || !(smca_gpu_umc_bank_type(m->ipid) && (XEC(m->status, 0x3f) == 0x0)))
 		return NOTIFY_DONE;
 
 	/*
-- 
2.34.1



More information about the amd-gfx mailing list