[PATCH 05/15] drm/amdgpu: add interface to update umc v12_0 ecc status
YiPeng Chai
YiPeng.Chai at amd.com
Thu Apr 18 02:58:26 UTC 2024
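Add an interface to update the UMC v12_0 ECC status: a new update_ecc_status callback in struct amdgpu_umc_ras, the wrapper amdgpu_umc_update_ecc_status(), and the UMC v12_0 implementation, which is called from mca_umc_mca_get_err_count() in the SMU v13.0.6 code. amdgpu_ras_add_mca_err_addr() now returns immediately, as it will be retired.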
Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 9 +++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 +++++
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 24 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 3 +++
.../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 5 ++++
6 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 126616eaeec1..702229abe7ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -4213,6 +4213,8 @@ void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_a
{
struct ras_err_addr *mca_err_addr;
+ /* This function will be retired. */
+ return;
mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL);
if (!mca_err_addr)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f486510fc94c..7006a57277ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -437,3 +437,12 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
return 0;
}
+
+/* Forward to the UMC RAS update_ecc_status hook; 0 if no RAS block or hook. */
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+			uint64_t status, uint64_t ipid, uint64_t addr)
+{
+	if (!adev->umc.ras || !adev->umc.ras->update_ecc_status)
+		return 0;
+	return adev->umc.ras->update_ecc_status(adev, status, ipid, addr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 563b0249247e..4f3834fa10a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -66,6 +66,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
bool (*check_ecc_err_status)(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
};
struct amdgpu_umc_funcs {
@@ -122,4 +124,8 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
uint32_t reset, uint32_t timeout_ms);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index a0122b22eda4..81435533c4a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -479,6 +479,29 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common
return 0;
}
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ uint16_t hwid, mcatype;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+ mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+
+ if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0))
+ return 0;
+
+ if (!status)
+ return 0;
+
+ if (!umc_v12_0_is_deferred_error(adev, status))
+ return 0;
+
+ con->umc_ecc_log.de_updated = true;
+
+ return 0;
+}
+
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -489,5 +512,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+ .update_ecc_status = umc_v12_0_update_ecc_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 1d5f44dcffdd..5c2d7e127608 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -62,6 +62,9 @@
/* row bits in SOC physical address */
#define UMC_V12_0_PA_R13_BIT 35
+#define MCA_UMC_HWID_V12_0 0x96
+#define MCA_UMC_MCATYPE_V12_0 0x0
+
#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
(((_ipid_lo) >> 12) & 0xF))
#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 55d11ea8c717..8370c2130476 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2681,6 +2681,11 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
umc_v12_0_is_correctable_error(adev, status0))
*count = (ext_error_code == 0) ? odecc_err_cnt : 1;
+ amdgpu_umc_update_ecc_status(adev,
+ entry->regs[MCA_REG_IDX_STATUS],
+ entry->regs[MCA_REG_IDX_IPID],
+ entry->regs[MCA_REG_IDX_ADDR]);
+
return 0;
}
--
2.34.1
More information about the amd-gfx mailing list