[PATCH] drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table

Zhang, Hawking Hawking.Zhang at amd.com
Thu Oct 26 05:31:24 UTC 2023


[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Candice Li
Sent: Thursday, October 26, 2023 12:37
To: amd-gfx at lists.freedesktop.org
Cc: Li, Candice <Candice.Li at amd.com>
Subject: [PATCH] drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table

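Retrieve the correctable error count from the ce_count_lo_chip field of the EccInfo table entry, instead of adding a single error whenever the Val and CECC bits of mca_umc_status are set.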

Signed-off-by: Candice Li <candice.li at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index 46bfdee79bfd2a..c4c77257710c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -336,7 +336,7 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
                                      uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
                                      unsigned long *error_count)
 {
-       uint64_t mc_umc_status;
+       uint16_t ecc_ce_cnt;
        uint32_t eccinfo_table_idx;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

@@ -345,12 +345,10 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
                                  umc_inst * adev->umc.channel_inst_num +
                                  ch_inst;

-       /* check the MCUMC_STATUS */
-       mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
-       if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
-               *error_count += 1;
-       }
+       /* Retrieve CE count */
+       ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip;
+       if (ecc_ce_cnt)
+               *error_count += ecc_ce_cnt;
 }

 static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
--
2.25.1



More information about the amd-gfx mailing list