[PATCH Review 1/1] drm/amdgpu: add umc query error status function

Zhang, Hawking Hawking.Zhang at amd.com
Fri Apr 8 10:55:03 UTC 2022


[AMD Official Use Only]

+static void umc_query_error_status_helper(struct amdgpu_device *adev,
+                                                 uint64_t mc_umc_status, uint32_t umc_reg_offset)

It might be better to keep the umc_v6_7 naming style, e.g. call it umc_v6_7_query_mca_status. Apart from that, the patch is

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: Stanley.Yang <Stanley.Yang at amd.com>
Sent: Friday, April 8, 2022 16:10
To: amd-gfx at lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Li, Candice <Candice.Li at amd.com>
Cc: Yang, Stanley <Stanley.Yang at amd.com>
Subject: [PATCH Review 1/1] drm/amdgpu: add umc query error status function

In order to debug RAS errors, the driver prints the IPID/SYND/MISC0 register values when it detects a correctable or uncorrectable error.
Provide the umc_query_error_status_helper function to reduce code redundancy.

Change-Id: I09a2aae85cde3ab2cb6b042b973da6839ad024ec
Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 106 ++++++++++++--------------
 1 file changed, 48 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index c45d9c14ecbc..9d3b54778417 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev,
        return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
 }

+static void umc_query_error_status_helper(struct amdgpu_device *adev,
+                                                 uint64_t mc_umc_status, uint32_t umc_reg_offset)
+{
+       uint32_t mc_umc_addr;
+       uint64_t reg_value;
+
+       if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
+               dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+
+       if (mc_umc_status)
+               dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
+
+       /* print IPID registers value */
+       mc_umc_addr =
+               SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
+       reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+       if (reg_value)
+               dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+       /* print SYND registers value */
+       mc_umc_addr =
+               SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
+       reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+       if (reg_value)
+               dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+       /* print MISC0 registers value */
+       mc_umc_addr =
+               SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
+       reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+       if (reg_value)
+               dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+}
+
 static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
                                                   uint32_t umc_inst, uint32_t ch_inst,
                                                   unsigned long *error_count)
 {
        uint64_t mc_umc_status;
        uint32_t eccinfo_table_idx;
+       uint32_t umc_reg_offset;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

+       umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+                                               umc_inst, ch_inst);
+
        eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
        /* check for SRAM correctable error
          MCUMC_STATUS is a 64 bit register */
        mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
                *error_count += 1;
+
+               umc_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+       }
 }

 static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
        uint64_t mc_umc_status;
        uint32_t eccinfo_table_idx;
        uint32_t umc_reg_offset;
-       uint32_t mc_umc_addr;
-       uint64_t reg_value;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

        umc_reg_offset = get_umc_v6_7_reg_offset(adev,
@@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
                *error_count += 1;

-               if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
-                       dev_info(adev->dev, "Deferred error, no user action is needed.\n");
-
-               if (mc_umc_status)
-                       dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
-
-               /* print IPID registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
-               /* print SYND registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
-               /* print MISC0 registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+               umc_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
        }
 }

@@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
          MCUMC_STATUS is a 64 bit register */
        mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
                *error_count += 1;
+
+               umc_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+       }
 }

 static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
 {
        uint64_t mc_umc_status;
        uint32_t mc_umc_status_addr;
-       uint32_t mc_umc_addr;
-       uint64_t reg_value;

        mc_umc_status_addr =
                SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
                *error_count += 1;

-               if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
-                       dev_info(adev->dev, "Deferred error, no user action is needed.\n");
-
-               if (mc_umc_status)
-                       dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
-
-               /* print IPID registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
-               /* print SYND registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
-               /* print MISC0 registers value */
-               mc_umc_addr =
-                       SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
-               reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
-               if (reg_value)
-                       dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+               umc_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
        }
 }

--
2.17.1



More information about the amd-gfx mailing list