[PATCH 6/6] drm/amdgpu: drop status query/reset for GCEA 9.4.3 and MMEA 1.8

Wed Oct 18 11:53:35 UTC 2023

[AMD Official Use Only - General]

Series is

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: Zhou1, Tao <Tao.Zhou1 at amd.com>
Sent: Wednesday, October 18, 2023 19:40
To: amd-gfx at lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Li, Candice <Candice.Li at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>
Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
Subject: [PATCH 6/6] drm/amdgpu: drop status query/reset for GCEA 9.4.3 and MMEA 1.8

PMFW will be responsible for the GCEA 9.4.3 and MMEA 1.8 error status query and reset, so drop them from the driver.

v2: remove query interfaces.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c |  60 ----------
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 143 ------------------------
 2 files changed, 203 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index a1c2c952d882..362bf51ab1d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3754,10 +3754,6 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
            AMDGPU_GFX_LDS_MEM, 4},
 };

-static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
-       SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
-};
-
 static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
                                        void *ras_error_status, int xcc_id)
 {
@@ -3846,39 +3842,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 }

-static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
-                                       int xcc_id)
-{
-       uint32_t i, j;
-       uint32_t reg_value;
-
-       mutex_lock(&adev->grbm_idx_mutex);
-
-       for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
-               for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
-                       gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
-                       reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-                                       regGCEA_ERR_STATUS);
-                       if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
-                           REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
-                           REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
-                               dev_warn(adev->dev,
-                                       "GCEA err detected at instance: %d, status: 0x%x!\n",
-                                       j, reg_value);
-                       }
-                       /* clear after read */
-                       reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
-                                                 CLEAR_ERROR_STATUS, 0x1);
-                       WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS,
-                                       reg_value);
-               }
-       }
-
-       gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-                       xcc_id);
-       mutex_unlock(&adev->grbm_idx_mutex);
-}
-
 static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
                                        int xcc_id)
 {
@@ -3983,7 +3946,6 @@ static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
 static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
                                        void *ras_error_status, int xcc_id)
 {
-       gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id);
        gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
        gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
 }
@@ -3996,27 +3958,6 @@ static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
        WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
 }

-static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
-                                       int xcc_id)
-{
-       uint32_t i, j;
-       uint32_t value;
-
-       mutex_lock(&adev->grbm_idx_mutex);
-       for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
-               for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
-                       gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
-                       value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
-                       value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
-                                               CLEAR_ERROR_STATUS, 0x1);
-                       WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
-               }
-       }
-       gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-                       xcc_id);
-       mutex_unlock(&adev->grbm_idx_mutex);
-}
-
 static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
                                        int xcc_id)
 {
@@ -4042,7 +3983,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
                                        void *ras_error_status, int xcc_id)
 {
        gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
-       gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
        gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index aa00483e7b37..ea142611be1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -700,152 +700,9 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
                mmhub_v1_8_inst_reset_ras_error_count(adev, i);
 }

-static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
-       regMMEA0_ERR_STATUS,
-       regMMEA1_ERR_STATUS,
-       regMMEA2_ERR_STATUS,
-       regMMEA3_ERR_STATUS,
-       regMMEA4_ERR_STATUS,
-};
-
-static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
-                                                uint32_t mmhub_inst)
-{
-       uint32_t reg_value;
-       uint32_t mmea_err_status_addr_dist;
-       uint32_t i;
-
-       /* query mmea ras err status */
-       mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
-       for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
-               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                               regMMEA0_ERR_STATUS,
-                                               i * mmea_err_status_addr_dist);
-               if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
-                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
-                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
-                       dev_warn(adev->dev,
-                                "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
-                                i, mmhub_inst, reg_value);
-               }
-       }
-
-       /* query mm_cane ras err status */
-       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
-       if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
-           REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
-           REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
-               dev_warn(adev->dev,
-                        "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
-                        mmhub_inst, reg_value);
-       }
-}
-
-static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
-{
-       uint32_t inst_mask;
-       uint32_t i;
-
-       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
-               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
-               return;
-       }
-
-       inst_mask = adev->aid_mask;
-       for_each_inst(i, inst_mask)
-               mmhub_v1_8_inst_query_ras_err_status(adev, i);
-}
-
-static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
-                                                uint32_t mmhub_inst)
-{
-       uint32_t mmea_cgtt_clk_cntl_addr_dist;
-       uint32_t mmea_err_status_addr_dist;
-       uint32_t reg_value;
-       uint32_t i;
-
-       /* reset mmea ras err status */
-       mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
-       mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
-       for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
-               /* force clk branch on for response path
-                * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
-                */
-               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                               regMMEA0_CGTT_CLK_CTRL,
-                                               i * mmea_cgtt_clk_cntl_addr_dist);
-               reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
-                                         SOFT_OVERRIDE_RETURN, 1);
-               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                   regMMEA0_CGTT_CLK_CTRL,
-                                   i * mmea_cgtt_clk_cntl_addr_dist,
-                                   reg_value);
-
-               /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
-               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                               regMMEA0_ERR_STATUS,
-                                               i * mmea_err_status_addr_dist);
-               reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
-                                         CLEAR_ERROR_STATUS, 1);
-               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                   regMMEA0_ERR_STATUS,
-                                   i * mmea_err_status_addr_dist,
-                                   reg_value);
-
-               /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
-               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                               regMMEA0_CGTT_CLK_CTRL,
-                                               i * mmea_cgtt_clk_cntl_addr_dist);
-               reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
-                                         SOFT_OVERRIDE_RETURN, 0);
-               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
-                                   regMMEA0_CGTT_CLK_CTRL,
-                                   i * mmea_cgtt_clk_cntl_addr_dist,
-                                   reg_value);
-       }
-
-       /* reset mm_cane ras err status
-        * force clk branch on for response path
-        * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
-        */
-       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
-       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
-                                 SOFT_OVERRIDE_ATRET, 1);
-       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
-
-       /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
-       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
-       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
-                                 CLEAR_ERROR_STATUS, 1);
-       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
-
-       /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
-       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
-       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
-                                 SOFT_OVERRIDE_ATRET, 0);
-       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
-}
-
-static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
-{
-       uint32_t inst_mask;
-       uint32_t i;
-
-       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
-               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
-               return;
-       }
-
-       inst_mask = adev->aid_mask;
-       for_each_inst(i, inst_mask)
-               mmhub_v1_8_inst_reset_ras_err_status(adev, i);
-}
-
 static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
        .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
        .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
-       .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
-       .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
 };

 struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
--
2.35.1