[PATCH 6/6] drm/amdgpu: drop status reset for GCEA 9.4.3 and MMEA 1.8
Yang, Stanley
Stanley.Yang at amd.com
Wed Oct 18 07:43:41 UTC 2023
[AMD Official Use Only - General]
PMFW doesn't reset any CE/UE status or count in debug mode, so who takes responsibility for resetting them in debug mode?
Regards,
Stanley
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Tuesday, October 17, 2023 8:46 PM
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Li,
> Candice <Candice.Li at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>;
> Lazar, Lijo <Lijo.Lazar at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH 6/6] drm/amdgpu: drop status reset for GCEA 9.4.3 and
> MMEA 1.8
>
> PMFW will be responsible for resetting the error status.
>
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 -------
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 86 -------------------------
> 2 files changed, 108 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index a1c2c952d882..65da72735e52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -3996,27 +3996,6 @@ static void
> gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
> WREG32_SOC15(GC, GET_INST(GC, xcc_id),
> regVML2_WALKER_MEM_ECC_STATUS, 0x3); }
>
> -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device
> *adev,
> - int xcc_id)
> -{
> - uint32_t i, j;
> - uint32_t value;
> -
> - mutex_lock(&adev->grbm_idx_mutex);
> - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
> - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
> - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
> - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
> regGCEA_ERR_STATUS);
> - value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
> - CLEAR_ERROR_STATUS, 0x1);
> - WREG32_SOC15(GC, GET_INST(GC, xcc_id),
> regGCEA_ERR_STATUS, value);
> - }
> - }
> - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
> - xcc_id);
> - mutex_unlock(&adev->grbm_idx_mutex);
> -}
> -
> static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device
> *adev,
> int xcc_id)
> {
> @@ -4042,7 +4021,6 @@ static void
> gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
> void *ras_error_status, int xcc_id) {
> gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
> - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
> gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> index aa00483e7b37..616d75add087 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> @@ -756,96 +756,10 @@ static void
> mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
> mmhub_v1_8_inst_query_ras_err_status(adev, i); }
>
> -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device
> *adev,
> - uint32_t mmhub_inst)
> -{
> - uint32_t mmea_cgtt_clk_cntl_addr_dist;
> - uint32_t mmea_err_status_addr_dist;
> - uint32_t reg_value;
> - uint32_t i;
> -
> - /* reset mmea ras err status */
> - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL -
> regMMEA0_CGTT_CLK_CTRL;
> - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS -
> regMMEA0_ERR_STATUS;
> - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
> - /* force clk branch on for response path
> - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
> - */
> - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_CGTT_CLK_CTRL,
> - i *
> mmea_cgtt_clk_cntl_addr_dist);
> - reg_value = REG_SET_FIELD(reg_value,
> MMEA0_CGTT_CLK_CTRL,
> - SOFT_OVERRIDE_RETURN, 1);
> - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_CGTT_CLK_CTRL,
> - i * mmea_cgtt_clk_cntl_addr_dist,
> - reg_value);
> -
> - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
> - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_ERR_STATUS,
> - i *
> mmea_err_status_addr_dist);
> - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
> - CLEAR_ERROR_STATUS, 1);
> - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_ERR_STATUS,
> - i * mmea_err_status_addr_dist,
> - reg_value);
> -
> - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0
> */
> - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_CGTT_CLK_CTRL,
> - i *
> mmea_cgtt_clk_cntl_addr_dist);
> - reg_value = REG_SET_FIELD(reg_value,
> MMEA0_CGTT_CLK_CTRL,
> - SOFT_OVERRIDE_RETURN, 0);
> - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
> - regMMEA0_CGTT_CLK_CTRL,
> - i * mmea_cgtt_clk_cntl_addr_dist,
> - reg_value);
> - }
> -
> - /* reset mm_cane ras err status
> - * force clk branch on for response path
> - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
> - */
> - reg_value = RREG32_SOC15(MMHUB, mmhub_inst,
> regMM_CANE_ICG_CTRL);
> - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
> - SOFT_OVERRIDE_ATRET, 1);
> - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL,
> reg_value);
> -
> - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
> - reg_value = RREG32_SOC15(MMHUB, mmhub_inst,
> regMM_CANE_ERR_STATUS);
> - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
> - CLEAR_ERROR_STATUS, 1);
> - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS,
> reg_value);
> -
> - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
> - reg_value = RREG32_SOC15(MMHUB, mmhub_inst,
> regMM_CANE_ICG_CTRL);
> - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
> - SOFT_OVERRIDE_ATRET, 0);
> - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL,
> reg_value);
> -}
> -
> -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device
> *adev) -{
> - uint32_t inst_mask;
> - uint32_t i;
> -
> - if (!amdgpu_ras_is_supported(adev,
> AMDGPU_RAS_BLOCK__MMHUB)) {
> - dev_warn(adev->dev, "MMHUB RAS is not supported\n");
> - return;
> - }
> -
> - inst_mask = adev->aid_mask;
> - for_each_inst(i, inst_mask)
> - mmhub_v1_8_inst_reset_ras_err_status(adev, i);
> -}
> -
> static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
> .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
> .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
> .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
> - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
> };
>
> struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
> --
> 2.35.1
More information about the amd-gfx
mailing list