[PATCH] drm/amdgpu: Correct se_num and reg_inst for gfx v9_4_3 ras counters
Zhou1, Tao
Tao.Zhou1 at amd.com
Wed Sep 6 10:15:32 UTC 2023
[AMD Official Use Only - General]
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Hawking
> Zhang
> Sent: Wednesday, September 6, 2023 6:12 PM
> To: amd-gfx at lists.freedesktop.org; Zhou1, Tao <Tao.Zhou1 at amd.com>; Yang,
> Stanley <Stanley.Yang at amd.com>; Li, Candice <Candice.Li at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH] drm/amdgpu: Correct se_num and reg_inst for gfx v9_4_3 ras
> counters
>
> gfx_v9_4_3_ue|ce_reg_list is an array per gfx core instance. Correct the
> settings of se_num and reg_inst for some of the gfx ras counters so that all
> the available register instances can be polled for ras status.
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 40 ++++++++++++-------------
> 1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 0a26a00074a6..a60d1a8405d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -3653,19 +3653,19 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ce_reg_list[] = {
> AMDGPU_GFX_GC_CANE_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO,
> regSPI_CE_ERR_STATUS_HI),
> 1, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
> - AMDGPU_GFX_SPI_MEM, 8},
> + AMDGPU_GFX_SPI_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO,
> regSP0_CE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO,
> regSP1_CE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO,
> regSQ_CE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
> - AMDGPU_GFX_SQ_MEM, 8},
> + AMDGPU_GFX_SQ_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO,
> regSQC_CE_EDC_HI),
> 5, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
> - AMDGPU_GFX_SQC_MEM, 8},
> + AMDGPU_GFX_SQC_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO,
> regTCX_CE_ERR_STATUS_HI),
> 2, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
> AMDGPU_GFX_TCX_MEM, 1},
> @@ -3674,22 +3674,22 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ce_reg_list[] = {
> AMDGPU_GFX_TCC_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO,
> regTA_CE_EDC_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
> - AMDGPU_GFX_TA_MEM, 8},
> + AMDGPU_GFX_TA_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG,
> regTCI_CE_EDC_HI_REG),
> - 31, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
> + 27, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID),
> +"TCI"},
> AMDGPU_GFX_TCI_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG,
> regTCP_CE_EDC_HI_REG),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
> - AMDGPU_GFX_TCP_MEM, 8},
> + AMDGPU_GFX_TCP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO,
> regTD_CE_EDC_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
> - AMDGPU_GFX_TD_MEM, 8},
> + AMDGPU_GFX_TD_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO,
> regGCEA_CE_ERR_STATUS_HI),
> 16, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
> AMDGPU_GFX_GCEA_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO,
> regLDS_CE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
> - AMDGPU_GFX_LDS_MEM, 1},
> + AMDGPU_GFX_LDS_MEM, 4},
> };
>
> static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
> @@ -3713,19 +3713,19 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ue_reg_list[] = {
> AMDGPU_GFX_GC_CANE_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO,
> regSPI_UE_ERR_STATUS_HI),
> 1, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
> - AMDGPU_GFX_SPI_MEM, 8},
> + AMDGPU_GFX_SPI_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO,
> regSP0_UE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO,
> regSP1_UE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO,
> regSQ_UE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
> - AMDGPU_GFX_SQ_MEM, 8},
> + AMDGPU_GFX_SQ_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO,
> regSQC_UE_EDC_HI),
> 5, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
> - AMDGPU_GFX_SQC_MEM, 8},
> + AMDGPU_GFX_SQC_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO,
> regTCX_UE_ERR_STATUS_HI),
> 2, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
> AMDGPU_GFX_TCX_MEM, 1},
> @@ -3734,16 +3734,16 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ue_reg_list[] = {
> AMDGPU_GFX_TCC_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO,
> regTA_UE_EDC_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
> - AMDGPU_GFX_TA_MEM, 8},
> + AMDGPU_GFX_TA_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG,
> regTCI_UE_EDC_HI_REG),
> - 31, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
> + 27, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID),
> +"TCI"},
> AMDGPU_GFX_TCI_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG,
> regTCP_UE_EDC_HI_REG),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
> - AMDGPU_GFX_TCP_MEM, 8},
> + AMDGPU_GFX_TCP_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO,
> regTD_UE_EDC_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
> - AMDGPU_GFX_TD_MEM, 8},
> + AMDGPU_GFX_TD_MEM, 4},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO,
> regTCA_UE_ERR_STATUS_HI),
> 2, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
> AMDGPU_GFX_TCA_MEM, 1},
> @@ -3752,7 +3752,7 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ue_reg_list[] = {
> AMDGPU_GFX_GCEA_MEM, 1},
> {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO,
> regLDS_UE_ERR_STATUS_HI),
> 10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
> - AMDGPU_GFX_LDS_MEM, 1},
> + AMDGPU_GFX_LDS_MEM, 4},
> };
>
> static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
> --
> 2.17.1
More information about the amd-gfx
mailing list