[PATCH 4/4] drm/amdgpu: read sdma edc counter to clear the counters

Alex Deucher alexdeucher at gmail.com
Wed Jan 8 16:27:25 UTC 2020


On Wed, Jan 8, 2020 at 11:18 AM Hawking Zhang <Hawking.Zhang at amd.com> wrote:
>
> SDMA edc counter registers were added to the gfx edc counter
> array. When querying gfx error counters from that array, there
> is no way to differentiate the sdma instance number for different
> ASICs, which results in a NULL pointer access when trying to
> read the sdma register base address for instances greater
> than 2 on Vega20.
> In addition, this also results in wrong gfx error counters,
> since the sdma edc counters are actually added to them.
> Therefore, the sdma edc counter registers should be separated
> from the gfx edc counter register array and only get initialized
> when the driver tries to enable sdma ras.
>
> Change-Id: I206917f9d7b81670a8fed84dc749085ce5a6f678
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 11 +----------
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  7 +++++++
>  2 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 33d1c57aaaf1..c9ade16bbcc3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4038,14 +4038,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
>     { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
>     { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
>     { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1},
> -   { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1},
>  };
>
>  static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
> @@ -4109,7 +4101,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>                                                 adev->gfx.config.max_sh_per_se;
>         int sgpr_work_group_size = 5;
>         int gpr_reg_size = compute_dim_x / 16 + 6;
> -       int sec_ded_counter_reg_size = adev->sdma.num_instances + 34;
>
>         /* only support when RAS is enabled */
>         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
> @@ -4249,7 +4240,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>
>         /* read back registers to clear the counters */
>         mutex_lock(&adev->grbm_idx_mutex);
> -       for (i = 0; i < sec_ded_counter_reg_size; i++) {
> +       for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
>                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
>                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
>                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index fd20594b6d6e..f4107f9b75f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1802,6 +1802,13 @@ static int sdma_v4_0_late_init(void *handle)
>         struct ras_ih_if ih_info = {
>                 .cb = sdma_v4_0_process_ras_data_cb,
>         };
> +       int i;
> +
> +       /* read back edc counter registers to clear the counters */
> +       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
> +               for (i = 0; i < adev->sdma.num_instances; i++)
> +                       RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
> +       }
>
>         return adev->sdma.funcs->ras_late_init(adev, &ih_info);
>  }
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list