[PATCH] drm/amdgpu: Add GDS clearing workaround in later init for gfx9

Alex Deucher alexdeucher at gmail.com
Fri Jun 7 18:16:01 UTC 2019


On Fri, Jun 7, 2019 at 12:38 PM Zhu, James <James.Zhu at amd.com> wrote:
>
> Due to a hardware bug, GDS reports ECC errors after a cold boot,
> so add a GDS clearing workaround in late init for gfx9.
>
> Signed-off-by: James Zhu <James.Zhu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 48 +++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 76722fc..81f6ba8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3634,6 +3634,50 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
>     { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
>  };
>
> +
> +static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
> +{
> +       struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
> +       int r;
> +
> +       r = amdgpu_ring_alloc(ring, 17);
> +       if (r) {
> +               DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
> +                       ring->name, r);
> +               return r;
> +       }
> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
> +                          WRITE_DATA_DST_SEL(0));
> +       amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 0x10000);

This size is hardcoded; please use the GDS size from the driver instead.

> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
> +       amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
> +                               PACKET3_DMA_DATA_DST_SEL(1) |
> +                               PACKET3_DMA_DATA_SRC_SEL(2) |
> +                               PACKET3_DMA_DATA_ENGINE(0)));
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 0x10000);

Instead of hardcoding the size, can you use the GDS size from the
driver (adev->gds.gds_size)?

With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
> +                               WRITE_DATA_DST_SEL(0));
> +       amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 0x0);
> +
> +       amdgpu_ring_commit(ring);
> +
> +       return 0;
> +}
> +
> +
>  static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>  {
>         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
> @@ -3810,6 +3854,10 @@ static int gfx_v9_0_ecc_late_init(void *handle)
>                 return 0;
>         }
>
> +       r = gfx_v9_0_do_edc_gds_workarounds(adev);
> +       if (r)
> +               return r;
> +
>         /* requires IBs so do in late init after IB pool is initialized */
>         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
>         if (r)
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list