[PATCH] drm/amdgpu: Add GDS clearing workaround in later init for gfx9

Fri Jun 7 19:42:07 UTC 2019

On 2019-06-07 3:12 p.m., Zhu, James wrote:
> On 2019-06-07 2:16 p.m., Alex Deucher wrote:
>> On Fri, Jun 7, 2019 at 12:38 PM Zhu, James <James.Zhu at amd.com> wrote:
>>> Since Hardware bug, GDS exist ECC error after cold boot up,
>>> adding GDS clearing workaround in later init for gfx9.
>>>
>>> Signed-off-by: James Zhu <James.Zhu at amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 48 +++++++++++++++++++++++++++++++++++
>>>    1 file changed, 48 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 76722fc..81f6ba8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -3634,6 +3634,50 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
>>>       { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
>>>    };
>>>
>>> +
>>> +static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
>>> +{
>>> +       struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
>>> +       int r;
>>> +
>>> +       r = amdgpu_ring_alloc(ring, 17);
>>> +       if (r) {
>>> +               DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
>>> +                       ring->name, r);
>>> +               return r;
>>> +       }
>>> +
>>> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
>>> +                          WRITE_DATA_DST_SEL(0));
>>> +       amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, 0x10000);
>> hardcoded size, please use the size from the driver.
>>
>>> +
>>> +       amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
>>> +       amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
>>> +                               PACKET3_DMA_DATA_DST_SEL(1) |
>>> +                               PACKET3_DMA_DATA_SRC_SEL(2) |
>>> +                               PACKET3_DMA_DATA_ENGINE(0)));
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 0x10000);
>> Instead of hardcoding the size, can you use the gds size from the
>> driver (adev->gds.gds_size).
> Hi Alex,
>
> Do you mean adev->gds.mem.total_size?
>
> But I see below operation in gfx_v9_0_ngg_init.
>
> adev->gds.mem.total_size -= ....
>
> Or you want me to add gds_size in struct amdgpu_gds?
>
> James

Yeah, The amd-staging-drm-next branch has adev->gds.gds_size.

James

>
>> With that fixed:
>> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>>
>>> +
>>> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
>>> +                               WRITE_DATA_DST_SEL(0));
>>> +       amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
>>> +       amdgpu_ring_write(ring, 0);
>>> +       amdgpu_ring_write(ring, 0x0);
>>> +
>>> +       amdgpu_ring_commit(ring);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +
>>>    static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>>>    {
>>>           struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
>>> @@ -3810,6 +3854,10 @@ static int gfx_v9_0_ecc_late_init(void *handle)
>>>                   return 0;
>>>           }
>>>
>>> +       r = gfx_v9_0_do_edc_gds_workarounds(adev);
>>> +       if (r)
>>> +               return r;
>>> +
>>>           /* requires IBs so do in late init after IB pool is initialized */
>>>           r = gfx_v9_0_do_edc_gpr_workarounds(adev);
>>>           if (r)
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx