[PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

Alex Deucher alexdeucher at gmail.com
Fri Oct 25 16:05:15 UTC 2019


On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian
<Christian.Koenig at amd.com> wrote:
>
> Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
> > The GRBM interface is now capable of bursting
> > 1-cycle op per register, a WRITE followed by
> > another WRITE, or a WRITE followed by a READ--much
> > faster than previous multi-cycle per
> > completed-transaction interface. This causes a
> > problem, whereby status registers requiring a
> > read/write by hardware, have a 1-cycle delay, due
> > to the register update having to go through GRBM
> > interface.
> >
> > This patch adds this delay.
> >
> > A one cycle read op is added after updating the
> > invalidate request and before reading the
> > invalidate-ACK status.
>
> Please completely drop all changes for GFX9 since this patch will most
> likely break SRIOV.
>
> In addition to that, please apply the workaround only to SDMA, since the
> CP-driven engines should handle that in firmware.

I think the CP only handles this in firmware if we use the new TLB
invalidation packet.  I don't think it applies the workaround to general
register writes like the ones we do here.

Alex

>
> Regards,
> Christian.
>
> >
> > See also commit
> > 534991731cb5fa94b5519957646cf849ca10d17d.
> >
> > Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +++++++++
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++++++
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
> >   5 files changed, 22 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index ac43b1af69e3..0042868dbd53 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> >               5 + /* COND_EXEC */
> >               7 + /* PIPELINE_SYNC */
> >               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >               2 + /* VM_FLUSH */
> >               8 + /* FENCE for VM_FLUSH */
> >               20 + /* GDS switch */
> > @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> >               5 + /* hdp invalidate */
> >               7 + /* gfx_v10_0_ring_emit_pipeline_sync */
> >               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >               2 + /* gfx_v10_0_ring_emit_vm_flush */
> >               8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
> >       .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 9fe95e7693d5..9a7a717208de 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> >               5 +  /* COND_EXEC */
> >               7 +  /* PIPELINE_SYNC */
> >               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >               2 + /* VM_FLUSH */
> >               8 +  /* FENCE for VM_FLUSH */
> >               20 + /* GDS switch */
> > @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> >               5 + /* hdp invalidate */
> >               7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> >               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >               2 + /* gfx_v9_0_ring_emit_vm_flush */
> >               8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> >       .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > index 6e1b25bd1fe7..100d526e9a42 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> >
> >       amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> >
> > +     /* Insert a dummy read to delay one cycle before the ACK
> > +      * inquiry.
> > +      */
> > +     if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
> > +         ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> > +         ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> > +             amdgpu_ring_emit_reg_wait(ring,
> > +                                       hub->vm_inv_eng0_req + eng, 0, 0);
> > +
> >       /* wait for the invalidate to complete */
> >       amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> >                                 1 << vmid, 1 << vmid);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index 9f2a893871ec..8f3097e45299 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> >       amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
> >                             upper_32_bits(pd_addr));
> >
> > +     /* Insert a dummy read to delay one cycle before the ACK
> > +      * inquiry.
> > +      */
> > +     if (ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> > +         ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> > +             amdgpu_ring_emit_reg_wait(ring,
> > +                                       hub->vm_inv_eng0_req + eng, 0, 0);
> > +
> >       amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> >                                           hub->vm_inv_eng0_ack + eng,
> >                                           req, 1 << vmid);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index b8fdb192f6d6..0c41b4fdc58b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
> >               6 + /* sdma_v5_0_ring_emit_pipeline_sync */
> >               /* sdma_v5_0_ring_emit_vm_flush */
> >               SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> > -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> > +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
> >               10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
> >       .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
> >       .emit_ib = sdma_v5_0_ring_emit_ib,
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list