[PATCH] drm/amdgpu/gfx6: flush caches after IB with the correct vmid
Nicolai Hähnle
nhaehnle at gmail.com
Mon May 8 16:13:54 UTC 2017
Unfortunately, further testing shows that this doesn't actually fix the
problem. FWIW, that test runs very reliably on SI with the radeon drm,
but with the amdgpu drm it fails. VI is fine on amdgpu, which is why I
was sent down this road.
Anyway, back to trying to figure this out :/
Cheers,
Nicolai
On 08.05.2017 11:30, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Bring the code in line with what the radeon module does.
>
> Without this change, the fence following the IB may be signalled
> to the CPU even though some data written by shaders may not have
> been written back yet.
>
> This change fixes the OpenGL CTS test
> GL45-CTS.gtf32.GL3Tests.packed_pixels.packed_pixels_pbo on Verde.
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> --
> So, I'm not too familiar with the details of these syncs, but the radeon
> module effectively does:
>
> - IB
> - SURFACE_SYNC (vm_id of IB)
> - SURFACE_SYNC (vm_id == 0)
> - EVENT_WRITE_EOP (kernel fence)
>
> While amdgpu now does (with this change):
>
> - IB
> - SURFACE_SYNC (vm_id of IB) <-- this was missing
> - SURFACE_SYNC (vm_id == 0)
> - EVENT_WRITE_EOP (kernel fence)
> - SURFACE_SYNC (vm_id == 0)
> - EVENT_WRITE_EOP (user fence)
>
> It seems like at least the second SURFACE_SYNC (vm_id == 0) should be
> redundant, so the question is whether the SURFACE_SYNC (vm_id == 0)
> should be rearranged somehow, perhaps also added to the IB emission.
> But for better bisectability, that should probably be a separate
> change.
>
> Cheers,
> Nicolai
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 +++++++++++++++++--
> 1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> index 03d2a0a..c4f444d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> @@ -1922,20 +1922,35 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
> control |= ib->length_dw | (vm_id << 24);
>
> amdgpu_ring_write(ring, header);
> amdgpu_ring_write(ring,
> #ifdef __BIG_ENDIAN
> (2 << 0) |
> #endif
> (ib->gpu_addr & 0xFFFFFFFC));
> amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
> amdgpu_ring_write(ring, control);
> +
> + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) {
> + /* flush read cache over gart for this vmid */
> + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
> + amdgpu_ring_write(ring, (mmCP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START));
> + amdgpu_ring_write(ring, vm_id);
> + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
> + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
> + PACKET3_TC_ACTION_ENA |
> + PACKET3_SH_KCACHE_ACTION_ENA |
> + PACKET3_SH_ICACHE_ACTION_ENA);
> + amdgpu_ring_write(ring, 0xFFFFFFFF);
> + amdgpu_ring_write(ring, 0);
> + amdgpu_ring_write(ring, 10); /* poll interval */
> + }
> }
>
> /**
> * gfx_v6_0_ring_test_ib - basic ring IB test
> *
> * @ring: amdgpu_ring structure holding ring information
> *
> * Allocate an IB and execute it on the gfx ring (SI).
> * Provides a basic gfx ring test to verify that IBs are working.
> * Returns 0 on success, error on failure.
> @@ -3631,21 +3646,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
> .get_rptr = gfx_v6_0_ring_get_rptr,
> .get_wptr = gfx_v6_0_ring_get_wptr,
> .set_wptr = gfx_v6_0_ring_set_wptr_gfx,
> .emit_frame_size =
> 5 + /* gfx_v6_0_ring_emit_hdp_flush */
> 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
> 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
> 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
> 17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
> 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
> - .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
> + .emit_ib_size = 6 + 8, /* gfx_v6_0_ring_emit_ib */
> .emit_ib = gfx_v6_0_ring_emit_ib,
> .emit_fence = gfx_v6_0_ring_emit_fence,
> .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
> .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
> .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
> .test_ring = gfx_v6_0_ring_test_ring,
> .test_ib = gfx_v6_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
> @@ -3657,21 +3672,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
> .nop = 0x80000000,
> .get_rptr = gfx_v6_0_ring_get_rptr,
> .get_wptr = gfx_v6_0_ring_get_wptr,
> .set_wptr = gfx_v6_0_ring_set_wptr_compute,
> .emit_frame_size =
> 5 + /* gfx_v6_0_ring_emit_hdp_flush */
> 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
> 7 + /* gfx_v6_0_ring_emit_pipeline_sync */
> 17 + /* gfx_v6_0_ring_emit_vm_flush */
> 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
> - .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
> + .emit_ib_size = 6 + 8, /* gfx_v6_0_ring_emit_ib */
> .emit_ib = gfx_v6_0_ring_emit_ib,
> .emit_fence = gfx_v6_0_ring_emit_fence,
> .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
> .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
> .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
> .test_ring = gfx_v6_0_ring_test_ring,
> .test_ib = gfx_v6_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> };
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the amd-gfx
mailing list