[Mesa-stable] [PATCH] radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9
Dylan Baker
dylan at pnwbakers.com
Fri Jul 13 17:05:35 UTC 2018
Quoting Samuel Pitoiset (2018-07-13 03:30:01)
>
>
> On 07/13/2018 12:05 PM, Samuel Pitoiset wrote:
> >
> >
> > On 07/12/2018 09:43 PM, Dylan Baker wrote:
> >> Quoting Samuel Pitoiset (2018-07-11 02:55:55)
> >>> A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
> >>> counters) must immediately precede every timestamp event to
> >>> prevent a GPU hang on GFX9.
> >>>
> >>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> >>> Cc: 18.1 <mesa-stable at lists.freedesktop.org>
> >>> ---
> >>> src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++--
> >>> src/amd/vulkan/radv_device.c | 4 ++--
> >>> src/amd/vulkan/radv_private.h | 7 +++++--
> >>> src/amd/vulkan/radv_query.c | 9 ++++++---
> >>> src/amd/vulkan/si_cmd_buffer.c | 26 +++++++++++++++++++++-----
> >>> 5 files changed, 47 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
> >>> b/src/amd/vulkan/radv_cmd_buffer.c
> >>> index 9da42fe03e..325e1993f8 100644
> >>> --- a/src/amd/vulkan/radv_cmd_buffer.c
> >>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> >>> @@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer
> >>> *cmd_buffer)
> >>> }
> >>> if (cmd_buffer->device->physical_device->rad_info.chip_class
> >>> >= GFX9) {
> >>> + unsigned num_db =
> >>> cmd_buffer->device->physical_device->rad_info.num_render_backends;
> >>> + unsigned eop_bug_offset;
> >>> void *fence_ptr;
> >>> +
> >>> radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
> >>>
> >>> &cmd_buffer->gfx9_fence_offset,
> >>> &fence_ptr);
> >>> cmd_buffer->gfx9_fence_bo =
> >>> cmd_buffer->upload.upload_bo;
> >>> +
> >>> + /* Allocate a buffer for the EOP bug on GFX9. */
> >>> + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
> >>> + &eop_bug_offset,
> >>> &fence_ptr);
> >>> + cmd_buffer->gfx9_eop_bug_va =
> >>> +
> >>> radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> >>> + cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
> >>> }
> >>> cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
> >>> @@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer
> >>> *cmd_buffer,
> >>>
> >>> cmd_buffer->device->physical_device->rad_info.chip_class,
> >>> ptr, va,
> >>>
> >>> radv_cmd_buffer_uses_mec(cmd_buffer),
> >>> - flags);
> >>> + flags,
> >>> cmd_buffer->gfx9_eop_bug_va);
> >>> }
> >>> if (unlikely(cmd_buffer->device->trace_bo))
> >>> @@ -4318,7 +4328,8 @@ static void write_event(struct radv_cmd_buffer
> >>> *cmd_buffer,
> >>>
> >>> cmd_buffer->device->physical_device->rad_info.chip_class,
> >>>
> >>> radv_cmd_buffer_uses_mec(cmd_buffer),
> >>>
> >>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
> >>> - EOP_DATA_SEL_VALUE_32BIT,
> >>> va, 2, value);
> >>> + EOP_DATA_SEL_VALUE_32BIT,
> >>> va, 2, value,
> >>> + cmd_buffer->gfx9_eop_bug_va);
> >>> }
> >>> assert(cmd_buffer->cs->cdw <= cdw_max);
> >>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> >>> index 73c48cef1f..1c0a50c82f 100644
> >>> --- a/src/amd/vulkan/radv_device.c
> >>> +++ b/src/amd/vulkan/radv_device.c
> >>> @@ -2240,7 +2240,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
> >>>
> >>> RADV_CMD_FLAG_INV_SMEM_L1 |
> >>>
> >>> RADV_CMD_FLAG_INV_VMEM_L1 |
> >>>
> >>> RADV_CMD_FLAG_INV_GLOBAL_L2 |
> >>> -
> >>> RADV_CMD_FLAG_START_PIPELINE_STATS);
> >>> +
> >>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
> >>> } else if (i == 1) {
> >>> si_cs_emit_cache_flush(cs,
> >>>
> >>> queue->device->physical_device->rad_info.chip_class,
> >>> @@ -2251,7 +2251,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
> >>>
> >>> RADV_CMD_FLAG_INV_SMEM_L1 |
> >>>
> >>> RADV_CMD_FLAG_INV_VMEM_L1 |
> >>>
> >>> RADV_CMD_FLAG_INV_GLOBAL_L2 |
> >>> -
> >>> RADV_CMD_FLAG_START_PIPELINE_STATS);
> >>> +
> >>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
> >>> }
> >>> if (!queue->device->ws->cs_finalize(cs))
> >>> diff --git a/src/amd/vulkan/radv_private.h
> >>> b/src/amd/vulkan/radv_private.h
> >>> index 4e4b3a6037..96218f4be2 100644
> >>> --- a/src/amd/vulkan/radv_private.h
> >>> +++ b/src/amd/vulkan/radv_private.h
> >>> @@ -1041,6 +1041,7 @@ struct radv_cmd_buffer {
> >>> uint32_t gfx9_fence_offset;
> >>> struct radeon_winsys_bo *gfx9_fence_bo;
> >>> uint32_t gfx9_fence_idx;
> >>> + uint64_t gfx9_eop_bug_va;
> >>> /**
> >>> * Whether a query pool has been resetted and we have to
> >>> flush caches.
> >>> @@ -1072,7 +1073,8 @@ void si_cs_emit_write_event_eop(struct
> >>> radeon_cmdbuf *cs,
> >>> unsigned data_sel,
> >>> uint64_t va,
> >>> uint32_t old_fence,
> >>> - uint32_t new_fence);
> >>> + uint32_t new_fence,
> >>> + uint64_t gfx9_eop_bug_va);
> >>> void si_emit_wait_fence(struct radeon_cmdbuf *cs,
> >>> uint64_t va, uint32_t ref,
> >>> @@ -1081,7 +1083,8 @@ void si_cs_emit_cache_flush(struct
> >>> radeon_cmdbuf *cs,
> >>> enum chip_class chip_class,
> >>> uint32_t *fence_ptr, uint64_t va,
> >>> bool is_mec,
> >>> - enum radv_cmd_flush_bits flush_bits);
> >>> + enum radv_cmd_flush_bits flush_bits,
> >>> + uint64_t gfx9_eop_bug_va);
> >>> void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
> >>> void si_emit_set_predication_state(struct radv_cmd_buffer
> >>> *cmd_buffer, uint64_t va);
> >>> void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
> >>> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> >>> index 267d45890e..ba3783905f 100644
> >>> --- a/src/amd/vulkan/radv_query.c
> >>> +++ b/src/amd/vulkan/radv_query.c
> >>> @@ -1180,7 +1180,8 @@ static void emit_end_query(struct
> >>> radv_cmd_buffer *cmd_buffer,
> >>>
> >>> radv_cmd_buffer_uses_mec(cmd_buffer),
> >>>
> >>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
> >>> EOP_DATA_SEL_VALUE_32BIT,
> >>> - avail_va, 0, 1);
> >>> + avail_va, 0, 1,
> >>> + cmd_buffer->gfx9_eop_bug_va);
> >>> break;
> >>> default:
> >>> unreachable("ending unhandled query type");
> >>> @@ -1303,13 +1304,15 @@ void radv_CmdWriteTimestamp(
> >>> mec,
> >>>
> >>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
> >>>
> >>> EOP_DATA_SEL_TIMESTAMP,
> >>> - query_va, 0, 0);
> >>> + query_va, 0, 0,
> >>> +
> >>> cmd_buffer->gfx9_eop_bug_va);
> >>> si_cs_emit_write_event_eop(cs,
> >>>
> >>> cmd_buffer->device->physical_device->rad_info.chip_class,
> >>> mec,
> >>>
> >>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
> >>>
> >>> EOP_DATA_SEL_VALUE_32BIT,
> >>> - avail_va, 0, 1);
> >>> + avail_va, 0, 1,
> >>> +
> >>> cmd_buffer->gfx9_eop_bug_va);
> >>> break;
> >>> }
> >>> query_va += pool->stride;
> >>> diff --git a/src/amd/vulkan/si_cmd_buffer.c
> >>> b/src/amd/vulkan/si_cmd_buffer.c
> >>> index 454fd8c39c..49c5eb1372 100644
> >>> --- a/src/amd/vulkan/si_cmd_buffer.c
> >>> +++ b/src/amd/vulkan/si_cmd_buffer.c
> >>> @@ -679,7 +679,8 @@ void si_cs_emit_write_event_eop(struct
> >>> radeon_cmdbuf *cs,
> >>> unsigned data_sel,
> >>> uint64_t va,
> >>> uint32_t old_fence,
> >>> - uint32_t new_fence)
> >>> + uint32_t new_fence,
> >>> + uint64_t gfx9_eop_bug_va)
> >>> {
> >>> unsigned op = EVENT_TYPE(event) |
> >>> EVENT_INDEX(5) |
> >>> @@ -693,6 +694,17 @@ void si_cs_emit_write_event_eop(struct
> >>> radeon_cmdbuf *cs,
> >>> sel |=
> >>> EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
> >>> if (chip_class >= GFX9 || is_gfx8_mec) {
> >>> + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB
> >>> occlusion
> >>> + * counters) must immediately precede every timestamp
> >>> event to
> >>> + * prevent a GPU hang on GFX9.
> >>> + */
> >>> + if (chip_class == GFX9) {
> >>> + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
> >>> + radeon_emit(cs,
> >>> EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
> >>> + radeon_emit(cs, gfx9_eop_bug_va);
> >>> + radeon_emit(cs, gfx9_eop_bug_va >> 32);
> >>> + }
> >>> +
> >>> radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ?
> >>> 5 : 6, false));
> >>> radeon_emit(cs, op);
> >>> radeon_emit(cs, sel);
> >>> @@ -772,7 +784,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> >>> uint32_t *flush_cnt,
> >>> uint64_t flush_va,
> >>> bool is_mec,
> >>> - enum radv_cmd_flush_bits flush_bits)
> >>> + enum radv_cmd_flush_bits flush_bits,
> >>> + uint64_t gfx9_eop_bug_va)
> >>> {
> >>> unsigned cp_coher_cntl = 0;
> >>> uint32_t flush_cb_db = flush_bits &
> >>> (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> >>> @@ -803,7 +816,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> >>>
> >>> V_028A90_FLUSH_AND_INV_CB_DATA_TS,
> >>> 0,
> >>>
> >>> EOP_DATA_SEL_DISCARD,
> >>> - 0, 0, 0);
> >>> + 0, 0, 0,
> >>> +
> >>> gfx9_eop_bug_va);
> >>> }
> >>> }
> >>> if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
> >>> @@ -873,7 +887,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> >>> si_cs_emit_write_event_eop(cs, chip_class, false,
> >>> cb_db_event, tc_flags,
> >>> EOP_DATA_SEL_VALUE_32BIT,
> >>> - flush_va, old_fence,
> >>> *flush_cnt);
> >>> + flush_va, old_fence,
> >>> *flush_cnt,
> >>> + gfx9_eop_bug_va);
> >>> si_emit_wait_fence(cs, flush_va, *flush_cnt,
> >>> 0xffffffff);
> >>> }
> >>> @@ -975,7 +990,8 @@ si_emit_cache_flush(struct radv_cmd_buffer
> >>> *cmd_buffer)
> >>>
> >>> cmd_buffer->device->physical_device->rad_info.chip_class,
> >>> ptr, va,
> >>> radv_cmd_buffer_uses_mec(cmd_buffer),
> >>> - cmd_buffer->state.flush_bits);
> >>> + cmd_buffer->state.flush_bits,
> >>> + cmd_buffer->gfx9_eop_bug_va);
> >>> if (unlikely(cmd_buffer->device->trace_bo))
> >>> --
> >>> 2.18.0
> >>>
> >>> _______________________________________________
> >>> mesa-stable mailing list
> >>> mesa-stable at lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
> >>
> >> Hi Samuel,
> >>
> >> This patch also doesn't apply cleanly to 18.1. Could I bother you for a
> >> backport?
> >
> > Will do.
>
> Dylan, I presume the backport should be based on the staging/18.1 branch?
>
Yes, please.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: signature
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180713/5946c95a/attachment-0001.sig>
More information about the mesa-stable mailing list