[Mesa-stable] [PATCH] radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9

Dylan Baker dylan at pnwbakers.com
Thu Jul 19 21:39:38 UTC 2018


Applied, thank you!

Quoting Samuel Pitoiset (2018-07-13 10:37:20)
> A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
> counters) must immediately precede every timestamp event to
> prevent a GPU hang on GFX9.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++--
>  src/amd/vulkan/radv_device.c     |  4 ++--
>  src/amd/vulkan/radv_private.h    |  7 +++++--
>  src/amd/vulkan/radv_query.c      |  9 ++++++---
>  src/amd/vulkan/si_cmd_buffer.c   | 26 +++++++++++++++++++++-----
>  5 files changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 28348f4e45..8c0725b8f9 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>         }
>  
>         if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
> +               unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
> +               unsigned eop_bug_offset;
>                 void *fence_ptr;
> +
>                 radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
>                                              &cmd_buffer->gfx9_fence_offset,
>                                              &fence_ptr);
>                 cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
> +
> +               /* Allocate a buffer for the EOP bug on GFX9. */
> +               radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
> +                                            &eop_bug_offset, &fence_ptr);
> +               cmd_buffer->gfx9_eop_bug_va =
> +                       radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> +               cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
>         }
>  
>         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
> @@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
>                                        cmd_buffer->device->physical_device->rad_info.chip_class,
>                                        ptr, va,
>                                        radv_cmd_buffer_uses_mec(cmd_buffer),
> -                                      flags);
> +                                      flags, cmd_buffer->gfx9_eop_bug_va);
>         }
>  
>         if (unlikely(cmd_buffer->device->trace_bo))
> @@ -4093,7 +4103,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
>                                    cmd_buffer->device->physical_device->rad_info.chip_class,
>                                    radv_cmd_buffer_uses_mec(cmd_buffer),
>                                    V_028A90_BOTTOM_OF_PIPE_TS, 0,
> -                                  1, va, 2, value);
> +                                  1, va, 2, value,
> +                                  cmd_buffer->gfx9_eop_bug_va);
>  
>         assert(cmd_buffer->cs->cdw <= cdw_max);
>  }
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index c49ab58275..bfd6f96536 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -2181,7 +2181,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>                                                RADV_CMD_FLAG_INV_ICACHE |
>                                                RADV_CMD_FLAG_INV_SMEM_L1 |
>                                                RADV_CMD_FLAG_INV_VMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
> +                                              RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
>                 } else if (i == 1) {
>                         si_cs_emit_cache_flush(cs,
>                                                queue->device->physical_device->rad_info.chip_class,
> @@ -2191,7 +2191,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>                                                RADV_CMD_FLAG_INV_ICACHE |
>                                                RADV_CMD_FLAG_INV_SMEM_L1 |
>                                                RADV_CMD_FLAG_INV_VMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
> +                                              RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
>                 }
>  
>                 if (!queue->device->ws->cs_finalize(cs))
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 3b4c80e025..3f997d348e 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1037,6 +1037,7 @@ struct radv_cmd_buffer {
>         uint32_t gfx9_fence_offset;
>         struct radeon_winsys_bo *gfx9_fence_bo;
>         uint32_t gfx9_fence_idx;
> +       uint64_t gfx9_eop_bug_va;
>  
>         /**
>          * Whether a query pool has been resetted and we have to flush caches.
> @@ -1069,7 +1070,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
>                                 unsigned data_sel,
>                                 uint64_t va,
>                                 uint32_t old_fence,
> -                               uint32_t new_fence);
> +                               uint32_t new_fence,
> +                               uint64_t gfx9_eop_bug_va);
>  
>  void si_emit_wait_fence(struct radeon_winsys_cs *cs,
>                         bool predicated,
> @@ -1079,7 +1081,8 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
>                             enum chip_class chip_class,
>                             uint32_t *fence_ptr, uint64_t va,
>                             bool is_mec,
> -                           enum radv_cmd_flush_bits flush_bits);
> +                           enum radv_cmd_flush_bits flush_bits,
> +                           uint64_t gfx9_eop_bug_va);
>  void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
>  void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
>  void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> index 9a930e85ff..dccdee3611 100644
> --- a/src/amd/vulkan/radv_query.c
> +++ b/src/amd/vulkan/radv_query.c
> @@ -1169,7 +1169,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
>                                            cmd_buffer->device->physical_device->rad_info.chip_class,
>                                            radv_cmd_buffer_uses_mec(cmd_buffer),
>                                            V_028A90_BOTTOM_OF_PIPE_TS, 0,
> -                                          1, avail_va, 0, 1);
> +                                          1, avail_va, 0, 1,
> +                                          cmd_buffer->gfx9_eop_bug_va);
>                 break;
>         default:
>                 unreachable("ending unhandled query type");
> @@ -1292,13 +1293,15 @@ void radv_CmdWriteTimestamp(
>                                                    cmd_buffer->device->physical_device->rad_info.chip_class,
>                                                    mec,
>                                                    V_028A90_BOTTOM_OF_PIPE_TS, 0,
> -                                                  3, query_va, 0, 0);
> +                                                  3, query_va, 0, 0,
> +                                                  cmd_buffer->gfx9_eop_bug_va);
>                         si_cs_emit_write_event_eop(cs,
>                                                    false,
>                                                    cmd_buffer->device->physical_device->rad_info.chip_class,
>                                                    mec,
>                                                    V_028A90_BOTTOM_OF_PIPE_TS, 0,
> -                                                  1, avail_va, 0, 1);
> +                                                  1, avail_va, 0, 1,
> +                                                  cmd_buffer->gfx9_eop_bug_va);
>                         break;
>                 }
>                 query_va += pool->stride;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index d4459092d0..7cd863e389 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -852,7 +852,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
>                                 unsigned data_sel,
>                                 uint64_t va,
>                                 uint32_t old_fence,
> -                               uint32_t new_fence)
> +                               uint32_t new_fence,
> +                               uint64_t gfx9_eop_bug_va)
>  {
>         unsigned op = EVENT_TYPE(event) |
>                 EVENT_INDEX(5) |
> @@ -860,6 +861,17 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
>         unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
>  
>         if (chip_class >= GFX9 || is_gfx8_mec) {
> +               /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
> +                * counters) must immediately precede every timestamp event to
> +                * prevent a GPU hang on GFX9.
> +                */
> +               if (chip_class == GFX9) {
> +                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
> +                       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
> +                       radeon_emit(cs, gfx9_eop_bug_va);
> +                       radeon_emit(cs, gfx9_eop_bug_va >> 32);
> +               }
> +
>                 radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, predicated));
>                 radeon_emit(cs, op);
>                 radeon_emit(cs, EOP_DATA_SEL(data_sel));
> @@ -941,7 +953,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
>                        uint32_t *flush_cnt,
>                        uint64_t flush_va,
>                         bool is_mec,
> -                       enum radv_cmd_flush_bits flush_bits)
> +                       enum radv_cmd_flush_bits flush_bits,
> +                      uint64_t gfx9_eop_bug_va)
>  {
>         unsigned cp_coher_cntl = 0;
>         uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> @@ -971,7 +984,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
>                                                            chip_class,
>                                                            is_mec,
>                                                            V_028A90_FLUSH_AND_INV_CB_DATA_TS,
> -                                                          0, 0, 0, 0, 0);
> +                                                          0, 0, 0, 0, 0,
> +                                                          gfx9_eop_bug_va);
>                         }
>                 }
>                 if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
> @@ -1057,7 +1071,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
>                 uint32_t old_fence = (*flush_cnt)++;
>  
>                 si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
> -                                          flush_va, old_fence, *flush_cnt);
> +                                          flush_va, old_fence, *flush_cnt,
> +                                          gfx9_eop_bug_va);
>                 si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
>         }
>  
> @@ -1149,7 +1164,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
>                                cmd_buffer->device->physical_device->rad_info.chip_class,
>                                ptr, va,
>                                radv_cmd_buffer_uses_mec(cmd_buffer),
> -                              cmd_buffer->state.flush_bits);
> +                              cmd_buffer->state.flush_bits,
> +                              cmd_buffer->gfx9_eop_bug_va);
>  
>  
>         if (unlikely(cmd_buffer->device->trace_bo))
> -- 
> 2.18.0
> 
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: signature
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180719/d6800b22/attachment.sig>


More information about the mesa-stable mailing list