[Mesa-dev] [Mesa-stable] [PATCH] radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri Jul 13 10:30:01 UTC 2018



On 07/13/2018 12:05 PM, Samuel Pitoiset wrote:
> 
> 
> On 07/12/2018 09:43 PM, Dylan Baker wrote:
>> Quoting Samuel Pitoiset (2018-07-11 02:55:55)
>>> A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
>>> counters) must immediately precede every timestamp event to
>>> prevent a GPU hang on GFX9.
>>>
>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>> Cc: 18.1 <mesa-stable at lists.freedesktop.org>
>>> ---
>>>   src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++--
>>>   src/amd/vulkan/radv_device.c     |  4 ++--
>>>   src/amd/vulkan/radv_private.h    |  7 +++++--
>>>   src/amd/vulkan/radv_query.c      |  9 ++++++---
>>>   src/amd/vulkan/si_cmd_buffer.c   | 26 +++++++++++++++++++++-----
>>>   5 files changed, 47 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
>>> b/src/amd/vulkan/radv_cmd_buffer.c
>>> index 9da42fe03e..325e1993f8 100644
>>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>>> @@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer 
>>> *cmd_buffer)
>>>          }
>>>          if (cmd_buffer->device->physical_device->rad_info.chip_class 
>>> >= GFX9) {
>>> +               unsigned num_db = 
>>> cmd_buffer->device->physical_device->rad_info.num_render_backends;
>>> +               unsigned eop_bug_offset;
>>>                  void *fence_ptr;
>>> +
>>>                  radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
>>>                                               
>>> &cmd_buffer->gfx9_fence_offset,
>>>                                               &fence_ptr);
>>>                  cmd_buffer->gfx9_fence_bo = 
>>> cmd_buffer->upload.upload_bo;
>>> +
>>> +               /* Allocate a buffer for the EOP bug on GFX9. */
>>> +               radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
>>> +                                            &eop_bug_offset, 
>>> &fence_ptr);
>>> +               cmd_buffer->gfx9_eop_bug_va =
>>> +                       
>>> radv_buffer_get_va(cmd_buffer->upload.upload_bo);
>>> +               cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
>>>          }
>>>          cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
>>> @@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer 
>>> *cmd_buffer,
>>>                                         
>>> cmd_buffer->device->physical_device->rad_info.chip_class,
>>>                                         ptr, va,
>>>                                         
>>> radv_cmd_buffer_uses_mec(cmd_buffer),
>>> -                                      flags);
>>> +                                      flags, 
>>> cmd_buffer->gfx9_eop_bug_va);
>>>          }
>>>          if (unlikely(cmd_buffer->device->trace_bo))
>>> @@ -4318,7 +4328,8 @@ static void write_event(struct radv_cmd_buffer 
>>> *cmd_buffer,
>>>                                             
>>> cmd_buffer->device->physical_device->rad_info.chip_class,
>>>                                             
>>> radv_cmd_buffer_uses_mec(cmd_buffer),
>>>                                             
>>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
>>> -                                          EOP_DATA_SEL_VALUE_32BIT, 
>>> va, 2, value);
>>> +                                          EOP_DATA_SEL_VALUE_32BIT, 
>>> va, 2, value,
>>> +                                          cmd_buffer->gfx9_eop_bug_va);
>>>          }
>>>          assert(cmd_buffer->cs->cdw <= cdw_max);
>>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>>> index 73c48cef1f..1c0a50c82f 100644
>>> --- a/src/amd/vulkan/radv_device.c
>>> +++ b/src/amd/vulkan/radv_device.c
>>> @@ -2240,7 +2240,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>>>                                                 
>>> RADV_CMD_FLAG_INV_SMEM_L1 |
>>>                                                 
>>> RADV_CMD_FLAG_INV_VMEM_L1 |
>>>                                                 
>>> RADV_CMD_FLAG_INV_GLOBAL_L2 |
>>> -                                              
>>> RADV_CMD_FLAG_START_PIPELINE_STATS);
>>> +                                              
>>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
>>>                  } else if (i == 1) {
>>>                          si_cs_emit_cache_flush(cs,
>>>                                                 
>>> queue->device->physical_device->rad_info.chip_class,
>>> @@ -2251,7 +2251,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>>>                                                 
>>> RADV_CMD_FLAG_INV_SMEM_L1 |
>>>                                                 
>>> RADV_CMD_FLAG_INV_VMEM_L1 |
>>>                                                 
>>> RADV_CMD_FLAG_INV_GLOBAL_L2 |
>>> -                                              
>>> RADV_CMD_FLAG_START_PIPELINE_STATS);
>>> +                                              
>>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
>>>                  }
>>>                  if (!queue->device->ws->cs_finalize(cs))
>>> diff --git a/src/amd/vulkan/radv_private.h 
>>> b/src/amd/vulkan/radv_private.h
>>> index 4e4b3a6037..96218f4be2 100644
>>> --- a/src/amd/vulkan/radv_private.h
>>> +++ b/src/amd/vulkan/radv_private.h
>>> @@ -1041,6 +1041,7 @@ struct radv_cmd_buffer {
>>>          uint32_t gfx9_fence_offset;
>>>          struct radeon_winsys_bo *gfx9_fence_bo;
>>>          uint32_t gfx9_fence_idx;
>>> +       uint64_t gfx9_eop_bug_va;
>>>          /**
>>>           * Whether a query pool has been resetted and we have to 
>>> flush caches.
>>> @@ -1072,7 +1073,8 @@ void si_cs_emit_write_event_eop(struct 
>>> radeon_cmdbuf *cs,
>>>                                  unsigned data_sel,
>>>                                  uint64_t va,
>>>                                  uint32_t old_fence,
>>> -                               uint32_t new_fence);
>>> +                               uint32_t new_fence,
>>> +                               uint64_t gfx9_eop_bug_va);
>>>   void si_emit_wait_fence(struct radeon_cmdbuf *cs,
>>>                          uint64_t va, uint32_t ref,
>>> @@ -1081,7 +1083,8 @@ void si_cs_emit_cache_flush(struct 
>>> radeon_cmdbuf *cs,
>>>                              enum chip_class chip_class,
>>>                              uint32_t *fence_ptr, uint64_t va,
>>>                              bool is_mec,
>>> -                           enum radv_cmd_flush_bits flush_bits);
>>> +                           enum radv_cmd_flush_bits flush_bits,
>>> +                           uint64_t gfx9_eop_bug_va);
>>>   void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
>>>   void si_emit_set_predication_state(struct radv_cmd_buffer 
>>> *cmd_buffer, uint64_t va);
>>>   void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
>>> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
>>> index 267d45890e..ba3783905f 100644
>>> --- a/src/amd/vulkan/radv_query.c
>>> +++ b/src/amd/vulkan/radv_query.c
>>> @@ -1180,7 +1180,8 @@ static void emit_end_query(struct 
>>> radv_cmd_buffer *cmd_buffer,
>>>                                             
>>> radv_cmd_buffer_uses_mec(cmd_buffer),
>>>                                             
>>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
>>>                                             EOP_DATA_SEL_VALUE_32BIT,
>>> -                                          avail_va, 0, 1);
>>> +                                          avail_va, 0, 1,
>>> +                                          cmd_buffer->gfx9_eop_bug_va);
>>>                  break;
>>>          default:
>>>                  unreachable("ending unhandled query type");
>>> @@ -1303,13 +1304,15 @@ void radv_CmdWriteTimestamp(
>>>                                                     mec,
>>>                                                     
>>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
>>>                                                     
>>> EOP_DATA_SEL_TIMESTAMP,
>>> -                                                  query_va, 0, 0);
>>> +                                                  query_va, 0, 0,
>>> +                                                  
>>> cmd_buffer->gfx9_eop_bug_va);
>>>                          si_cs_emit_write_event_eop(cs,
>>>                                                     
>>> cmd_buffer->device->physical_device->rad_info.chip_class,
>>>                                                     mec,
>>>                                                     
>>> V_028A90_BOTTOM_OF_PIPE_TS, 0,
>>>                                                     
>>> EOP_DATA_SEL_VALUE_32BIT,
>>> -                                                  avail_va, 0, 1);
>>> +                                                  avail_va, 0, 1,
>>> +                                                  
>>> cmd_buffer->gfx9_eop_bug_va);
>>>                          break;
>>>                  }
>>>                  query_va += pool->stride;
>>> diff --git a/src/amd/vulkan/si_cmd_buffer.c 
>>> b/src/amd/vulkan/si_cmd_buffer.c
>>> index 454fd8c39c..49c5eb1372 100644
>>> --- a/src/amd/vulkan/si_cmd_buffer.c
>>> +++ b/src/amd/vulkan/si_cmd_buffer.c
>>> @@ -679,7 +679,8 @@ void si_cs_emit_write_event_eop(struct 
>>> radeon_cmdbuf *cs,
>>>                                  unsigned data_sel,
>>>                                  uint64_t va,
>>>                                  uint32_t old_fence,
>>> -                               uint32_t new_fence)
>>> +                               uint32_t new_fence,
>>> +                               uint64_t gfx9_eop_bug_va)
>>>   {
>>>          unsigned op = EVENT_TYPE(event) |
>>>                  EVENT_INDEX(5) |
>>> @@ -693,6 +694,17 @@ void si_cs_emit_write_event_eop(struct 
>>> radeon_cmdbuf *cs,
>>>                  sel |= 
>>> EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
>>>          if (chip_class >= GFX9 || is_gfx8_mec) {
>>> +               /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB 
>>> occlusion
>>> +                * counters) must immediately precede every timestamp 
>>> event to
>>> +                * prevent a GPU hang on GFX9.
>>> +                */
>>> +               if (chip_class == GFX9) {
>>> +                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>>> +                       radeon_emit(cs, 
>>> EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
>>> +                       radeon_emit(cs, gfx9_eop_bug_va);
>>> +                       radeon_emit(cs, gfx9_eop_bug_va >> 32);
>>> +               }
>>> +
>>>                  radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 
>>> 5 : 6, false));
>>>                  radeon_emit(cs, op);
>>>                  radeon_emit(cs, sel);
>>> @@ -772,7 +784,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>>>                         uint32_t *flush_cnt,
>>>                         uint64_t flush_va,
>>>                          bool is_mec,
>>> -                       enum radv_cmd_flush_bits flush_bits)
>>> +                       enum radv_cmd_flush_bits flush_bits,
>>> +                      uint64_t gfx9_eop_bug_va)
>>>   {
>>>          unsigned cp_coher_cntl = 0;
>>>          uint32_t flush_cb_db = flush_bits & 
>>> (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>>> @@ -803,7 +816,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>>>                                                             
>>> V_028A90_FLUSH_AND_INV_CB_DATA_TS,
>>>                                                             0,
>>>                                                             
>>> EOP_DATA_SEL_DISCARD,
>>> -                                                          0, 0, 0);
>>> +                                                          0, 0, 0,
>>> +                                                          
>>> gfx9_eop_bug_va);
>>>                          }
>>>                  }
>>>                  if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
>>> @@ -873,7 +887,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>>>                  si_cs_emit_write_event_eop(cs, chip_class, false, 
>>> cb_db_event, tc_flags,
>>>                                             EOP_DATA_SEL_VALUE_32BIT,
>>> -                                          flush_va, old_fence, 
>>> *flush_cnt);
>>> +                                          flush_va, old_fence, 
>>> *flush_cnt,
>>> +                                          gfx9_eop_bug_va);
>>>                  si_emit_wait_fence(cs, flush_va, *flush_cnt, 
>>> 0xffffffff);
>>>          }
>>> @@ -975,7 +990,8 @@ si_emit_cache_flush(struct radv_cmd_buffer 
>>> *cmd_buffer)
>>>                                 
>>> cmd_buffer->device->physical_device->rad_info.chip_class,
>>>                                 ptr, va,
>>>                                 radv_cmd_buffer_uses_mec(cmd_buffer),
>>> -                              cmd_buffer->state.flush_bits);
>>> +                              cmd_buffer->state.flush_bits,
>>> +                              cmd_buffer->gfx9_eop_bug_va);
>>>          if (unlikely(cmd_buffer->device->trace_bo))
>>> -- 
>>> 2.18.0
>>>
>>> _______________________________________________
>>> mesa-stable mailing list
>>> mesa-stable at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
>>
>> Hi Samuel,
>>
>> This patch also doesn't apply cleanly to 18.1. Could I bother you for a
>> backport?
> 
> Will do.

Dylan, I presume the backport should be based on the staging/18.1 branch?

> 
>>
>> Dylan
>>


More information about the mesa-dev mailing list