[Mesa-stable] [PATCH] radv: make sure to wait for CP DMA when needed

Dylan Baker dylan at pnwbakers.com
Thu Jul 12 19:31:12 UTC 2018


Thanks! I've pulled this into the 18.1-staging branch, it missed 18.1.4, but
will be in 18.1.5.

Dylan

Quoting Samuel Pitoiset (2018-07-12 01:02:49)
> This might fix some synchronization issues. I don't know if
> that will affect performance but it's required for correctness.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++
>  src/amd/vulkan/radv_private.h    |  5 +++++
>  src/amd/vulkan/si_cmd_buffer.c   | 36 ++++++++++++++++++++++++++++----
>  3 files changed, 52 insertions(+), 4 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 2426bd1ff3..8dafe88bd1 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2500,6 +2500,11 @@ VkResult radv_EndCommandBuffer(
>                 si_emit_cache_flush(cmd_buffer);
>         }
>  
> +       /* Make sure CP DMA is idle at the end of IBs because the kernel
> +        * doesn't wait for it.
> +        */
> +       si_cp_dma_wait_for_idle(cmd_buffer);
> +
>         vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>  
>         if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
> @@ -4053,6 +4058,11 @@ void radv_CmdPipelineBarrier(
>                                              0);
>         }
>  
> +       /* Make sure CP DMA is idle because the driver might have performed a
> +        * DMA operation for copying or filling buffers/images.
> +        */
> +       si_cp_dma_wait_for_idle(cmd_buffer);
> +
>         cmd_buffer->state.flush_bits |= dst_flush_bits;
>  }
>  
> @@ -4069,6 +4079,11 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
>  
>         MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
>  
> +       /* Make sure CP DMA is idle because the driver might have performed a
> +        * DMA operation for copying or filling buffers/images.
> +        */
> +       si_cp_dma_wait_for_idle(cmd_buffer);
> +
>         /* TODO: this is overkill. Probably should figure something out from
>          * the stage mask. */
>  
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 6cd7ab0d64..3b4c80e025 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -972,6 +972,9 @@ struct radv_cmd_state {
>         uint32_t last_num_instances;
>         uint32_t last_first_instance;
>         uint32_t last_vertex_offset;
> +
> +       /* Whether CP DMA is busy/idle. */
> +       bool dma_is_busy;
>  };
>  
>  struct radv_cmd_pool {
> @@ -1086,6 +1089,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
>                          unsigned size);
>  void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
>                             uint64_t size, unsigned value);
> +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
> +
>  void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
>  bool
>  radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 80c819af49..d4459092d0 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -1214,7 +1214,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
>         struct radeon_winsys_cs *cs = cmd_buffer->cs;
>         uint32_t header = 0, command = 0;
>  
> -       assert(size);
>         assert(size <= cp_dma_max_byte_count(cmd_buffer));
>  
>         radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
> @@ -1273,9 +1272,14 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
>          * indices. If we wanted to execute CP DMA in PFP, this packet
>          * should precede it.
>          */
> -       if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
> -               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
> -               radeon_emit(cs, 0);
> +       if (flags & CP_DMA_SYNC) {
> +               if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
> +                       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
> +                       radeon_emit(cs, 0);
> +               }
> +
> +               /* CP will see the sync flag and wait for all DMAs to complete. */
> +               cmd_buffer->state.dma_is_busy = false;
>         }
>  
>         if (unlikely(cmd_buffer->device->trace_bo))
> @@ -1339,6 +1343,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
>         uint64_t main_src_va, main_dest_va;
>         uint64_t skipped_size = 0, realign_size = 0;
>  
> +       /* Assume that we are not going to sync after the last DMA operation. */
> +       cmd_buffer->state.dma_is_busy = true;
>  
>         if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
>             cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
> @@ -1402,6 +1408,9 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
>  
>         assert(va % 4 == 0 && size % 4 == 0);
>  
> +       /* Assume that we are not going to sync after the last DMA operation. */
> +       cmd_buffer->state.dma_is_busy = true;
> +
>         while (size) {
>                 unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
>                 unsigned dma_flags = CP_DMA_CLEAR;
> @@ -1417,6 +1426,25 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
>         }
>  }
>  
> +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
> +{
> +       if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
> +               return;
> +
> +       if (!cmd_buffer->state.dma_is_busy)
> +               return;
> +
> +       /* Issue a dummy DMA that copies zero bytes.
> +        *
> +        * The DMA engine will see that there's no work to do and skip this
> +        * DMA request, however, the CP will see the sync flag and still wait
> +        * for all DMAs to complete.
> +        */
> +       si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
> +
> +       cmd_buffer->state.dma_is_busy = false;
> +}
> +
>  /* For MSAA sample positions. */
>  #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
>         (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |                  \
> -- 
> 2.18.0
> 
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: signature
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180712/794a7fa9/attachment-0001.sig>


More information about the mesa-stable mailing list