[Mesa-dev] [PATCH] radv: rename and re-document cache flush flags

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Jun 25 16:20:19 UTC 2019


Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

On Tue, Jun 25, 2019 at 5:54 PM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> SMEM and VMEM caches are L0 on gfx10. Ported from RadeonSI.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c        | 16 ++++-----
>  src/amd/vulkan/radv_device.c            | 12 +++----
>  src/amd/vulkan/radv_meta_buffer.c       |  4 +--
>  src/amd/vulkan/radv_meta_clear.c        |  4 +--
>  src/amd/vulkan/radv_meta_fast_clear.c   |  2 +-
>  src/amd/vulkan/radv_meta_fmask_expand.c |  2 +-
>  src/amd/vulkan/radv_meta_resolve_cs.c   |  4 +--
>  src/amd/vulkan/radv_private.h           | 44 ++++++++++++++-----------
>  src/amd/vulkan/radv_query.c             |  8 ++---
>  src/amd/vulkan/si_cmd_buffer.c          | 24 +++++++-------
>  10 files changed, 62 insertions(+), 58 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 29f2e0c8a60..8ffd3989634 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
>                 case VK_ACCESS_SHADER_WRITE_BIT:
>                 case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
>                 case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
> -                       flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> +                       flush_bits |= RADV_CMD_FLAG_WB_L2;
>                         break;
>                 case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
>                         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
> @@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
>                 case VK_ACCESS_TRANSFER_WRITE_BIT:
>                         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>                                       RADV_CMD_FLAG_FLUSH_AND_INV_DB |
> -                                     RADV_CMD_FLAG_INV_GLOBAL_L2;
> +                                     RADV_CMD_FLAG_INV_L2;
>
>                         if (flush_CB_meta)
>                                 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
> @@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
>                 case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
>                         break;
>                 case VK_ACCESS_UNIFORM_READ_BIT:
> -                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
> +                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
>                         break;
>                 case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
>                 case VK_ACCESS_TRANSFER_READ_BIT:
>                 case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
> -                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
> -                                     RADV_CMD_FLAG_INV_GLOBAL_L2;
> +                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
> +                                     RADV_CMD_FLAG_INV_L2;
>                         break;
>                 case VK_ACCESS_SHADER_READ_BIT:
> -                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
> +                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
>
>                         if (!image_is_coherent)
> -                               flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
> +                               flush_bits |= RADV_CMD_FLAG_INV_L2;
>                         break;
>                 case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
>                         if (flush_CB)
> @@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer(
>
>         if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
>                 if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
> -                       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> +                       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
>
>                 /* Make sure to sync all pending active queries at the end of
>                  * command buffer.
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index f12b8bde1f9..8d4964073cf 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
>                                                  queue->device->physical_device->rad_info.chip_class >= GFX7,
>                                                (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
>                                                RADV_CMD_FLAG_INV_ICACHE |
> -                                              RADV_CMD_FLAG_INV_SMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_VMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
> +                                              RADV_CMD_FLAG_INV_SCACHE |
> +                                              RADV_CMD_FLAG_INV_VCACHE |
> +                                              RADV_CMD_FLAG_INV_L2 |
>                                                RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
>                 } else if (i == 1) {
>                         si_cs_emit_cache_flush(cs,
> @@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
>                                                queue->queue_family_index == RING_COMPUTE &&
>                                                  queue->device->physical_device->rad_info.chip_class >= GFX7,
>                                                RADV_CMD_FLAG_INV_ICACHE |
> -                                              RADV_CMD_FLAG_INV_SMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_VMEM_L1 |
> -                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
> +                                              RADV_CMD_FLAG_INV_SCACHE |
> +                                              RADV_CMD_FLAG_INV_VCACHE |
> +                                              RADV_CMD_FLAG_INV_L2 |
>                                                RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
>                 }
>
> diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
> index c19bf0da1c3..c457ac4e5f2 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
>         if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
>                 fill_buffer_shader(cmd_buffer, bo, offset, size, value);
>                 flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                            RADV_CMD_FLAG_INV_VMEM_L1 |
> -                            RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> +                            RADV_CMD_FLAG_INV_VCACHE |
> +                            RADV_CMD_FLAG_WB_L2;
>         } else if (size) {
>                 uint64_t va = radv_buffer_get_va(bo);
>                 va += offset;
> diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
> index 4d569729dda..091b73841f8 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
>         radv_meta_restore(&saved_state, cmd_buffer);
>
>         return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -              RADV_CMD_FLAG_INV_VMEM_L1 |
> -              RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> +              RADV_CMD_FLAG_INV_VCACHE |
> +              RADV_CMD_FLAG_WB_L2;
>  }
>
>  static uint32_t
> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
> index 71cf90c611e..f18f7637593 100644
> --- a/src/amd/vulkan/radv_meta_fast_clear.c
> +++ b/src/amd/vulkan/radv_meta_fast_clear.c
> @@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
>         radv_meta_restore(&saved_state, cmd_buffer);
>
>         state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                            RADV_CMD_FLAG_INV_VMEM_L1;
> +                            RADV_CMD_FLAG_INV_VCACHE;
>
>
>         /* Initialize the DCC metadata as "fully expanded". */
> diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c
> index a8f5e0cc4c1..c4cec58235f 100644
> --- a/src/amd/vulkan/radv_meta_fmask_expand.c
> +++ b/src/amd/vulkan/radv_meta_fmask_expand.c
> @@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
>         radv_meta_restore(&saved_state, cmd_buffer);
>
>         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                                       RADV_CMD_FLAG_INV_GLOBAL_L2;
> +                                       RADV_CMD_FLAG_INV_L2;
>
>         /* Re-initialize FMASK in fully expanded mode. */
>         radv_initialize_fmask(cmd_buffer, image, subresourceRange);
> diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
> index c06f0f2c5ce..7d3cc166e0d 100644
> --- a/src/amd/vulkan/radv_meta_resolve_cs.c
> +++ b/src/amd/vulkan/radv_meta_resolve_cs.c
> @@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
>         }
>
>         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                                       RADV_CMD_FLAG_INV_VMEM_L1;
> +                                       RADV_CMD_FLAG_INV_VCACHE;
>  }
>
>  void
> @@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
>         }
>
>         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                                       RADV_CMD_FLAG_INV_VMEM_L1;
> +                                       RADV_CMD_FLAG_INV_VCACHE;
>
>         if (radv_image_has_htile(dst_image)) {
>                 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index b537778001c..0c842a4d1b7 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits {
>  };
>
>  enum radv_cmd_flush_bits {
> -       RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
> -       /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
> -       RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
> -       /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
> -       RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
> -       /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
> -       RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
> -       /* Same as above, but only writes back and doesn't invalidate */
> -       RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
> +       /* Instruction cache. */
> +       RADV_CMD_FLAG_INV_ICACHE                         = 1 << 0,
> +       /* Scalar L1 cache. */
> +       RADV_CMD_FLAG_INV_SCACHE                         = 1 << 1,
> +       /* Vector L1 cache. */
> +       RADV_CMD_FLAG_INV_VCACHE                         = 1 << 2,
> +       /* L2 cache + L2 metadata cache writeback & invalidate.
> +        * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
> +       RADV_CMD_FLAG_INV_L2                             = 1 << 3,
> +       /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
> +        * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
> +        * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
> +       RADV_CMD_FLAG_WB_L2                              = 1 << 4,
>         /* Framebuffer caches */
> -       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
> -       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
> -       RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
> -       RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
> +       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META              = 1 << 5,
> +       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META              = 1 << 6,
> +       RADV_CMD_FLAG_FLUSH_AND_INV_DB                   = 1 << 7,
> +       RADV_CMD_FLAG_FLUSH_AND_INV_CB                   = 1 << 8,
>         /* Engine synchronization. */
> -       RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
> -       RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
> -       RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
> -       RADV_CMD_FLAG_VGT_FLUSH        = 1 << 12,
> +       RADV_CMD_FLAG_VS_PARTIAL_FLUSH                   = 1 << 9,
> +       RADV_CMD_FLAG_PS_PARTIAL_FLUSH                   = 1 << 10,
> +       RADV_CMD_FLAG_CS_PARTIAL_FLUSH                   = 1 << 11,
> +       RADV_CMD_FLAG_VGT_FLUSH                          = 1 << 12,
>         /* Pipeline query controls. */
> -       RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
> -       RADV_CMD_FLAG_STOP_PIPELINE_STATS  = 1 << 14,
> -       RADV_CMD_FLAG_VGT_STREAMOUT_SYNC   = 1 << 15,
> +       RADV_CMD_FLAG_START_PIPELINE_STATS               = 1 << 13,
> +       RADV_CMD_FLAG_STOP_PIPELINE_STATS                = 1 << 14,
> +       RADV_CMD_FLAG_VGT_STREAMOUT_SYNC                 = 1 << 15,
>
>         RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>                                               RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> index bec7b23af05..82741c21bf7 100644
> --- a/src/amd/vulkan/radv_query.c
> +++ b/src/amd/vulkan/radv_query.c
> @@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
>                                       VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
>                                       &push_constants);
>
> -       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
> -                                       RADV_CMD_FLAG_INV_VMEM_L1;
> +       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
> +                                       RADV_CMD_FLAG_INV_VCACHE;
>
>         if (flags & VK_QUERY_RESULT_WAIT_BIT)
>                 cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
> @@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
>
>         cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
>                                                RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
> -                                              RADV_CMD_FLAG_INV_VMEM_L1;
> +                                              RADV_CMD_FLAG_INV_L2 |
> +                                              RADV_CMD_FLAG_INV_VCACHE;
>         if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
>                 cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>                                                        RADV_CMD_FLAG_FLUSH_AND_INV_DB;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 126cabd390a..52cb7477c08 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>
>         if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
>                 cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
> -       if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
> +       if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
>                 cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
>
>         if (chip_class <= GFX8) {
> @@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>                            EVENT_TC_MD_ACTION_ENA;
>
>                 /* Ideally flush TC together with CB/DB. */
> -               if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
> +               if (flush_bits & RADV_CMD_FLAG_INV_L2) {
>                         /* Writeback and invalidate everything in L2 & L1. */
>                         tc_flags = EVENT_TC_ACTION_ENA |
>                                    EVENT_TC_WB_ACTION_ENA;
>
>
>                         /* Clear the flags. */
> -                       flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
> -                                        RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
> -                                        RADV_CMD_FLAG_INV_VMEM_L1);
> +                       flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
> +                                        RADV_CMD_FLAG_WB_L2 |
> +                                        RADV_CMD_FLAG_INV_VCACHE);
>                 }
>                 assert(flush_cnt);
>                 (*flush_cnt)++;
> @@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>          */
>         if ((cp_coher_cntl ||
>              (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> -                           RADV_CMD_FLAG_INV_VMEM_L1 |
> -                           RADV_CMD_FLAG_INV_GLOBAL_L2 |
> -                           RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
> +                           RADV_CMD_FLAG_INV_VCACHE |
> +                           RADV_CMD_FLAG_INV_L2 |
> +                           RADV_CMD_FLAG_WB_L2))) &&
>             !is_mec) {
>                 radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>                 radeon_emit(cs, 0);
>         }
>
> -       if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
> -           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
> +       if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
> +           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
>                 si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
>                                     cp_coher_cntl |
>                                     S_0085F0_TC_ACTION_ENA(1) |
> @@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>                                     S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
>                 cp_coher_cntl = 0;
>         } else {
> -               if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
> +               if(flush_bits & RADV_CMD_FLAG_WB_L2) {
>                         /* WB = write-back
>                          * NC = apply to non-coherent MTYPEs
>                          *      (i.e. MTYPE <= 1, which is what we use everywhere)
> @@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>                                             S_0301F0_TC_NC_ACTION_ENA(1));
>                         cp_coher_cntl = 0;
>                 }
> -               if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
> +               if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
>                         si_emit_acquire_mem(cs, is_mec,
>                                             chip_class >= GFX9,
>                                             cp_coher_cntl |
> --
> 2.22.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list