[Mesa-dev] [PATCH] radv: rename and re-document cache flush flags
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Tue Jun 25 16:20:19 UTC 2019
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
On Tue, Jun 25, 2019 at 5:54 PM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> SMEM and VMEM caches are L0 on gfx10. Ported from RadeonSI.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/amd/vulkan/radv_cmd_buffer.c | 16 ++++-----
> src/amd/vulkan/radv_device.c | 12 +++----
> src/amd/vulkan/radv_meta_buffer.c | 4 +--
> src/amd/vulkan/radv_meta_clear.c | 4 +--
> src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
> src/amd/vulkan/radv_meta_fmask_expand.c | 2 +-
> src/amd/vulkan/radv_meta_resolve_cs.c | 4 +--
> src/amd/vulkan/radv_private.h | 44 ++++++++++++++-----------
> src/amd/vulkan/radv_query.c | 8 ++---
> src/amd/vulkan/si_cmd_buffer.c | 24 +++++++-------
> 10 files changed, 62 insertions(+), 58 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 29f2e0c8a60..8ffd3989634 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
> case VK_ACCESS_SHADER_WRITE_BIT:
> case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
> case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
> - flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> + flush_bits |= RADV_CMD_FLAG_WB_L2;
> break;
> case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
> flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
> @@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
> case VK_ACCESS_TRANSFER_WRITE_BIT:
> flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> RADV_CMD_FLAG_FLUSH_AND_INV_DB |
> - RADV_CMD_FLAG_INV_GLOBAL_L2;
> + RADV_CMD_FLAG_INV_L2;
>
> if (flush_CB_meta)
> flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
> @@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
> case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
> break;
> case VK_ACCESS_UNIFORM_READ_BIT:
> - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
> + flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
> break;
> case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
> case VK_ACCESS_TRANSFER_READ_BIT:
> case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
> - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_INV_GLOBAL_L2;
> + flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_INV_L2;
> break;
> case VK_ACCESS_SHADER_READ_BIT:
> - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
> + flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
>
> if (!image_is_coherent)
> - flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
> + flush_bits |= RADV_CMD_FLAG_INV_L2;
> break;
> case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
> if (flush_CB)
> @@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer(
>
> if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
> if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
> - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
>
> /* Make sure to sync all pending active queries at the end of
> * command buffer.
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index f12b8bde1f9..8d4964073cf 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
> queue->device->physical_device->rad_info.chip_class >= GFX7,
> (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
> RADV_CMD_FLAG_INV_ICACHE |
> - RADV_CMD_FLAG_INV_SMEM_L1 |
> - RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_INV_GLOBAL_L2 |
> + RADV_CMD_FLAG_INV_SCACHE |
> + RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_INV_L2 |
> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
> } else if (i == 1) {
> si_cs_emit_cache_flush(cs,
> @@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
> queue->queue_family_index == RING_COMPUTE &&
> queue->device->physical_device->rad_info.chip_class >= GFX7,
> RADV_CMD_FLAG_INV_ICACHE |
> - RADV_CMD_FLAG_INV_SMEM_L1 |
> - RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_INV_GLOBAL_L2 |
> + RADV_CMD_FLAG_INV_SCACHE |
> + RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_INV_L2 |
> RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
> }
>
> diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
> index c19bf0da1c3..c457ac4e5f2 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
> if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
> fill_buffer_shader(cmd_buffer, bo, offset, size, value);
> flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> + RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_WB_L2;
> } else if (size) {
> uint64_t va = radv_buffer_get_va(bo);
> va += offset;
> diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
> index 4d569729dda..091b73841f8 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
> radv_meta_restore(&saved_state, cmd_buffer);
>
> return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
> + RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_WB_L2;
> }
>
> static uint32_t
> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
> index 71cf90c611e..f18f7637593 100644
> --- a/src/amd/vulkan/radv_meta_fast_clear.c
> +++ b/src/amd/vulkan/radv_meta_fast_clear.c
> @@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
> radv_meta_restore(&saved_state, cmd_buffer);
>
> state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1;
> + RADV_CMD_FLAG_INV_VCACHE;
>
>
> /* Initialize the DCC metadata as "fully expanded". */
> diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c
> index a8f5e0cc4c1..c4cec58235f 100644
> --- a/src/amd/vulkan/radv_meta_fmask_expand.c
> +++ b/src/amd/vulkan/radv_meta_fmask_expand.c
> @@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
> radv_meta_restore(&saved_state, cmd_buffer);
>
> cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_GLOBAL_L2;
> + RADV_CMD_FLAG_INV_L2;
>
> /* Re-initialize FMASK in fully expanded mode. */
> radv_initialize_fmask(cmd_buffer, image, subresourceRange);
> diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
> index c06f0f2c5ce..7d3cc166e0d 100644
> --- a/src/amd/vulkan/radv_meta_resolve_cs.c
> +++ b/src/amd/vulkan/radv_meta_resolve_cs.c
> @@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
> }
>
> cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1;
> + RADV_CMD_FLAG_INV_VCACHE;
> }
>
> void
> @@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
> }
>
> cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1;
> + RADV_CMD_FLAG_INV_VCACHE;
>
> if (radv_image_has_htile(dst_image)) {
> if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index b537778001c..0c842a4d1b7 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits {
> };
>
> enum radv_cmd_flush_bits {
> - RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
> - /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
> - RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
> - /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
> - RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
> - /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
> - RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
> - /* Same as above, but only writes back and doesn't invalidate */
> - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
> + /* Instruction cache. */
> + RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
> + /* Scalar L1 cache. */
> + RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
> + /* Vector L1 cache. */
> + RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
> + /* L2 cache + L2 metadata cache writeback & invalidate.
> + * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
> + RADV_CMD_FLAG_INV_L2 = 1 << 3,
> + /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
> + * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
> + * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
> + RADV_CMD_FLAG_WB_L2 = 1 << 4,
> /* Framebuffer caches */
> - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
> - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
> - RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
> - RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
> + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
> + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
> + RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
> + RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
> /* Engine synchronization. */
> - RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
> - RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
> - RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
> - RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
> + RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
> + RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
> + RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
> + RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
> /* Pipeline query controls. */
> - RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
> - RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
> - RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
> + RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
> + RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
> + RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
>
> RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> index bec7b23af05..82741c21bf7 100644
> --- a/src/amd/vulkan/radv_query.c
> +++ b/src/amd/vulkan/radv_query.c
> @@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
> VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
> &push_constants);
>
> - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
> - RADV_CMD_FLAG_INV_VMEM_L1;
> + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
> + RADV_CMD_FLAG_INV_VCACHE;
>
> if (flags & VK_QUERY_RESULT_WAIT_BIT)
> cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
> @@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
>
> cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
> RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_GLOBAL_L2 |
> - RADV_CMD_FLAG_INV_VMEM_L1;
> + RADV_CMD_FLAG_INV_L2 |
> + RADV_CMD_FLAG_INV_VCACHE;
> if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
> cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> RADV_CMD_FLAG_FLUSH_AND_INV_DB;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 126cabd390a..52cb7477c08 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
>
> if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
> cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
> - if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
> + if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
> cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
>
> if (chip_class <= GFX8) {
> @@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> EVENT_TC_MD_ACTION_ENA;
>
> /* Ideally flush TC together with CB/DB. */
> - if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
> + if (flush_bits & RADV_CMD_FLAG_INV_L2) {
> /* Writeback and invalidate everything in L2 & L1. */
> tc_flags = EVENT_TC_ACTION_ENA |
> EVENT_TC_WB_ACTION_ENA;
>
>
> /* Clear the flags. */
> - flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
> - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
> - RADV_CMD_FLAG_INV_VMEM_L1);
> + flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
> + RADV_CMD_FLAG_WB_L2 |
> + RADV_CMD_FLAG_INV_VCACHE);
> }
> assert(flush_cnt);
> (*flush_cnt)++;
> @@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> */
> if ((cp_coher_cntl ||
> (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
> - RADV_CMD_FLAG_INV_VMEM_L1 |
> - RADV_CMD_FLAG_INV_GLOBAL_L2 |
> - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
> + RADV_CMD_FLAG_INV_VCACHE |
> + RADV_CMD_FLAG_INV_L2 |
> + RADV_CMD_FLAG_WB_L2))) &&
> !is_mec) {
> radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> radeon_emit(cs, 0);
> }
>
> - if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
> - (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
> + if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
> + (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
> si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
> cp_coher_cntl |
> S_0085F0_TC_ACTION_ENA(1) |
> @@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
> cp_coher_cntl = 0;
> } else {
> - if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
> + if(flush_bits & RADV_CMD_FLAG_WB_L2) {
> /* WB = write-back
> * NC = apply to non-coherent MTYPEs
> * (i.e. MTYPE <= 1, which is what we use everywhere)
> @@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
> S_0301F0_TC_NC_ACTION_ENA(1));
> cp_coher_cntl = 0;
> }
> - if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
> + if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
> si_emit_acquire_mem(cs, is_mec,
> chip_class >= GFX9,
> cp_coher_cntl |
> --
> 2.22.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list