[Mesa-dev] [PATCH] radv: rename and re-document cache flush flags
Samuel Pitoiset
samuel.pitoiset at gmail.com
Tue Jun 25 15:57:45 UTC 2019
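The SMEM and VMEM caches are L0 (not L1) on gfx10, so rename the cache flush flags to names that do not encode a cache level and re-document them. Ported from RadeonSI.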
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/amd/vulkan/radv_cmd_buffer.c | 16 ++++-----
src/amd/vulkan/radv_device.c | 12 +++----
src/amd/vulkan/radv_meta_buffer.c | 4 +--
src/amd/vulkan/radv_meta_clear.c | 4 +--
src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
src/amd/vulkan/radv_meta_fmask_expand.c | 2 +-
src/amd/vulkan/radv_meta_resolve_cs.c | 4 +--
src/amd/vulkan/radv_private.h | 44 ++++++++++++++-----------
src/amd/vulkan/radv_query.c | 8 ++---
src/amd/vulkan/si_cmd_buffer.c | 24 +++++++-------
10 files changed, 62 insertions(+), 58 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 29f2e0c8a60..8ffd3989634 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_SHADER_WRITE_BIT:
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ flush_bits |= RADV_CMD_FLAG_WB_L2;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
@@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_TRANSFER_WRITE_BIT:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_L2;
if (flush_CB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
@@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
break;
case VK_ACCESS_UNIFORM_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
break;
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2;
break;
case VK_ACCESS_SHADER_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
break;
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
if (flush_CB)
@@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer(
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
/* Make sure to sync all pending active queries at the end of
* command buffer.
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index f12b8bde1f9..8d4964073cf 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->device->physical_device->rad_info.chip_class >= GFX7,
(queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
@@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->queue_family_index == RING_COMPUTE &&
queue->device->physical_device->rad_info.chip_class >= GFX7,
RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
}
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
index c19bf0da1c3..c457ac4e5f2 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
fill_buffer_shader(cmd_buffer, bo, offset, size, value);
flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_WB_L2;
} else if (size) {
uint64_t va = radv_buffer_get_va(bo);
va += offset;
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 4d569729dda..091b73841f8 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_WB_L2;
}
static uint32_t
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 71cf90c611e..f18f7637593 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1;
+ RADV_CMD_FLAG_INV_VCACHE;
/* Initialize the DCC metadata as "fully expanded". */
diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c
index a8f5e0cc4c1..c4cec58235f 100644
--- a/src/amd/vulkan/radv_meta_fmask_expand.c
+++ b/src/amd/vulkan/radv_meta_fmask_expand.c
@@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
+ RADV_CMD_FLAG_INV_L2;
/* Re-initialize FMASK in fully expanded mode. */
radv_initialize_fmask(cmd_buffer, image, subresourceRange);
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index c06f0f2c5ce..7d3cc166e0d 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1;
+ RADV_CMD_FLAG_INV_VCACHE;
}
void
@@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1;
+ RADV_CMD_FLAG_INV_VCACHE;
if (radv_image_has_htile(dst_image)) {
if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b537778001c..0c842a4d1b7 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits {
};
enum radv_cmd_flush_bits {
- RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
- /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
- RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
- /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
- RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
- /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
- RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
- /* Same as above, but only writes back and doesn't invalidate */
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
+ /* Instruction cache. */
+ RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
+ /* Scalar cache (SMEM): L1 before GFX10, L0 on GFX10. */
+ RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
+ /* Vector cache (VMEM): L1 before GFX10, L0 on GFX10. */
+ RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
+ /* L2 cache + L2 metadata cache writeback & invalidate.
+ * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+ RADV_CMD_FLAG_INV_L2 = 1 << 3,
+ /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+ * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+ * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+ RADV_CMD_FLAG_WB_L2 = 1 << 4,
/* Framebuffer caches */
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
- RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
/* Engine synchronization. */
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
- RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
+ RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
/* Pipeline query controls. */
- RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
- RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
- RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
+ RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
+ RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
+ RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index bec7b23af05..82741c21bf7 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_INV_VMEM_L1;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
if (flags & VK_QUERY_RESULT_WAIT_BIT)
cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
@@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_INV_VMEM_L1;
+ RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 126cabd390a..52cb7477c08 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+ if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (chip_class <= GFX8) {
@@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
EVENT_TC_MD_ACTION_ENA;
/* Ideally flush TC together with CB/DB. */
- if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+ if (flush_bits & RADV_CMD_FLAG_INV_L2) {
/* Writeback and invalidate everything in L2 & L1. */
tc_flags = EVENT_TC_ACTION_ENA |
EVENT_TC_WB_ACTION_ENA;
/* Clear the flags. */
- flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
- RADV_CMD_FLAG_INV_VMEM_L1);
+ flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_WB_L2 |
+ RADV_CMD_FLAG_INV_VCACHE);
}
assert(flush_cnt);
(*flush_cnt)++;
@@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
*/
if ((cp_coher_cntl ||
(flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
- RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_WB_L2))) &&
!is_mec) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
- if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
- (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+ if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+ (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
@@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
cp_coher_cntl = 0;
} else {
- if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+ if(flush_bits & RADV_CMD_FLAG_WB_L2) {
/* WB = write-back
* NC = apply to non-coherent MTYPEs
* (i.e. MTYPE <= 1, which is what we use everywhere)
@@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
}
- if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
+ if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
si_emit_acquire_mem(cs, is_mec,
chip_class >= GFX9,
cp_coher_cntl |
--
2.22.0
More information about the mesa-dev mailing list