[Mesa-dev] [PATCH 2/4] radeonsi: re-arrange CE packets for better synchronization with DE

Nicolai Hähnle nhaehnle at gmail.com
Wed Aug 16 11:13:40 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Move all WRITE_CONST_RAM packets before all DUMP_CONST_RAM packets for a
draw. The benefit is that we can insert a wait between those two blocks,
which keeps the latency of dumping the CE RAM into the L2 minimal.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 42 ++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9372d1b6a00..e5320cadd07 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -225,7 +225,8 @@ void si_ce_enable_loads(struct radeon_winsys_cs *ib)
 
 static bool si_upload_descriptors(struct si_context *sctx,
 				  struct si_descriptors *desc,
-				  struct r600_atom * atom)
+				  struct r600_atom * atom,
+				  bool *need_ce_dump)
 {
 	unsigned slot_size = desc->element_dw_size * 4;
 	unsigned first_slot_offset = desc->first_active_slot * slot_size;
@@ -258,12 +259,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 			radeon_emit_array(sctx->ce_ib, list + begin, count);
 		}
 
-		if (!si_ce_upload(sctx,
-				  desc->ce_offset +
-				  (first_slot_offset - desc->first_ce_slot * slot_size),
-				  upload_size, (unsigned*)&desc->buffer_offset,
-				  &desc->buffer))
-			return false;
+		*need_ce_dump = true;
 	} else {
 		uint32_t *ptr;
 
@@ -275,6 +271,9 @@ static bool si_upload_descriptors(struct si_context *sctx,
 		if (!desc->buffer)
 			return false; /* skip the draw call */
 
+		/* The shader pointer should point to slot 0. */
+		desc->buffer_offset -= first_slot_offset;
+
 		util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
 					upload_size);
 		desc->gpu_list = ptr - first_slot_offset / 4;
@@ -283,9 +282,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
 	                            RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 	}
 
-	/* The shader pointer should point to slot 0. */
-	desc->buffer_offset -= first_slot_offset;
-
 	desc->dirty_mask = 0;
 
 	if (atom)
@@ -2906,16 +2902,40 @@ void si_init_all_descriptors(struct si_context *sctx)
 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
 {
 	unsigned dirty = sctx->descriptors_dirty & mask;
+	unsigned ce_dump = 0;
 
 	/* Assume nothing will go wrong: */
 	sctx->shader_pointers_dirty |= dirty;
 
 	while (dirty) {
 		unsigned i = u_bit_scan(&dirty);
+		bool need_ce_dump = false;
 
 		if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
-					   &sctx->shader_pointers.atom))
+					   &sctx->shader_pointers.atom,
+					   &need_ce_dump))
 			return false;
+
+		if (need_ce_dump)
+			ce_dump |= 1 << i;
+	}
+
+	while (ce_dump) {
+		unsigned i = u_bit_scan(&ce_dump);
+		struct si_descriptors *desc = &sctx->descriptors[i];
+		unsigned slot_size = desc->element_dw_size * 4;
+		unsigned first_slot_offset = desc->first_active_slot * slot_size;
+		unsigned upload_size = desc->num_active_slots * slot_size;
+
+		if (!si_ce_upload(sctx,
+				  desc->ce_offset +
+				  (first_slot_offset - desc->first_ce_slot * slot_size),
+				  upload_size, (unsigned*)&desc->buffer_offset,
+				  &desc->buffer))
+			return false;
+
+		/* The shader pointer should point to slot 0. */
+		desc->buffer_offset -= first_slot_offset;
 	}
 
 	sctx->descriptors_dirty &= ~mask;
-- 
2.11.0



More information about the mesa-dev mailing list