[Mesa-dev] [PATCH 2/4] radeonsi: re-arrange CE packets for better synchronization with DE
Nicolai Hähnle
nhaehnle at gmail.com
Wed Aug 16 11:13:40 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Move all WRITE_CONST_RAM packets before all DUMP_CONST_RAM packets for a
draw. The benefit is that we can insert a wait between the two groups of
packets and so have minimal latency for dumping the CE RAM into the L2.
---
src/gallium/drivers/radeonsi/si_descriptors.c | 42 ++++++++++++++++++++-------
1 file changed, 31 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9372d1b6a00..e5320cadd07 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -225,7 +225,8 @@ void si_ce_enable_loads(struct radeon_winsys_cs *ib)
static bool si_upload_descriptors(struct si_context *sctx,
struct si_descriptors *desc,
- struct r600_atom * atom)
+ struct r600_atom * atom,
+ bool *need_ce_dump)
{
unsigned slot_size = desc->element_dw_size * 4;
unsigned first_slot_offset = desc->first_active_slot * slot_size;
@@ -258,12 +259,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
radeon_emit_array(sctx->ce_ib, list + begin, count);
}
- if (!si_ce_upload(sctx,
- desc->ce_offset +
- (first_slot_offset - desc->first_ce_slot * slot_size),
- upload_size, (unsigned*)&desc->buffer_offset,
- &desc->buffer))
- return false;
+ *need_ce_dump = true;
} else {
uint32_t *ptr;
@@ -275,6 +271,9 @@ static bool si_upload_descriptors(struct si_context *sctx,
if (!desc->buffer)
return false; /* skip the draw call */
+ /* The shader pointer should point to slot 0. */
+ desc->buffer_offset -= first_slot_offset;
+
util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
upload_size);
desc->gpu_list = ptr - first_slot_offset / 4;
@@ -283,9 +282,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
}
- /* The shader pointer should point to slot 0. */
- desc->buffer_offset -= first_slot_offset;
-
desc->dirty_mask = 0;
if (atom)
@@ -2906,16 +2902,40 @@ void si_init_all_descriptors(struct si_context *sctx)
static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
{
unsigned dirty = sctx->descriptors_dirty & mask;
+ unsigned ce_dump = 0;
/* Assume nothing will go wrong: */
sctx->shader_pointers_dirty |= dirty;
while (dirty) {
unsigned i = u_bit_scan(&dirty);
+ bool need_ce_dump = false;
if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
- &sctx->shader_pointers.atom))
+ &sctx->shader_pointers.atom,
+ &need_ce_dump))
return false;
+
+ if (need_ce_dump)
+ ce_dump |= 1 << i;
+ }
+
+ while (ce_dump) {
+ unsigned i = u_bit_scan(&ce_dump);
+ struct si_descriptors *desc = &sctx->descriptors[i];
+ unsigned slot_size = desc->element_dw_size * 4;
+ unsigned first_slot_offset = desc->first_active_slot * slot_size;
+ unsigned upload_size = desc->num_active_slots * slot_size;
+
+ if (!si_ce_upload(sctx,
+ desc->ce_offset +
+ (first_slot_offset - desc->first_ce_slot * slot_size),
+ upload_size, (unsigned*)&desc->buffer_offset,
+ &desc->buffer))
+ return false;
+
+ /* The shader pointer should point to slot 0. */
+ desc->buffer_offset -= first_slot_offset;
}
sctx->descriptors_dirty &= ~mask;
--
2.11.0
More information about the mesa-dev mailing list