[Mesa-dev] [PATCH 3/7] radeonsi: align all CE dumps to L2 cache line size

Marek Olšák maraeo at gmail.com
Fri Aug 4 10:05:51 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 02f921e..ea5b89e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -89,20 +89,25 @@ static uint32_t null_texture_descriptor[8] = {
 
 static uint32_t null_image_descriptor[8] = {
 	0,
 	0,
 	0,
 	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
 	/* the rest must contain zeros, which is also used by the buffer
 	 * descriptor */
 };
 
+static uint16_t si_ce_ram_size(struct si_context *sctx)
+{
+	return sctx->b.chip_class < GFX9 ? 32768 : 4096; /* GFX9+ has the smaller CE RAM */
+}
+
 static void si_init_descriptor_list(uint32_t *desc_list,
 				    unsigned element_dw_size,
 				    unsigned num_elements,
 				    const uint32_t *null_descriptor)
 {
 	int i;
 
 	/* Initialize the array to NULL descriptors if the element size is 8. */
 	if (null_descriptor) {
 		assert(element_dw_size % 8 == 0);
@@ -141,25 +146,32 @@ static void si_init_descriptors(struct si_context *sctx,
 static void si_release_descriptors(struct si_descriptors *desc)
 {
 	r600_resource_reference(&desc->buffer, NULL);
 	FREE(desc->list);
 }
 
 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
 			 unsigned *out_offset, struct r600_resource **out_buf)
 {
 	uint64_t va;
+	unsigned cache_line_size = sctx->screen->b.info.tcc_cache_line_size;
 
-	u_suballocator_alloc(sctx->ce_suballocator, size,
-			     si_optimal_tcc_alignment(sctx, size),
-			     out_offset,
-			     (struct pipe_resource**)out_buf);
+	/* The base and size should be aligned to the L2 cache line size
+	 * for optimal performance. (all dumps should rewrite whole lines)
+	 */
+	size = align(size, cache_line_size);
+
+	(void)si_ce_ram_size; /* silence an "unused" warning */
+	assert(ce_offset + size <= si_ce_ram_size(sctx));
+
+	u_suballocator_alloc(sctx->ce_suballocator, size, cache_line_size,
+			     out_offset, (struct pipe_resource**)out_buf);
 	if (!out_buf)
 			return false;
 
 	va = (*out_buf)->gpu_address + *out_offset;
 
 	radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
 	radeon_emit(sctx->ce_ib, ce_offset);
 	radeon_emit(sctx->ce_ib, size / 4);
 	radeon_emit(sctx->ce_ib, va);
 	radeon_emit(sctx->ce_ib, va >> 32);
@@ -2845,24 +2857,21 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
 
 	si_init_descriptors(sctx, &sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
 			    4, SI_NUM_VERTEX_BUFFERS, 0, 0, NULL);
 	FREE(sctx->vertex_buffers.list); /* not used */
 	sctx->vertex_buffers.list = NULL;
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 	sctx->total_ce_ram_allocated = ce_offset;
 
-	if (sctx->b.chip_class >= GFX9)
-		assert(ce_offset <= 4096);
-	else
-		assert(ce_offset <= 32768);
+	assert(ce_offset <= si_ce_ram_size(sctx));
 
 	/* Set pipe_context functions. */
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_shader_images = si_set_shader_images;
 	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
 	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.b.create_texture_handle = si_create_texture_handle;
-- 
2.7.4



More information about the mesa-dev mailing list