Mesa (master): radeonsi: use a global dirty mask for shader pointers

Marek Olšák mareko at kemper.freedesktop.org
Wed Jan 18 18:51:43 UTC 2017


Module: Mesa
Branch: master
Commit: cf248929bfdffaf53cb4865640929a6d0accb0c3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf248929bfdffaf53cb4865640929a6d0accb0c3

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Jan 17 21:30:23 2017 +0100

radeonsi: use a global dirty mask for shader pointers

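Replace the per-descriptor pointer_dirty flags with a single bitmask in
si_context (shader_pointers_dirty). Only vertex buffers use a separate
bool flag (vertex_buffer_pointer_dirty).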

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeonsi/si_descriptors.c | 86 +++++++++++++++------------
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 +
 src/gallium/drivers/radeonsi/si_state.h       |  2 -
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 4 files changed, 51 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a535fa0..0a49c87 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -246,7 +246,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
 	                            RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 	}
-	desc->pointer_dirty = true;
 	desc->dirty_mask = 0;
 
 	if (atom)
@@ -1035,9 +1034,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	 * on performance (confirmed by testing). New descriptors are always
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
-	desc->pointer_dirty = true;
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 	sctx->vertex_buffers_dirty = false;
+	sctx->vertex_buffer_pointer_dirty = true;
 	return true;
 }
 
@@ -1735,26 +1734,21 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
 					  unsigned shader)
 {
-	struct si_descriptors *descs =
-		&sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
-
-	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-		descs->pointer_dirty = true;
+	sctx->shader_pointers_dirty |=
+		u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
+				  SI_NUM_SHADER_DESCS);
 
 	if (shader == PIPE_SHADER_VERTEX)
-		sctx->vertex_buffers.pointer_dirty = true;
+		sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
 
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
 {
-	int i;
-
-	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		si_mark_shader_pointers_dirty(sctx, i);
-	}
-	sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
+	sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
+	sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 /* Set a base register address for user data constants in the given shader.
@@ -1807,13 +1801,12 @@ void si_shader_change_notify(struct si_context *sctx)
 
 static void si_emit_shader_pointer(struct si_context *sctx,
 				   struct si_descriptors *desc,
-				   unsigned sh_base, bool keep_dirty)
+				   unsigned sh_base)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	uint64_t va;
 
-	if (!desc->pointer_dirty || !desc->buffer)
-		return;
+	assert(desc->buffer);
 
 	va = desc->buffer->gpu_address +
 	     desc->buffer_offset;
@@ -1822,55 +1815,66 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
-
-	desc->pointer_dirty = keep_dirty;
 }
 
 void si_emit_graphics_shader_userdata(struct si_context *sctx,
                                       struct r600_atom *atom)
 {
-	unsigned shader;
+	unsigned mask;
 	uint32_t *sh_base = sctx->shader_userdata.sh_base;
 	struct si_descriptors *descs;
 
 	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
 
-	if (descs->pointer_dirty) {
+	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
+				       R_00B030_SPI_SHADER_USER_DATA_PS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+				       R_00B130_SPI_SHADER_USER_DATA_VS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
+				       R_00B230_SPI_SHADER_USER_DATA_GS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+				       R_00B330_SPI_SHADER_USER_DATA_ES_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
-		descs->pointer_dirty = false;
+				       R_00B430_SPI_SHADER_USER_DATA_HS_0);
 	}
 
-	descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
+	mask = sctx->shader_pointers_dirty &
+	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
+				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
 
-	for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+		unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
 		unsigned base = sh_base[shader];
-		unsigned i;
 
-		if (!base)
-			continue;
+		if (base)
+			si_emit_shader_pointer(sctx, descs + i, base);
+	}
+	sctx->shader_pointers_dirty &=
+		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
-		for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
-			si_emit_shader_pointer(sctx, descs, base, false);
+	if (sctx->vertex_buffer_pointer_dirty) {
+		si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
+				       sh_base[PIPE_SHADER_VERTEX]);
+		sctx->vertex_buffer_pointer_dirty = false;
 	}
-	si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
 }
 
 void si_emit_compute_shader_userdata(struct si_context *sctx)
 {
 	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
-	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
+	struct si_descriptors *descs = sctx->descriptors;
+	unsigned compute_mask =
+		u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
+	unsigned mask = sctx->shader_pointers_dirty & compute_mask;
 
-	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-		si_emit_shader_pointer(sctx, descs, base, false);
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+
+		si_emit_shader_pointer(sctx, descs + i, base);
+	}
+	sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
 /* INIT/DEINIT/UPLOAD */
@@ -1939,6 +1943,9 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
 	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
 	unsigned dirty = sctx->descriptors_dirty & mask;
 
+	/* Assume nothing will go wrong: */
+	sctx->shader_pointers_dirty |= dirty;
+
 	while (dirty) {
 		unsigned i = u_bit_scan(&dirty);
 
@@ -1960,6 +1967,9 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
 						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
 	unsigned dirty = sctx->descriptors_dirty & mask;
 
+	/* Assume nothing will go wrong: */
+	sctx->shader_pointers_dirty |= dirty;
+
 	while (dirty) {
 		unsigned i = u_bit_scan(&dirty);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7d071d..421e2a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -268,6 +268,7 @@ struct si_context {
 	struct si_descriptors		vertex_buffers;
 	struct si_descriptors		descriptors[SI_NUM_DESCS];
 	unsigned			descriptors_dirty;
+	unsigned			shader_pointers_dirty;
 	unsigned			compressed_tex_shader_mask;
 	struct si_buffer_resources	rw_buffers;
 	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
@@ -288,6 +289,7 @@ struct si_context {
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
+	bool				vertex_buffer_pointer_dirty;
 	struct pipe_index_buffer	index_buffer;
 	struct pipe_vertex_buffer	vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index edc5b93..34a0f57 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -237,8 +237,6 @@ struct si_descriptors {
 	/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
 	 * array will be stored. */
 	unsigned shader_userdata_offset;
-	/* Whether the pointer should be re-emitted. */
-	bool pointer_dirty;
 };
 
 struct si_sampler_views {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 96a0e84..837c025 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1146,7 +1146,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 		/* Vertex buffer descriptors are uploaded uncached, so prefetch
 		 * them right after the VS binary. */
-		if (sctx->vertex_buffers.pointer_dirty) {
+		if (sctx->vertex_buffer_pointer_dirty) {
 			cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
 						sctx->vertex_buffers.buffer_offset,
 						sctx->vertex_elements->count * 16);




More information about the mesa-commit mailing list