[Mesa-dev] [PATCH 08/11] radeonsi: use a global dirty mask for shader pointers

Marek Olšák maraeo at gmail.com
Tue Jan 17 22:47:58 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

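Replace the per-descriptor pointer_dirty bool in struct si_descriptors with a
single shader_pointers_dirty bitmask in si_context, using one bit per entry of
sctx->descriptors[]. Only vertex buffers use a separate bool flag
(vertex_buffer_pointer_dirty).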
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 85 +++++++++++++++------------
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 +
 src/gallium/drivers/radeonsi/si_state.h       |  2 -
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 4 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a535fa0..deb6df9 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -239,21 +239,20 @@ static bool si_upload_descriptors(struct si_context *sctx,
 			(struct pipe_resource**)&desc->buffer, &ptr);
 		if (!desc->buffer)
 			return false; /* skip the draw call */
 
 		util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
 		desc->gpu_list = ptr;
 
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
 	                            RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 	}
-	desc->pointer_dirty = true;
 	desc->dirty_mask = 0;
 
 	if (atom)
 		si_mark_atom_dirty(sctx, atom);
 
 	return true;
 }
 
 static void
 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
@@ -1028,23 +1027,23 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 					      (struct r600_resource*)vb->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 			bound[ve->vertex_buffer_index] = true;
 		}
 	}
 
 	/* Don't flush the const cache. It would have a very negative effect
 	 * on performance (confirmed by testing). New descriptors are always
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
-	desc->pointer_dirty = true;
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 	sctx->vertex_buffers_dirty = false;
+	sctx->vertex_buffer_pointer_dirty = true;
 	return true;
 }
 
 
 /* CONSTANT BUFFERS */
 
 static unsigned
 si_const_buffer_descriptors_idx(unsigned shader)
 {
 	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
@@ -1728,40 +1727,35 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
 
 		si_update_compressed_tex_shader_mask(sctx, shader);
 	}
 }
 
 /* SHADER USER DATA */
 
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
 					  unsigned shader)
 {
-	struct si_descriptors *descs =
-		&sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
-
-	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-		descs->pointer_dirty = true;
+	sctx->shader_pointers_dirty |=
+		u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
+				  SI_NUM_SHADER_DESCS);
 
 	if (shader == PIPE_SHADER_VERTEX)
-		sctx->vertex_buffers.pointer_dirty = true;
+		sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
 
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
 {
-	int i;
-
-	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		si_mark_shader_pointers_dirty(sctx, i);
-	}
-	sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
+	sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
+	sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 /* Set a base register address for user data constants in the given shader.
  * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
  */
 static void si_set_user_data_base(struct si_context *sctx,
 				  unsigned shader, uint32_t new_base)
 {
 	uint32_t *base = &sctx->shader_userdata.sh_base[shader];
 
@@ -1800,84 +1794,95 @@ void si_shader_change_notify(struct si_context *sctx)
 		else
 			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
 					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
 	} else {
 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
 	}
 }
 
 static void si_emit_shader_pointer(struct si_context *sctx,
 				   struct si_descriptors *desc,
-				   unsigned sh_base, bool keep_dirty)
+				   unsigned sh_base)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	uint64_t va;
 
-	if (!desc->pointer_dirty || !desc->buffer)
-		return;
+	assert(desc->buffer);
 
 	va = desc->buffer->gpu_address +
 	     desc->buffer_offset;
 
 	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
 	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 
-	desc->pointer_dirty = keep_dirty;
 }
 
 void si_emit_graphics_shader_userdata(struct si_context *sctx,
                                       struct r600_atom *atom)
 {
-	unsigned shader;
+	unsigned mask;
 	uint32_t *sh_base = sctx->shader_userdata.sh_base;
 	struct si_descriptors *descs;
 
 	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
 
-	if (descs->pointer_dirty) {
+	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
+				       R_00B030_SPI_SHADER_USER_DATA_PS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+				       R_00B130_SPI_SHADER_USER_DATA_VS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
+				       R_00B230_SPI_SHADER_USER_DATA_GS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+				       R_00B330_SPI_SHADER_USER_DATA_ES_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
-		descs->pointer_dirty = false;
+				       R_00B430_SPI_SHADER_USER_DATA_HS_0);
 	}
 
-	descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
+	mask = sctx->shader_pointers_dirty &
+	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
+				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
 
-	for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+		unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
 		unsigned base = sh_base[shader];
-		unsigned i;
 
-		if (!base)
-			continue;
+		if (base)
+			si_emit_shader_pointer(sctx, descs + i, base);
+	}
+	sctx->shader_pointers_dirty &=
+		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
-		for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
-			si_emit_shader_pointer(sctx, descs, base, false);
+	if (sctx->vertex_buffer_pointer_dirty) {
+		si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
+				       sh_base[PIPE_SHADER_VERTEX]);
+		sctx->vertex_buffer_pointer_dirty = false;
 	}
-	si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
 }
 
 void si_emit_compute_shader_userdata(struct si_context *sctx)
 {
 	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
-	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
+	struct si_descriptors *descs = sctx->descriptors;
+	unsigned compute_mask =
+		u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
+	unsigned mask = sctx->shader_pointers_dirty & compute_mask;
 
-	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-		si_emit_shader_pointer(sctx, descs, base, false);
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+
+		si_emit_shader_pointer(sctx, descs + i, base);
+	}
+	sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;
 	unsigned ce_offset = 0;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
@@ -1932,20 +1937,23 @@ void si_init_all_descriptors(struct si_context *sctx)
 	si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
 	si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
 	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
 }
 
 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
 {
 	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
 	unsigned dirty = sctx->descriptors_dirty & mask;
 
+	/* Assume nothing will go wrong: */
+	sctx->shader_pointers_dirty |= dirty;
+
 	while (dirty) {
 		unsigned i = u_bit_scan(&dirty);
 
 		if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
 					   &sctx->shader_userdata.atom))
 			return false;
 	}
 
 	sctx->descriptors_dirty &= ~mask;
 	return true;
@@ -1953,20 +1961,23 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
 
 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
 {
 	/* Does not update rw_buffers as that is not needed for compute shaders
 	 * and the input buffer is using the same SGPR's anyway.
 	 */
 	const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
 						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
 	unsigned dirty = sctx->descriptors_dirty & mask;
 
+	/* Assume nothing will go wrong: */
+	sctx->shader_pointers_dirty |= dirty;
+
 	while (dirty) {
 		unsigned i = u_bit_scan(&dirty);
 
 		if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
 			return false;
 	}
 
 	sctx->descriptors_dirty &= ~mask;
 
 	return true;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7d071d..421e2a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -261,40 +261,42 @@ struct si_context {
 	/* shader information */
 	struct si_vertex_element	*vertex_elements;
 	unsigned			sprite_coord_enable;
 	bool				flatshade;
 	bool				do_update_shaders;
 
 	/* shader descriptors */
 	struct si_descriptors		vertex_buffers;
 	struct si_descriptors		descriptors[SI_NUM_DESCS];
 	unsigned			descriptors_dirty;
+	unsigned			shader_pointers_dirty;
 	unsigned			compressed_tex_shader_mask;
 	struct si_buffer_resources	rw_buffers;
 	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
 	struct si_buffer_resources	shader_buffers[SI_NUM_SHADERS];
 	struct si_textures_info		samplers[SI_NUM_SHADERS];
 	struct si_images_info		images[SI_NUM_SHADERS];
 
 	/* other shader resources */
 	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
 	struct pipe_resource		*tf_ring;
 	struct pipe_resource		*tess_offchip_ring;
 	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
 	struct r600_resource		*border_color_buffer;
 	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
 	unsigned			border_color_count;
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
+	bool				vertex_buffer_pointer_dirty;
 	struct pipe_index_buffer	index_buffer;
 	struct pipe_vertex_buffer	vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
 	/* MSAA config state. */
 	int				ps_iter_samples;
 	bool				smoothing_enabled;
 
 	/* DB render state. */
 	bool			dbcb_depth_copy_enabled;
 	bool			dbcb_stencil_copy_enabled;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index edc5b93..34a0f57 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -230,22 +230,20 @@ struct si_descriptors {
 	/* elements of the list that are changed and need to be uploaded */
 	unsigned dirty_mask;
 
 	/* Whether the CE ram is dirty and needs to be reinitialized entirely
 	 * before we can do partial updates. */
 	bool ce_ram_dirty;
 
 	/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
 	 * array will be stored. */
 	unsigned shader_userdata_offset;
-	/* Whether the pointer should be re-emitted. */
-	bool pointer_dirty;
 };
 
 struct si_sampler_views {
 	struct pipe_sampler_view	*views[SI_NUM_SAMPLERS];
 	struct si_sampler_state		*sampler_states[SI_NUM_SAMPLERS];
 
 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
 	unsigned			enabled_mask;
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 96a0e84..837c025 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1139,21 +1139,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
 		if (si_pm4_state_changed(sctx, es))
 			cik_prefetch_shader_async(sctx, sctx->queued.named.es);
 		if (si_pm4_state_changed(sctx, gs))
 			cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
 		if (si_pm4_state_changed(sctx, vs))
 			cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
 
 		/* Vertex buffer descriptors are uploaded uncached, so prefetch
 		 * them right after the VS binary. */
-		if (sctx->vertex_buffers.pointer_dirty) {
+		if (sctx->vertex_buffer_pointer_dirty) {
 			cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
 						sctx->vertex_buffers.buffer_offset,
 						sctx->vertex_elements->count * 16);
 		}
 		if (si_pm4_state_changed(sctx, ps))
 			cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
 	}
 
 	/* Emit states. */
 	mask = sctx->dirty_atoms;
-- 
2.7.4



More information about the mesa-dev mailing list