[Mesa-dev] [PATCH 5/7] radeonsi: move flush+prefetch invocation and state emission into separate funcs

Marek Olšák maraeo at gmail.com
Fri Aug 4 10:05:53 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 97 ++++++++++++++++------------
 1 file changed, 57 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3f933fe..ae48115 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1155,28 +1155,80 @@ void si_ce_pre_draw_synchronization(struct si_context *sctx)
 void si_ce_post_draw_synchronization(struct si_context *sctx)
 {
 	if (sctx->ce_need_synchronization) {
 		radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0));
 		radeon_emit(sctx->b.gfx.cs, 0); /* unused */
 
 		sctx->ce_need_synchronization = false;
 	}
 }
 
+static bool si_cache_flush_and_prefetch(struct si_context *sctx)
+{
+	/* Emit pending cache flushes (if any flags are set) before prefetching. */
+	if (sctx->b.flags)
+		si_emit_cache_flush(sctx);
+
+	/* Dumping from CE to L2 should be done after cache flushes, but
+	 * this is only guaranteed when CE is behind or in-sync with DE.
+	 * On upload failure, return false so the caller can skip the draw. */
+	if (!si_upload_graphics_shader_descriptors(sctx))
+		return false;
+
+	if (sctx->prefetch_L2)
+		cik_emit_prefetch_L2(sctx);
+
+	return true;
+}
+
+static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info)
+{
+	/* Emit every dirty state atom, then clear the dirty-atom mask. */
+	unsigned mask = sctx->dirty_atoms;
+	while (mask) {
+		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+		atom->emit(&sctx->b, atom);
+	}
+	sctx->dirty_atoms = 0;
+
+	/* Emit dirty queued states, skipping NULL and already-emitted ones. */
+	mask = sctx->dirty_states;
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+		struct si_pm4_state *state = sctx->queued.array[i];
+
+		if (!state || sctx->emitted.array[i] == state)
+			continue;
+
+		si_pm4_emit(sctx, state);
+		sctx->emitted.array[i] = state;
+	}
+	sctx->dirty_states = 0;
+
+	/* Emit draw states; num_patches stays 0 unless tes_shader.cso is set. */
+	unsigned num_patches = 0;
+
+	si_emit_rasterizer_prim_state(sctx);
+	if (sctx->tes_shader.cso)
+		si_emit_derived_tess_state(sctx, info, &num_patches);
+	si_emit_vs_state(sctx, info);
+	si_emit_draw_registers(sctx, info, num_patches);
+}
+
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct pipe_resource *indexbuf = info->index.resource;
-	unsigned mask, dirty_tex_counter;
+	unsigned dirty_tex_counter;
 	enum pipe_prim_type rast_prim;
-	unsigned num_patches = 0;
 	unsigned index_size = info->index_size;
 	unsigned index_offset = info->indirect ? info->start * index_size : 0;
 
 	if (likely(!info->indirect)) {
 		/* SI-CI treat instance_count==0 as instance_count==1. There is
 		 * no workaround for indirect draws, but we can at least skip
 		 * direct draws.
 		 */
 		if (unlikely(!info->instance_count))
 			return;
@@ -1244,23 +1296,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 		if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
 			sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
 			sctx->do_update_shaders = true;
 		}
 	}
 
 	if (sctx->do_update_shaders && !si_update_shaders(sctx))
 		return;
 
-	if (!si_upload_graphics_shader_descriptors(sctx))
-		return;
-
 	if (index_size) {
 		/* Translate or upload, if needed. */
 		/* 8-bit indices are supported on VI. */
 		if (sctx->b.chip_class <= CIK && index_size == 1) {
 			unsigned start, count, start_offset, size, offset;
 			void *ptr;
 
 			si_get_draw_start_count(sctx, info, &start, &count);
 			start_offset = start * 2;
 			size = count * 2;
@@ -1335,55 +1384,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
 	/* GFX9 scissor bug workaround. There is also a more efficient but
 	 * more involved alternative workaround. */
 	if (sctx->b.chip_class == GFX9 &&
 	    si_is_atom_dirty(sctx, &sctx->b.scissors.atom))
 		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
 
-	/* Flush caches before the first state atom, which does L2 prefetches. */
-	if (sctx->b.flags)
-		si_emit_cache_flush(sctx);
-
-	if (sctx->prefetch_L2)
-		cik_emit_prefetch_L2(sctx);
-
-	/* Emit state atoms. */
-	mask = sctx->dirty_atoms;
-	while (mask) {
-		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
-
-		atom->emit(&sctx->b, atom);
-	}
-	sctx->dirty_atoms = 0;
-
-	/* Emit states. */
-	mask = sctx->dirty_states;
-	while (mask) {
-		unsigned i = u_bit_scan(&mask);
-		struct si_pm4_state *state = sctx->queued.array[i];
-
-		if (!state || sctx->emitted.array[i] == state)
-			continue;
-
-		si_pm4_emit(sctx, state);
-		sctx->emitted.array[i] = state;
-	}
-	sctx->dirty_states = 0;
-
-	si_emit_rasterizer_prim_state(sctx);
-	if (sctx->tes_shader.cso)
-		si_emit_derived_tess_state(sctx, info, &num_patches);
-	si_emit_vs_state(sctx, info);
-	si_emit_draw_registers(sctx, info, num_patches);
+	if (!si_cache_flush_and_prefetch(sctx))
+		return;
+	si_emit_all_states(sctx, info);
 
 	si_ce_pre_draw_synchronization(sctx);
 	si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 	si_ce_post_draw_synchronization(sctx);
 
 	if (sctx->trace_buf)
 		si_trace_emit(sctx);
 
 	/* Workaround for a VGT hang when streamout is enabled.
 	 * It must be done after drawing. */
-- 
2.7.4



More information about the mesa-dev mailing list