[Mesa-dev] [PATCH 09/17] radeonsi: use a bitmask for looping over dirty PM4 states

Marek Olšák maraeo at gmail.com
Thu Jan 26 16:04:25 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Also move the emit loop (formerly si_pm4_emit_dirty) into si_draw_vbo, because
the dirty_states mask should be 0 in most cases.
---
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 +
 src/gallium/drivers/radeonsi/si_pm4.c        | 16 +---------------
 src/gallium/drivers/radeonsi/si_pm4.h        |  1 -
 src/gallium/drivers/radeonsi/si_state.h      |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c | 17 +++++++++++++++--
 5 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b6474e6..da6aca1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -221,20 +221,21 @@ struct si_context {
 
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	LLVMTargetMachineRef		tm; /* only non-threaded compilation */
 	bool				gfx_flush_in_progress;
 	bool				compute_is_busy;
 
 	/* Atoms (direct states). */
 	union si_state_atoms		atoms;
 	unsigned			dirty_atoms; /* mask */
 	/* PM4 states (precomputed immutable states) */
+	unsigned			dirty_states;
 	union si_state			queued;
 	union si_state			emitted;
 
 	/* Atom declarations. */
 	struct r600_atom		prefetch_L2;
 	struct si_framebuffer		framebuffer;
 	struct si_sample_locs		msaa_sample_locs;
 	struct r600_atom		db_render_state;
 	struct r600_atom		msaa_config;
 	struct si_sample_mask		sample_mask;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 97b6799..2680439 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -22,22 +22,20 @@
  *
  * Authors:
  *      Christian König <christian.koenig at amd.com>
  */
 
 #include "radeon/r600_cs.h"
 #include "util/u_memory.h"
 #include "si_pipe.h"
 #include "sid.h"
 
-#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-
 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
 {
 	state->last_opcode = opcode;
 	state->last_pm4 = state->ndw++;
 }
 
 void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
 {
 	state->pm4[state->ndw++] = dw;
 }
@@ -150,36 +148,24 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 					  RADEON_USAGE_READ,
                                           RADEON_PRIO_IB2);
 
 		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
 		radeon_emit(cs, ib->gpu_address);
 		radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
 		radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
 	}
 }
 
-void si_pm4_emit_dirty(struct si_context *sctx)
-{
-	for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-		struct si_pm4_state *state = sctx->queued.array[i];
-
-		if (!state || sctx->emitted.array[i] == state)
-			continue;
-
-		si_pm4_emit(sctx, state);
-		sctx->emitted.array[i] = state;
-	}
-}
-
 void si_pm4_reset_emitted(struct si_context *sctx)
 {
 	memset(&sctx->emitted, 0, sizeof(sctx->emitted));
+	sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
 }
 
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
 				   struct si_pm4_state *state)
 {
 	struct pipe_screen *screen = sctx->b.b.screen;
 	unsigned aligned_ndw = align(state->ndw, 8);
 
 	/* only supported on CIK and later */
 	if (sctx->b.chip_class < CIK)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 9b02a80..106abe1 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -71,14 +71,13 @@ void si_pm4_add_bo(struct si_pm4_state *state,
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
 				   struct si_pm4_state *state);
 
 void si_pm4_clear_state(struct si_pm4_state *state);
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
 		       struct si_pm4_state *state,
 		       unsigned idx);
 
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
-void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 915a8eb..bdcfb5b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -122,20 +122,22 @@ union si_state {
 		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
 		struct si_pm4_state		*gs;
 		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*ps;
 	} named;
 	struct si_pm4_state	*array[0];
 };
 
+#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
+
 union si_state_atoms {
 	struct {
 		/* The order matters. */
 		struct r600_atom *prefetch_L2;
 		struct r600_atom *render_cond;
 		struct r600_atom *streamout_begin;
 		struct r600_atom *streamout_enable; /* must be after streamout_begin */
 		struct r600_atom *framebuffer;
 		struct r600_atom *msaa_sample_locs;
 		struct r600_atom *db_render_state;
@@ -260,20 +262,21 @@ struct si_buffer_resources {
 
 #define si_pm4_block_idx(member) \
 	(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
 
 #define si_pm4_state_changed(sctx, member) \
 	((sctx)->queued.named.member != (sctx)->emitted.named.member)
 
 #define si_pm4_bind_state(sctx, member, value) \
 	do { \
 		(sctx)->queued.named.member = (value); \
+		(sctx)->dirty_states |= 1 << si_pm4_block_idx(member); \
 	} while(0)
 
 #define si_pm4_delete_state(sctx, member, value) \
 	do { \
 		if ((sctx)->queued.named.member == (value)) { \
 			(sctx)->queued.named.member = NULL; \
 		} \
 		si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \
 				  si_pm4_block_idx(member)); \
 	} while(0)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c80d4d6..cce5f30 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1109,30 +1109,43 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 * this must be called after si_need_cs_space, because we must let
 	 * need_cs_space flush before we add buffers to the buffer list.
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
 	/* Flush caches before the first state atom, which does L2 prefetches. */
 	if (sctx->b.flags)
 		si_emit_cache_flush(sctx);
 
-	/* Emit states. */
+	/* Emit state atoms. */
 	mask = sctx->dirty_atoms;
 	while (mask) {
 		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
 
 		atom->emit(&sctx->b, atom);
 	}
 	sctx->dirty_atoms = 0;
 
-	si_pm4_emit_dirty(sctx);
+	/* Emit states. */
+	mask = sctx->dirty_states;
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+		struct si_pm4_state *state = sctx->queued.array[i];
+
+		if (!state || sctx->emitted.array[i] == state)
+			continue;
+
+		si_pm4_emit(sctx, state);
+		sctx->emitted.array[i] = state;
+	}
+	sctx->dirty_states = 0;
+
 	si_emit_scratch_reloc(sctx);
 	si_emit_rasterizer_prim_state(sctx);
 	si_emit_draw_registers(sctx, info);
 
 	si_ce_pre_draw_synchronization(sctx);
 
 	si_emit_draw_packets(sctx, info, &ib);
 
 	si_ce_post_draw_synchronization(sctx);
 
-- 
2.7.4



More information about the mesa-dev mailing list