[Mesa-dev] [PATCH 06/19] r600g: do fine-grained sampler state updates

Marek Olšák maraeo at gmail.com
Mon Sep 10 16:16:19 PDT 2012


Update only those sampler states which have changed in a shader stage,
instead of always updating all sampler states in that shader stage.
This requires keeping two bitmasks per shader stage: one for the states
which are enabled, and one for the states which are dirty at a given point
(always a subset of the enabled states).

This is similar to how sampler views, constant buffers, and vertex buffers
are handled.
---
 src/gallium/drivers/r600/evergreen_state.c   |   23 ++++----
 src/gallium/drivers/r600/r600_blit.c         |    4 +-
 src/gallium/drivers/r600/r600_hw_context.c   |    9 ++-
 src/gallium/drivers/r600/r600_pipe.h         |   14 ++++-
 src/gallium/drivers/r600/r600_state.c        |   35 +++++++-----
 src/gallium/drivers/r600/r600_state_common.c |   76 ++++++++++++++++++++------
 6 files changed, 110 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 30d9d02..b7cd302 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2080,23 +2080,26 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				unsigned border_index_reg)
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
-	unsigned i;
+	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
-	for (i = 0; i < texinfo->n_samplers; i++) {
+	while (dirty_mask) {
+		struct r600_pipe_sampler_state *rstate;
+		unsigned i = u_bit_scan(&dirty_mask);
+
+		rstate = texinfo->states.states[i];
+		assert(rstate);
 
-		if (texinfo->samplers[i] == NULL) {
-			continue;
-		}
 		r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
 		r600_write_value(cs, (resource_id_base + i) * 3);
-		r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words);
+		r600_write_array(cs, 3, rstate->tex_sampler_words);
 
-		if (texinfo->samplers[i]->border_color_use) {
+		if (rstate->border_color_use) {
 			r600_write_config_reg_seq(cs, border_index_reg, 5);
 			r600_write_value(cs, i);
-			r600_write_array(cs, 4, texinfo->samplers[i]->border_color);
+			r600_write_array(cs, 4, rstate->border_color);
 		}
 	}
+	texinfo->states.dirty_mask = 0;
 }
 
 static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
@@ -2149,8 +2152,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	/* shader program */
 	r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0);
 	/* sampler */
-	r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, evergreen_emit_vs_sampler_states, 0);
-	r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, evergreen_emit_ps_sampler_states, 0);
+	r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, evergreen_emit_vs_sampler_states, 0);
+	r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, evergreen_emit_ps_sampler_states, 0);
 	/* resources */
 	r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
 	r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 072df14..584b7fc 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -79,8 +79,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 
 	if (op & R600_SAVE_TEXTURES) {
 		util_blitter_save_fragment_sampler_states(
-			rctx->blitter, rctx->ps_samplers.n_samplers,
-			(void**)rctx->ps_samplers.samplers);
+			rctx->blitter, util_last_bit(rctx->ps_samplers.states.enabled_mask),
+			(void**)rctx->ps_samplers.states.states);
 
 		util_blitter_save_fragment_sampler_views(
 			rctx->blitter, util_last_bit(rctx->ps_samplers.views.enabled_mask),
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1db5f0d..020f626 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1043,9 +1043,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	r600_atom_dirty(ctx, &ctx->alphatest_state.atom);
 	r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
 	r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
-	/* reemit sampler, will only matter if atom_sampler.num_dw != 0 */
-	r600_atom_dirty(ctx, &ctx->vs_samplers.atom_sampler);
-	r600_atom_dirty(ctx, &ctx->ps_samplers.atom_sampler);
+
+	ctx->vs_samplers.states.dirty_mask = ctx->vs_samplers.states.enabled_mask;
+	ctx->ps_samplers.states.dirty_mask = ctx->ps_samplers.states.enabled_mask;
+	r600_sampler_states_dirty(ctx, &ctx->vs_samplers.states);
+	r600_sampler_states_dirty(ctx, &ctx->ps_samplers.states);
+
 	if (ctx->chip_class <= R700) {
 		r600_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
 	}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 5966cef..8d1acd2 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -261,11 +261,17 @@ struct r600_samplerview_state {
 	uint32_t			compressed_colortex_mask;
 };
 
+struct r600_sampler_states {
+	struct r600_atom		atom;
+	struct r600_pipe_sampler_state	*states[NUM_TEX_UNITS];
+	uint32_t			enabled_mask;
+	uint32_t			dirty_mask;
+	uint32_t			has_bordercolor_mask; /* which states contain the border color */
+};
+
 struct r600_textures_info {
 	struct r600_samplerview_state	views;
-	struct r600_atom		atom_sampler;
-	struct r600_pipe_sampler_state	*samplers[NUM_TEX_UNITS];
-	unsigned			n_samplers;
+	struct r600_sampler_states	states;
 	bool				is_array_sampler[NUM_TEX_UNITS];
 };
 
@@ -571,6 +577,8 @@ void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned
 void r600_vertex_buffers_dirty(struct r600_context *rctx);
 void r600_sampler_views_dirty(struct r600_context *rctx,
 			      struct r600_samplerview_state *state);
+void r600_sampler_states_dirty(struct r600_context *rctx,
+			       struct r600_sampler_states *state);
 void r600_set_max_scissor(struct r600_context *rctx);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
 void r600_draw_rectangle(struct blitter_context *blitter,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 4469b8c..109c4c5 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1932,42 +1932,47 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
 				unsigned border_color_reg)
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
-	unsigned i;
+	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
-	for (i = 0; i < texinfo->n_samplers; i++) {
+	while (dirty_mask) {
+		struct r600_pipe_sampler_state *rstate;
+		struct r600_pipe_sampler_view *rview;
+		unsigned i = u_bit_scan(&dirty_mask);
 
-		if (texinfo->samplers[i] == NULL) {
-			continue;
-		}
+		rstate = texinfo->states.states[i];
+		assert(rstate);
+		rview = texinfo->views.views[i];
 
 		/* TEX_ARRAY_OVERRIDE must be set for array textures to disable
 		 * filtering between layers.
 		 * Don't update TEX_ARRAY_OVERRIDE if we don't have the sampler view.
 		 */
-		if (texinfo->views.views[i]) {
-			if (texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY ||
-			    texinfo->views.views[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) {
-				texinfo->samplers[i]->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1);
+		if (rview) {
+			enum pipe_texture_target target = rview->base.texture->target;
+			if (target == PIPE_TEXTURE_1D_ARRAY ||
+			    target == PIPE_TEXTURE_2D_ARRAY) {
+				rstate->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1);
 				texinfo->is_array_sampler[i] = true;
 			} else {
-				texinfo->samplers[i]->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE;
+				rstate->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE;
 				texinfo->is_array_sampler[i] = false;
 			}
 		}
 
 		r600_write_value(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
 		r600_write_value(cs, (resource_id_base + i) * 3);
-		r600_write_array(cs, 3, texinfo->samplers[i]->tex_sampler_words);
+		r600_write_array(cs, 3, rstate->tex_sampler_words);
 
-		if (texinfo->samplers[i]->border_color_use) {
+		if (rstate->border_color_use) {
 			unsigned offset;
 
 			offset = border_color_reg;
 			offset += i * 16;
 			r600_write_config_reg_seq(cs, offset, 4);
-			r600_write_array(cs, 4, texinfo->samplers[i]->border_color);
+			r600_write_array(cs, 4, rstate->border_color);
 		}
 	}
+	texinfo->states.dirty_mask = 0;
 }
 
 static void r600_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
@@ -2025,8 +2030,8 @@ void r600_init_state_functions(struct r600_context *rctx)
 	/* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change
 	 * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map)
 	 */
-	r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, r600_emit_vs_sampler_states, 0);
-	r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, r600_emit_ps_sampler_states, 0);
+	r600_init_atom(rctx, &rctx->vs_samplers.states.atom, id++, r600_emit_vs_sampler_states, 0);
+	r600_init_atom(rctx, &rctx->ps_samplers.states.atom, id++, r600_emit_ps_sampler_states, 0);
 	/* resource */
 	r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, r600_emit_vs_sampler_views, 0);
 	r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, r600_emit_ps_sampler_views, 0);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 13995e0..8718316 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -346,6 +346,20 @@ static void r600_sampler_view_destroy(struct pipe_context *ctx,
 	FREE(resource);
 }
 
+void r600_sampler_states_dirty(struct r600_context *rctx,
+			       struct r600_sampler_states *state)
+{
+	if (state->dirty_mask) {
+		if (state->dirty_mask & state->has_bordercolor_mask) {
+			rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+		}
+		state->atom.num_dw =
+			util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
+			util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
+		r600_atom_dirty(rctx, &state->atom);
+	}
+}
+
 static void r600_bind_sampler_states(struct pipe_context *pipe,
                                unsigned shader,
 			       unsigned start,
@@ -353,8 +367,13 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
 {
 	struct r600_context *rctx = (struct r600_context *)pipe;
 	struct r600_textures_info *dst;
+	struct r600_pipe_sampler_state **rstates = (struct r600_pipe_sampler_state**)states;
 	int seamless_cube_map = -1;
 	unsigned i;
+	/* This sets 1-bit for states with index >= count. */
+	uint32_t disable_mask = ~((1ull << count) - 1);
+	/* These are the new states set by this function. */
+	uint32_t new_mask = 0;
 
 	assert(start == 0); /* XXX fix below */
 
@@ -370,33 +389,47 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
 		return;
 	}
 
-	memcpy(dst->samplers, states, sizeof(void*) * count);
-	dst->n_samplers = count;
-	dst->atom_sampler.num_dw = 0;
-
 	for (i = 0; i < count; i++) {
-		struct r600_pipe_sampler_state *sampler = states[i];
+		struct r600_pipe_sampler_state *rstate = rstates[i];
 
-		if (sampler == NULL) {
+		if (rstate == dst->states.states[i]) {
 			continue;
 		}
-		if (sampler->border_color_use) {
-			dst->atom_sampler.num_dw += 11;
-			rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+
+		if (rstate) {
+			if (rstate->border_color_use) {
+				dst->states.has_bordercolor_mask |= 1 << i;
+			} else {
+				dst->states.has_bordercolor_mask &= ~(1 << i);
+			}
+			seamless_cube_map = rstate->seamless_cube_map;
+
+			new_mask |= 1 << i;
 		} else {
-			dst->atom_sampler.num_dw += 5;
+			disable_mask |= 1 << i;
 		}
-		seamless_cube_map = sampler->seamless_cube_map;
 	}
-	if (rctx->chip_class <= R700 && seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) {
+
+	memcpy(dst->states.states, rstates, sizeof(void*) * count);
+	memset(dst->states.states + count, 0, sizeof(void*) * (NUM_TEX_UNITS - count));
+
+	dst->states.enabled_mask &= ~disable_mask;
+	dst->states.dirty_mask &= dst->states.enabled_mask;
+	dst->states.enabled_mask |= new_mask;
+	dst->states.dirty_mask |= new_mask;
+	dst->states.has_bordercolor_mask &= dst->states.enabled_mask;
+
+	r600_sampler_states_dirty(rctx, &dst->states);
+
+	/* Seamless cubemap state. */
+	if (rctx->chip_class <= R700 &&
+	    seamless_cube_map != -1 &&
+	    seamless_cube_map != rctx->seamless_cube_map.enabled) {
 		/* change in TA_CNTL_AUX need a pipeline flush */
 		rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
 		rctx->seamless_cube_map.enabled = seamless_cube_map;
 		r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
 	}
-	if (dst->atom_sampler.num_dw) {
-		r600_atom_dirty(rctx, &dst->atom_sampler);
-	}
 }
 
 static void r600_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
@@ -540,6 +573,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 	struct r600_context *rctx = (struct r600_context *) pipe;
 	struct r600_textures_info *dst;
 	struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
+	uint32_t dirty_sampler_states_mask = 0;
 	unsigned i;
 	/* This sets 1-bit for textures with index >= count. */
 	uint32_t disable_mask = ~((1ull << count) - 1);
@@ -594,12 +628,13 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 				dst->views.compressed_colortex_mask &= ~(1 << i);
 			}
 
-			/* Changing from array to non-arrays textures and vice
-			 * versa requires updating TEX_ARRAY_OVERRIDE on R6xx-R7xx. */
+			/* Changing from array to non-arrays textures and vice versa requires
+			 * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
 			if (rctx->chip_class <= R700 &&
+			    (dst->states.enabled_mask & (1 << i)) &&
 			    (rviews[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY ||
 			     rviews[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) != dst->is_array_sampler[i]) {
-				r600_atom_dirty(rctx, &dst->atom_sampler);
+				dirty_sampler_states_mask |= 1 << i;
 			}
 
 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
@@ -618,6 +653,11 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 	dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
 
 	r600_sampler_views_dirty(rctx, &dst->views);
+
+	if (dirty_sampler_states_mask) {
+		dst->states.dirty_mask |= dirty_sampler_states_mask;
+		r600_sampler_states_dirty(rctx, &dst->states);
+	}
 }
 
 static void r600_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,
-- 
1.7.9.5



More information about the mesa-dev mailing list