Mesa (master): radeonsi: Maintain cache of pixel shader variants according to context state.

Michel Dänzer daenzer at kemper.freedesktop.org
Mon Aug 27 10:13:57 UTC 2012


Module: Mesa
Branch: master
Commit: d1e40b3d40b2e90ad4f275565f1ae27fe6f964cc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d1e40b3d40b2e90ad4f275565f1ae27fe6f964cc

Author: Michel Dänzer <michel.daenzer at amd.com>
Date:   Thu Aug 23 17:10:37 2012 +0200

radeonsi: Maintain cache of pixel shader variants according to context state.

Mostly inspired by r600g commit 4acf71f01ea1edb253cd38cc059d4af1a2a40bf4
('r600g: cache shader variants instead of rebuilding v3').

Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

---

 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |    4 +-
 src/gallium/drivers/radeonsi/radeonsi_shader.c |   14 +-
 src/gallium/drivers/radeonsi/radeonsi_shader.h |   24 +++-
 src/gallium/drivers/radeonsi/si_state.c        |  185 ++++++++++++++++++++----
 src/gallium/drivers/radeonsi/si_state.h        |    5 +
 src/gallium/drivers/radeonsi/si_state_draw.c   |   37 +++---
 6 files changed, 210 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index bec2939..989bb49 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -126,8 +126,8 @@ struct r600_context {
 	unsigned			pa_cl_vs_out_cntl;
 	/* for saving when using blitter */
 	struct pipe_stencil_ref		stencil_ref;
-	struct si_pipe_shader		*ps_shader;
-	struct si_pipe_shader		*vs_shader;
+	struct si_pipe_shader_selector	*ps_shader;
+	struct si_pipe_shader_selector	*vs_shader;
 	struct pipe_query		*current_render_cond;
 	unsigned			current_render_cond_mode;
 	struct pipe_query		*saved_render_cond;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 0f2aaef..671eda4 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -282,7 +282,8 @@ static void declare_input_fs(
 	switch (decl->Interp.Interpolate) {
 	case TGSI_INTERPOLATE_COLOR:
 		/* XXX: Flat shading hangs the GPU */
-		if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
+		if (si_shader_ctx->rctx->queued.named.rasterizer &&
+		    si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
 #if 0
 			intr_name = "llvm.SI.fs.interp.constant";
 #else
@@ -617,6 +618,7 @@ int si_pipe_shader_create(
 	struct si_pipe_shader *shader)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
+	struct si_pipe_shader_selector *sel = shader->selector;
 	struct si_shader_context si_shader_ctx;
 	struct tgsi_shader_info shader_info;
 	struct lp_build_tgsi_context * bld_base;
@@ -633,7 +635,7 @@ int si_pipe_shader_create(
 	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
 	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
 
-	tgsi_scan_shader(shader->tokens, &shader_info);
+	tgsi_scan_shader(sel->tokens, &shader_info);
 	bld_base->info = &shader_info;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 	bld_base->emit_epilogue = si_llvm_emit_epilogue;
@@ -642,7 +644,7 @@ int si_pipe_shader_create(
 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 
 	si_shader_ctx.radeon_bld.load_input = declare_input;
-	si_shader_ctx.tokens = shader->tokens;
+	si_shader_ctx.tokens = sel->tokens;
 	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
 	si_shader_ctx.shader = shader;
 	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
@@ -653,10 +655,10 @@ int si_pipe_shader_create(
 	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
 	 * conversion fails. */
 	if (dump) {
-		tgsi_dump(shader->tokens, 0);
+		tgsi_dump(sel->tokens, 0);
 	}
 
-	if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) {
+	if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
 		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
 		return -EINVAL;
 	}
@@ -710,6 +712,4 @@ int si_pipe_shader_create(
 void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
 {
 	si_resource_reference(&shader->bo, NULL);
-
-	memset(&shader->shader,0,sizeof(struct si_shader));
 }
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index d44ee9b..aa2888c 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -37,6 +37,25 @@ struct si_shader_io {
 	bool			centroid;
 };
 
+struct si_pipe_shader;
+
+struct si_pipe_shader_selector {
+	struct si_pipe_shader *current;
+
+	struct tgsi_token       *tokens;
+	struct pipe_stream_output_info  so;
+
+	unsigned	num_shaders;
+
+	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
+	unsigned	type;
+
+	/* 1 when the shader contains
+	 * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
+	 * Used to determine whether we need to include nr_cbufs in the key */
+	unsigned	fs_write_all;
+};
+
 struct si_shader {
 	unsigned		ninput;
 	struct si_shader_io	input[32];
@@ -50,16 +69,17 @@ struct si_shader {
 };
 
 struct si_pipe_shader {
+	struct si_pipe_shader_selector	*selector;
+	struct si_pipe_shader		*next_variant;
 	struct si_shader		shader;
 	struct si_pm4_state		*pm4;
 	struct si_resource		*bo;
-	struct tgsi_token		*tokens;
 	unsigned			num_sgprs;
 	unsigned			num_vgprs;
 	unsigned			spi_ps_input_ena;
 	unsigned			sprite_coord_enable;
-	struct pipe_stream_output_info	so;
 	unsigned			so_strides[4];
+	unsigned			key;
 };
 
 /* radeonsi_shader.c */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5df22dd..5c2e743 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1717,77 +1717,200 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
  * shaders
  */
 
+/* Compute the key for the hw shader variant */
+static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
+					      struct si_pipe_shader_selector *sel)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	unsigned key = 0;
+
+	if (sel->type == PIPE_SHADER_FRAGMENT) {
+		if (sel->fs_write_all)
+			key |= rctx->framebuffer.nr_cbufs;
+		/*if (rctx->queued.named.rasterizer)
+			  key |= rctx->queued.named.rasterizer->flatshade << 4;*/
+		/*key |== rctx->two_side << 5;*/
+	}
+
+	return key;
+}
+
+/* Select the hw shader variant depending on the current state.
+ * (*dirty) is set to 1 if current variant was changed */
+int si_shader_select(struct pipe_context *ctx,
+		     struct si_pipe_shader_selector *sel,
+		     unsigned *dirty)
+{
+	unsigned key;
+	struct si_pipe_shader * shader = NULL;
+	int r;
+
+	key = si_shader_selector_key(ctx, sel);
+
+	/* Check if we don't need to change anything.
+	 * This path is also used for most shaders that don't need multiple
+	 * variants, it will cost just a computation of the key and this
+	 * test. */
+	if (likely(sel->current && sel->current->key == key)) {
+		return 0;
+	}
+
+	/* lookup if we have other variants in the list */
+	if (sel->num_shaders > 1) {
+		struct si_pipe_shader *p = sel->current, *c = p->next_variant;
+
+		while (c && c->key != key) {
+			p = c;
+			c = c->next_variant;
+		}
+
+		if (c) {
+			p->next_variant = c->next_variant;
+			shader = c;
+		}
+	}
+
+	if (unlikely(!shader)) {
+		shader = CALLOC(1, sizeof(struct si_pipe_shader));
+		shader->selector = sel;
+
+		r = si_pipe_shader_create(ctx, shader);
+		if (unlikely(r)) {
+			R600_ERR("Failed to build shader variant (type=%u, key=%u) %d\n",
+				 sel->type, key, r);
+			sel->current = NULL;
+			return r;
+		}
+
+		/* We don't know the value of fs_write_all property until we built
+		 * at least one variant, so we may need to recompute the key (include
+		 * rctx->framebuffer.nr_cbufs) after building first variant. */
+		if (sel->type == PIPE_SHADER_FRAGMENT &&
+		    sel->num_shaders == 0 &&
+		    shader->shader.fs_write_all) {
+			sel->fs_write_all = 1;
+			key = si_shader_selector_key(ctx, sel);
+		}
+
+		shader->key = key;
+		sel->num_shaders++;
+	}
+
+	if (dirty)
+		*dirty = 1;
+
+	shader->next_variant = sel->current;
+	sel->current = shader;
+
+	return 0;
+}
+
 static void *si_create_shader_state(struct pipe_context *ctx,
-                             const struct pipe_shader_state *state)
+				    const struct pipe_shader_state *state,
+				    unsigned pipe_shader_type)
 {
-	struct si_pipe_shader *shader = CALLOC_STRUCT(si_pipe_shader);
+	struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
+	int r;
 
-	shader->tokens = tgsi_dup_tokens(state->tokens);
-	shader->so = state->stream_output;
+	sel->type = pipe_shader_type;
+	sel->tokens = tgsi_dup_tokens(state->tokens);
+	sel->so = state->stream_output;
+
+	r = si_shader_select(ctx, sel, NULL);
+	if (r) {
+	    free(sel);
+	    return NULL;
+	}
+
+	return sel;
+}
+
+static void *si_create_fs_state(struct pipe_context *ctx,
+				const struct pipe_shader_state *state)
+{
+	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
+}
 
-	return shader;
+static void *si_create_vs_state(struct pipe_context *ctx,
+				const struct pipe_shader_state *state)
+{
+	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
 }
 
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_pipe_shader *shader = state;
+	struct si_pipe_shader_selector *sel = state;
 
-	if (rctx->vs_shader == state)
+	if (rctx->vs_shader == sel)
 		return;
 
 	rctx->shader_dirty = true;
-	rctx->vs_shader = shader;
+	rctx->vs_shader = sel;
 
-	if (shader) {
-		si_pm4_bind_state(rctx, vs, shader->pm4);
-	}
+	if (sel && sel->current)
+		si_pm4_bind_state(rctx, vs, sel->current->pm4);
+	else
+		si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
 }
 
 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_pipe_shader *shader = state;
+	struct si_pipe_shader_selector *sel = state;
 
-	if (rctx->ps_shader == state)
+	if (rctx->ps_shader == sel)
 		return;
 
 	rctx->shader_dirty = true;
-	rctx->ps_shader = shader;
+	rctx->ps_shader = sel;
 
-	if (shader) {
-		si_pm4_bind_state(rctx, ps, shader->pm4);
-	}
+	if (sel && sel->current)
+		si_pm4_bind_state(rctx, ps, sel->current->pm4);
+	else
+		si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4);
 }
 
+static void si_delete_shader_selector(struct pipe_context *ctx,
+				      struct si_pipe_shader_selector *sel)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct si_pipe_shader *p = sel->current, *c;
+
+	while (p) {
+		c = p->next_variant;
+		si_pm4_delete_state(rctx, vs, p->pm4);
+		si_pipe_shader_destroy(ctx, p);
+		free(p);
+		p = c;
+	}
+
+	free(sel->tokens);
+	free(sel);
+ }
+
 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+	struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
 
-	if (rctx->vs_shader == shader) {
+	if (rctx->vs_shader == sel) {
 		rctx->vs_shader = NULL;
 	}
 
-	si_pm4_delete_state(rctx, vs, shader->pm4);
-	free(shader->tokens);
-	si_pipe_shader_destroy(ctx, shader);
-	free(shader);
+	si_delete_shader_selector(ctx, sel);
 }
 
 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+	struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
 
-	if (rctx->ps_shader == shader) {
+	if (rctx->ps_shader == sel) {
 		rctx->ps_shader = NULL;
 	}
 
-	si_pm4_delete_state(rctx, ps, shader->pm4);
-	free(shader->tokens);
-	si_pipe_shader_destroy(ctx, shader);
-	free(shader);
+	si_delete_shader_selector(ctx, sel);
 }
 
 /*
@@ -2269,8 +2392,8 @@ void si_init_state_functions(struct r600_context *rctx)
 
 	rctx->context.set_framebuffer_state = si_set_framebuffer_state;
 
-	rctx->context.create_vs_state = si_create_shader_state;
-	rctx->context.create_fs_state = si_create_shader_state;
+	rctx->context.create_vs_state = si_create_vs_state;
+	rctx->context.create_fs_state = si_create_fs_state;
 	rctx->context.bind_vs_state = si_bind_vs_shader;
 	rctx->context.bind_fs_state = si_bind_ps_shader;
 	rctx->context.delete_vs_state = si_delete_vs_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f53ecb7..d59624c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -126,11 +126,16 @@ union si_state {
 	} while(0)
 
 /* si_state.c */
+struct si_pipe_shader_selector;
+
 bool si_is_format_supported(struct pipe_screen *screen,
 			    enum pipe_format format,
 			    enum pipe_texture_target target,
 			    unsigned sample_count,
 			    unsigned usage);
+int si_shader_select(struct pipe_context *ctx,
+		     struct si_pipe_shader_selector *sel,
+		     unsigned *dirty);
 void si_init_state_functions(struct r600_context *rctx);
 void si_init_config(struct r600_context *rctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index fda8b0b..95821dc 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -317,8 +317,8 @@ static void si_update_alpha_ref(struct r600_context *rctx)
 
 static void si_update_spi_map(struct r600_context *rctx)
 {
-	struct si_shader *ps = &rctx->ps_shader->shader;
-	struct si_shader *vs = &rctx->vs_shader->shader;
+	struct si_shader *ps = &rctx->ps_shader->current->shader;
+	struct si_shader *vs = &rctx->vs_shader->current->shader;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	unsigned i, j, tmp;
 
@@ -362,36 +362,39 @@ static void si_update_spi_map(struct r600_context *rctx)
 static void si_update_derived_state(struct r600_context *rctx)
 {
 	struct pipe_context * ctx = (struct pipe_context*)rctx;
+	unsigned ps_dirty = 0;
 
 	if (!rctx->blitter->running) {
 		if (rctx->have_depth_fb || rctx->have_depth_texture)
 			si_flush_depth_textures(rctx);
 	}
 
-	if ((rctx->ps_shader->shader.fs_write_all &&
-	     (rctx->ps_shader->shader.nr_cbufs != rctx->framebuffer.nr_cbufs)) ||
-	    (rctx->sprite_coord_enable &&
-	     (rctx->ps_shader->sprite_coord_enable != rctx->sprite_coord_enable))) {
-		si_pipe_shader_destroy(&rctx->context, rctx->ps_shader);
-	}
+	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
 
 	if (rctx->alpha_ref_dirty) {
 		si_update_alpha_ref(rctx);
 	}
 
-	if (!rctx->vs_shader->bo) {
-		si_pipe_shader_vs(ctx, rctx->vs_shader);
+	if (!rctx->vs_shader->current->pm4) {
+		si_pipe_shader_vs(ctx, rctx->vs_shader->current);
 	}
 
-	if (!rctx->ps_shader->bo) {
-		si_pipe_shader_ps(ctx, rctx->ps_shader);
+	if (!rctx->ps_shader->current->pm4) {
+		si_pipe_shader_ps(ctx, rctx->ps_shader->current);
+		ps_dirty = 0;
 	}
-	if (!rctx->ps_shader->bo) {
-		if (!rctx->dummy_pixel_shader->bo)
+	if (!rctx->ps_shader->current->bo) {
+		if (!rctx->dummy_pixel_shader->pm4)
 			si_pipe_shader_ps(ctx, rctx->dummy_pixel_shader);
-
-		if (rctx->dummy_pixel_shader->pm4)
+		else
 			si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
+
+		ps_dirty = 0;
+	}
+
+	if (ps_dirty) {
+		si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4);
+		rctx->shader_dirty = true;
 	}
 
 	if (rctx->shader_dirty) {
@@ -545,7 +548,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output);
 	}
 
-	rctx->vs_shader_so_strides = rctx->vs_shader->so_strides;
+	rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides;
 
 	if (!si_update_draw_info_state(rctx, info))
 		return;




More information about the mesa-commit mailing list