[Mesa-dev] [PATCH 4/4] radeonsi: add instance divisor support v3

Christian König deathsimple at vodafone.de
Wed Mar 27 08:35:55 PDT 2013


From: Christian König <christian.koenig at amd.com>

v2: reduce key size, don't copy key around too much.
v3: remove key size reduction

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c |   67 +++++++++++++++---------
 src/gallium/drivers/radeonsi/radeonsi_shader.h |   24 +++++----
 src/gallium/drivers/radeonsi/si_state.c        |   44 +++++++++-------
 src/gallium/drivers/radeonsi/si_state_draw.c   |   18 +++++--
 4 files changed, 94 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 0512528..5fdf46e 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -54,11 +54,9 @@
 struct si_shader_context
 {
 	struct radeon_llvm_context radeon_bld;
-	struct r600_context *rctx;
 	struct tgsi_parse_context parse;
 	struct tgsi_token * tokens;
 	struct si_pipe_shader *shader;
-	struct si_shader_key key;
 	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
 	LLVMValueRef const_md;
 	LLVMValueRef const_resource;
@@ -112,22 +110,41 @@ static LLVMValueRef build_indexed_load(
 	return result;
 }
 
+static LLVMValueRef get_instance_index(
+	struct radeon_llvm_context * radeon_bld,
+	unsigned divisor)
+{
+	struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
+
+	LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID);
+	result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
+			radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+
+	if (divisor > 1)
+		result = LLVMBuildUDiv(gallivm->builder, result,
+				lp_build_const_int32(gallivm, divisor), "");
+
+	return result;
+}
+
 static void declare_input_vs(
 	struct si_shader_context * si_shader_ctx,
 	unsigned input_index,
 	const struct tgsi_full_declaration *decl)
 {
+	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+	unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
+
+	unsigned chan;
+
 	LLVMValueRef t_list_ptr;
 	LLVMValueRef t_offset;
 	LLVMValueRef t_list;
 	LLVMValueRef attribute_offset;
-	LLVMValueRef buffer_index_reg;
+	LLVMValueRef buffer_index;
 	LLVMValueRef args[3];
 	LLVMTypeRef vec4_type;
 	LLVMValueRef input;
-	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
-	//struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
-	unsigned chan;
 
 	/* Load the T list */
 	t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
@@ -139,14 +156,20 @@ static void declare_input_vs(
 	/* Build the attribute offset */
 	attribute_offset = lp_build_const_int32(base->gallivm, 0);
 
-	/* Load the buffer index, which is always stored in VGPR0
-	 * for Vertex Shaders */
-	buffer_index_reg = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID);
+	if (divisor) {
+		/* Build index from instance ID, start instance and divisor */
+		si_shader_ctx->shader->shader.uses_instanceid = true;
+		buffer_index = get_instance_index(&si_shader_ctx->radeon_bld, divisor);
+	} else {
+		/* Load the buffer index, which is always stored in VGPR0
+		 * for Vertex Shaders */
+		buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID);
+	}
 
 	vec4_type = LLVMVectorType(base->elem_type, 4);
 	args[0] = t_list;
 	args[1] = attribute_offset;
-	args[2] = buffer_index_reg;
+	args[2] = buffer_index;
 	input = build_intrinsic(base->gallivm->builder,
 		"llvm.SI.vs.load.input", vec4_type, args, 3,
 		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -239,7 +262,7 @@ static void declare_input_fs(
 	/* XXX: Handle all possible interpolation modes */
 	switch (decl->Interp.Interpolate) {
 	case TGSI_INTERPOLATE_COLOR:
-		if (si_shader_ctx->key.flatshade) {
+		if (si_shader_ctx->shader->key.ps.flatshade) {
 			interp_param = 0;
 		} else {
 			if (decl->Interp.Centroid)
@@ -272,7 +295,7 @@ static void declare_input_fs(
 
 	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
 	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-	    si_shader_ctx->key.color_two_side) {
+	    si_shader_ctx->shader->key.ps.color_two_side) {
 		LLVMValueRef args[4];
 		LLVMValueRef face, is_face_positive;
 		LLVMValueRef back_attr_number =
@@ -351,15 +374,12 @@ static void declare_system_value(
 	unsigned index,
 	const struct tgsi_full_declaration *decl)
 {
-	struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
 
 	LLVMValueRef value = 0;
 
 	switch (decl->Semantic.Name) {
 	case TGSI_SEMANTIC_INSTANCEID:
-		value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID);
-		value = LLVMBuildAdd(gallivm->builder, value,
-			LLVMGetParam(radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+		value = get_instance_index(radeon_bld, 1);
 		break;
 
 	case TGSI_SEMANTIC_VERTEXID:
@@ -433,7 +453,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 		int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
 		if (cbuf >= 0 && cbuf < 8) {
-			compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;
+			compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
 
 			if (compressed)
 				si_shader_ctx->shader->spi_shader_col_format |=
@@ -509,13 +529,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 
-	if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) {
+	if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
 		LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
 		LLVMValueRef alpha_pass =
 			lp_build_cmp(&bld_base->base,
-				     si_shader_ctx->key.alpha_func,
+				     si_shader_ctx->shader->key.ps.alpha_func,
 				     LLVMBuildLoad(gallivm->builder, out_ptr, ""),
-				     lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref));
+				     lp_build_const_float(gallivm, si_shader_ctx->shader->key.ps.alpha_ref));
 		LLVMValueRef arg =
 			lp_build_select(&bld_base->base,
 					alpha_pass,
@@ -612,7 +632,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 				} else {
 					target = V_008DFC_SQ_EXP_MRT + color_count;
 					if (color_count == 0 &&
-					    si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS)
+					    si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
 						si_alpha_test(bld_base, index);
 
 					color_count++;
@@ -1075,8 +1095,7 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
 
 int si_pipe_shader_create(
 	struct pipe_context *ctx,
-	struct si_pipe_shader *shader,
-	struct si_shader_key key)
+	struct si_pipe_shader *shader)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
 	struct si_pipe_shader_selector *sel = shader->selector;
@@ -1117,9 +1136,7 @@ int si_pipe_shader_create(
 	si_shader_ctx.tokens = sel->tokens;
 	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
 	si_shader_ctx.shader = shader;
-	si_shader_ctx.key = key;
 	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
-	si_shader_ctx.rctx = rctx;
 
 	create_meta_data(&si_shader_ctx);
 	create_function(&si_shader_ctx);
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index 9dae742..9d3c14b 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -111,13 +111,18 @@ struct si_shader {
 	unsigned		nr_cbufs;
 };
 
-struct si_shader_key {
-	unsigned		export_16bpc:8;
-	unsigned		nr_cbufs:4;
-	unsigned		color_two_side:1;
-	unsigned		alpha_func:3;
-	unsigned		flatshade:1;
-	float			alpha_ref;
+union si_shader_key {
+	struct {
+		unsigned	export_16bpc:8;
+		unsigned	nr_cbufs:4;
+		unsigned	color_two_side:1;
+		unsigned	alpha_func:3;
+		unsigned	flatshade:1;
+		float		alpha_ref;
+	} ps;
+	struct {
+		unsigned	instance_divisors[PIPE_MAX_ATTRIBS];
+	} vs;
 };
 
 struct si_pipe_shader {
@@ -132,12 +137,11 @@ struct si_pipe_shader {
 	unsigned			spi_shader_col_format;
 	unsigned			sprite_coord_enable;
 	unsigned			so_strides[4];
-	struct si_shader_key		key;
+	union si_shader_key		key;
 };
 
 /* radeonsi_shader.c */
-int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader,
-			  struct si_shader_key key);
+int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
 void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index bdd41b4..ca9e8b4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1870,30 +1870,36 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
  */
 
 /* Compute the key for the hw shader variant */
-static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *ctx,
-							  struct si_pipe_shader_selector *sel)
+static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+					  struct si_pipe_shader_selector *sel,
+					  union si_shader_key *key)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_shader_key key;
-	memset(&key, 0, sizeof(key));
+	memset(key, 0, sizeof(*key));
 
-	if (sel->type == PIPE_SHADER_FRAGMENT) {
+	if (sel->type == PIPE_SHADER_VERTEX) {
+		unsigned i;
+		if (!rctx->vertex_elements)
+			return;
+
+		for (i = 0; i < rctx->vertex_elements->count; ++i)
+			key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor;
+
+	} else if (sel->type == PIPE_SHADER_FRAGMENT) {
 		if (sel->fs_write_all)
-			key.nr_cbufs = rctx->framebuffer.nr_cbufs;
-		key.export_16bpc = rctx->export_16bpc;
+			key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+		key->ps.export_16bpc = rctx->export_16bpc;
 		if (rctx->queued.named.rasterizer) {
-			key.color_two_side = rctx->queued.named.rasterizer->two_side;
-			key.flatshade = rctx->queued.named.rasterizer->flatshade;
+			key->ps.color_two_side = rctx->queued.named.rasterizer->two_side;
+			key->ps.flatshade = rctx->queued.named.rasterizer->flatshade;
 		}
 		if (rctx->queued.named.dsa) {
-			key.alpha_func = rctx->queued.named.dsa->alpha_func;
-			key.alpha_ref = rctx->queued.named.dsa->alpha_ref;
+			key->ps.alpha_func = rctx->queued.named.dsa->alpha_func;
+			key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref;
 		} else {
-			key.alpha_func = PIPE_FUNC_ALWAYS;
+			key->ps.alpha_func = PIPE_FUNC_ALWAYS;
 		}
 	}
-
-	return key;
 }
 
 /* Select the hw shader variant depending on the current state.
@@ -1902,11 +1908,11 @@ int si_shader_select(struct pipe_context *ctx,
 		     struct si_pipe_shader_selector *sel,
 		     unsigned *dirty)
 {
-	struct si_shader_key key;
+	union si_shader_key key;
 	struct si_pipe_shader * shader = NULL;
 	int r;
 
-	key = si_shader_selector_key(ctx, sel);
+	si_shader_selector_key(ctx, sel, &key);
 
 	/* Check if we don't need to change anything.
 	 * This path is also used for most shaders that don't need multiple
@@ -1934,8 +1940,9 @@ int si_shader_select(struct pipe_context *ctx,
 	if (unlikely(!shader)) {
 		shader = CALLOC(1, sizeof(struct si_pipe_shader));
 		shader->selector = sel;
+		shader->key = key;
 
-		r = si_pipe_shader_create(ctx, shader, key);
+		r = si_pipe_shader_create(ctx, shader);
 		if (unlikely(r)) {
 			R600_ERR("Failed to build shader variant (type=%u) %d\n",
 				 sel->type, r);
@@ -1951,10 +1958,9 @@ int si_shader_select(struct pipe_context *ctx,
 		    sel->num_shaders == 0 &&
 		    shader->shader.fs_write_all) {
 			sel->fs_write_all = 1;
-			key = si_shader_selector_key(ctx, sel);
+			si_shader_selector_key(ctx, sel, &shader->key);
 		}
 
-		shader->key = key;
 		sel->num_shaders++;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 383d2a0..1e21e03 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -145,7 +145,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
 		if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
 			db_shader_control |= S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
 	}
-	if (shader->shader.uses_kill || shader->key.alpha_func != PIPE_FUNC_ALWAYS)
+	if (shader->shader.uses_kill || shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
 		db_shader_control |= S_02880C_KILL_ENABLE(1);
 
 	exports_ps = 0;
@@ -329,7 +329,7 @@ bcolor:
 
 		if (ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
 		    (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
-		     rctx->ps_shader->current->key.flatshade)) {
+		     rctx->ps_shader->current->key.ps.flatshade)) {
 			tmp |= S_028644_FLAT_SHADE(1);
 		}
 
@@ -356,7 +356,7 @@ bcolor:
 			       tmp);
 
 		if (name == TGSI_SEMANTIC_COLOR &&
-		    rctx->ps_shader->current->key.color_two_side) {
+		    rctx->ps_shader->current->key.ps.color_two_side) {
 			name = TGSI_SEMANTIC_BCOLOR;
 			param_offset++;
 			goto bcolor;
@@ -369,7 +369,7 @@ bcolor:
 static void si_update_derived_state(struct r600_context *rctx)
 {
 	struct pipe_context * ctx = (struct pipe_context*)rctx;
-	unsigned ps_dirty = 0;
+	unsigned vs_dirty = 0, ps_dirty = 0;
 
 	if (!rctx->blitter->running) {
 		/* Flush depth textures which need to be flushed. */
@@ -381,12 +381,20 @@ static void si_update_derived_state(struct r600_context *rctx)
 		}
 	}
 
-	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
+	si_shader_select(ctx, rctx->vs_shader, &vs_dirty);
 
 	if (!rctx->vs_shader->current->pm4) {
 		si_pipe_shader_vs(ctx, rctx->vs_shader->current);
+		vs_dirty = 0;
+	}
+
+	if (vs_dirty) {
+		si_pm4_bind_state(rctx, vs, rctx->vs_shader->current->pm4);
 	}
 
+
+	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
+
 	if (!rctx->ps_shader->current->pm4) {
 		si_pipe_shader_ps(ctx, rctx->ps_shader->current);
 		ps_dirty = 0;
-- 
1.7.9.5



More information about the mesa-dev mailing list