[Mesa-dev] [PATCH] radeonsi: fix vertex fetches for 2_10_10_10 formats

Nicolai Hähnle nhaehnle at gmail.com
Thu Nov 3 10:16:26 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

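The hardware always treats the 2-bit alpha channel of 2_10_10_10 vertex
formats as unsigned, so add a shader workaround for the signed (SNORM,
SSCALED, SINT) variants. This is rare enough that we'll just build a
monolithic vertex shader whenever the workaround is needed.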

The SINT case cannot actually happen in OpenGL, but I've included it for
completeness since it's just a mix of the other cases: it reuses the
bitcast of the SNORM path and the shift-by-30 sign extension of the
SSCALED path.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 54 ++++++++++++++++++++++---
 src/gallium/drivers/radeonsi/si_shader.h        | 11 +++++
 src/gallium/drivers/radeonsi/si_state.c         | 14 +++++++
 src/gallium/drivers/radeonsi/si_state.h         |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  4 ++
 5 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 28a8b1f..b170eb9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -362,67 +362,105 @@ static LLVMValueRef get_instance_index_for_fetch(
 	/* The division must be done before START_INSTANCE is added. */
 	if (divisor > 1)
 		result = LLVMBuildUDiv(gallivm->builder, result,
 				lp_build_const_int32(gallivm, divisor), "");
 
 	return LLVMBuildAdd(gallivm->builder, result,
 			    LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
 }
 
 static void declare_input_vs(
-	struct si_shader_context *radeon_bld,
+	struct si_shader_context *ctx,
 	unsigned input_index,
 	const struct tgsi_full_declaration *decl,
 	LLVMValueRef out[4])
 {
-	struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
+	struct lp_build_context *base = &ctx->soa.bld_base.base;
 	struct gallivm_state *gallivm = base->gallivm;
-	struct si_shader_context *ctx =
-		si_shader_context(&radeon_bld->soa.bld_base);
 
 	unsigned chan;
+	unsigned fix_fetch;
 
 	LLVMValueRef t_list_ptr;
 	LLVMValueRef t_offset;
 	LLVMValueRef t_list;
 	LLVMValueRef attribute_offset;
 	LLVMValueRef buffer_index;
 	LLVMValueRef args[3];
 	LLVMValueRef input;
 
 	/* Load the T list */
 	t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
 
 	t_offset = lp_build_const_int32(gallivm, input_index);
 
 	t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
 
 	/* Build the attribute offset */
 	attribute_offset = lp_build_const_int32(gallivm, 0);
 
-	buffer_index = LLVMGetParam(radeon_bld->main_fn,
+	buffer_index = LLVMGetParam(ctx->main_fn,
 				    ctx->param_vertex_index0 +
 				    input_index);
 
 	args[0] = t_list;
 	args[1] = attribute_offset;
 	args[2] = buffer_index;
 	input = lp_build_intrinsic(gallivm->builder,
 		"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
 		LLVMReadNoneAttribute);
 
 	/* Break up the vec4 into individual components */
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
 		out[chan] = LLVMBuildExtractElement(gallivm->builder,
 						    input, llvm_chan, "");
 	}
+
+	fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3;
+	if (fix_fetch) {
+		/* The hardware returns an unsigned value; convert it to a
+		 * signed one.
+		 */
+		LLVMValueRef tmp = out[3];
+		LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
+
+		/* First, recover the sign-extended signed integer value. */
+		if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
+			tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
+		else
+			tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, "");
+
+		/* For the integer-like cases, do a natural sign extension.
+		 *
+		 * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+		 * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+		 * exponent.
+		 */
+		tmp = LLVMBuildShl(gallivm->builder, tmp,
+				   fix_fetch == SI_FIX_FETCH_A2_SNORM ?
+				   LLVMConstInt(ctx->i32, 7, 0) : c30, "");
+		tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, "");
+
+		/* Convert back to the right type. */
+		if (fix_fetch == SI_FIX_FETCH_A2_SNORM) {
+			LLVMValueRef clamp;
+			LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
+			tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
+			clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
+			tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
+		} else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
+			tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
+		}
+
+		out[3] = tmp;
+	}
 }
 
 static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
 				     unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	if (swizzle > 0)
 		return bld_base->uint_bld.zero;
 
@@ -8095,25 +8133,29 @@ static void si_fix_num_sgprs(struct si_shader *shader)
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct si_shader *mainp = sel->main_shader_part;
 	int r;
 
 	/* LS, ES, VS are compiled on demand if the main part hasn't been
 	 * compiled for that stage.
+	 *
+	 * Vertex shaders are compiled on demand when a vertex fetch
+	 * workaround must be applied.
 	 */
 	if (!mainp ||
 	    (sel->type == PIPE_SHADER_VERTEX &&
 	     (shader->key.vs.as_es != mainp->key.vs.as_es ||
-	      shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
+	      shader->key.vs.as_ls != mainp->key.vs.as_ls ||
+	      shader->key.vs.fix_fetch)) ||
 	    (sel->type == PIPE_SHADER_TESS_EVAL &&
 	     shader->key.tes.as_es != mainp->key.tes.as_es) ||
 	    (sel->type == PIPE_SHADER_TESS_CTRL &&
 	     shader->key.tcs.epilog.inputs_to_copy) ||
 	    sel->type == PIPE_SHADER_COMPUTE) {
 		/* Monolithic shader (compiled as a whole, has many variants,
 		 * may take a long time to compile).
 		 */
 		r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
 		if (r)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d8ab2a4..59e7bfb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -226,20 +226,28 @@ enum {
 
 	SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
 };
 
 /* SI-specific system values. */
 enum {
 	TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
 	TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
 };
 
+/* For VS shader key fix_fetch. */
+enum {
+	SI_FIX_FETCH_NONE = 0,
+	SI_FIX_FETCH_A2_SNORM = 1,
+	SI_FIX_FETCH_A2_SSCALED = 2,
+	SI_FIX_FETCH_A2_SINT = 3,
+};
+
 struct si_shader;
 
 /* A shader selector is a gallium CSO and contains shader variants and
  * binaries for one TGSI program. This can be shared by multiple contexts.
  */
 struct si_shader_selector {
 	struct si_screen	*screen;
 	struct util_queue_fence ready;
 
 	/* Should only be used by si_init_shader_selector_async
@@ -393,20 +401,23 @@ union si_shader_part_key {
 union si_shader_key {
 	struct {
 		struct si_ps_prolog_bits prolog;
 		struct si_ps_epilog_bits epilog;
 	} ps;
 	struct {
 		struct si_vs_prolog_bits prolog;
 		struct si_vs_epilog_bits epilog;
 		unsigned	as_es:1; /* export shader */
 		unsigned	as_ls:1; /* local shader */
+
+		/* One pair of bits for every input: SI_FIX_FETCH_* enums. */
+		uint32_t	fix_fetch;
 	} vs;
 	struct {
 		struct si_tcs_epilog_bits epilog;
 	} tcs; /* tessellation control shader */
 	struct {
 		struct si_vs_epilog_bits epilog; /* same as VS */
 		unsigned	as_es:1; /* export shader */
 	} tes; /* tessellation evaluation shader */
 	struct {
 		struct si_gs_prolog_bits prolog;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 642ce79..24c7b10 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3274,20 +3274,34 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
 
 		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
 				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
 				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
 				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
 				   S_008F0C_NUM_FORMAT(num_format) |
 				   S_008F0C_DATA_FORMAT(data_format);
 		v->format_size[i] = desc->block.bits / 8;
+
+		/* The hardware always treats the 2-bit alpha channel as
+		 * unsigned, so a shader workaround is needed.
+		 */
+		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
+			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
+				v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
+			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
+				v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
+			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
+				/* This isn't actually used in OpenGL. */
+				v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
+			}
+		}
 	}
 	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
 
 	return v;
 }
 
 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_vertex_element *v = (struct si_vertex_element*)state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3ebf578..c444a69 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,20 +92,21 @@ struct si_state_dsa {
 
 struct si_stencil_ref {
 	struct r600_atom		atom;
 	struct pipe_stencil_ref		state;
 	struct si_dsa_stencil_ref_part	dsa_part;
 };
 
 struct si_vertex_element
 {
 	unsigned			count;
+	uint32_t			fix_fetch;
 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
 	uint32_t			format_size[SI_MAX_ATTRIBS];
 	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2a41bf1..9e95fea 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -865,20 +865,24 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 	memset(key, 0, sizeof(*key));
 
 	switch (sel->type) {
 	case PIPE_SHADER_VERTEX:
 		if (sctx->vertex_elements) {
 			unsigned count = MIN2(sel->info.num_inputs,
 					      sctx->vertex_elements->count);
 			for (i = 0; i < count; ++i)
 				key->vs.prolog.instance_divisors[i] =
 					sctx->vertex_elements->elements[i].instance_divisor;
+
+			key->vs.fix_fetch =
+				sctx->vertex_elements->fix_fetch &
+				u_bit_consecutive(0, 2 * count);
 		}
 		if (sctx->tes_shader.cso)
 			key->vs.as_ls = 1;
 		else if (sctx->gs_shader.cso)
 			key->vs.as_es = 1;
 
 		if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
 		    sctx->ps_shader.cso->info.uses_primid)
 			key->vs.epilog.export_prim_id = 1;
 		break;
-- 
2.7.4



More information about the mesa-dev mailing list