[Mesa-dev] [PATCH] radeonsi: make fix_fetch 64-bit

Marek Olšák maraeo at gmail.com
Mon Jan 16 14:00:14 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

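Widen the per-input fix_fetch field from 2 bits to 4 bits (one nibble per
vertex input), which makes the combined mask 64 bits wide instead of 32.

v2: add u_bit_consecutive64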
---
 src/gallium/drivers/radeonsi/si_shader.c        | 4 ++--
 src/gallium/drivers/radeonsi/si_shader.h        | 4 ++--
 src/gallium/drivers/radeonsi/si_state.c         | 6 +++---
 src/gallium/drivers/radeonsi/si_state.h         | 2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 src/util/bitscan.h                              | 9 +++++++++
 6 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6f0f414..dfba9d4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -426,21 +426,21 @@ static void declare_input_vs(
 		"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
 		LP_FUNC_ATTR_READNONE);
 
 	/* Break up the vec4 into individual components */
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
 		out[chan] = LLVMBuildExtractElement(gallivm->builder,
 						    input, llvm_chan, "");
 	}
 
-	fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (2 * input_index)) & 3;
+	fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
 	if (fix_fetch) {
 		/* The hardware returns an unsigned value; convert it to a
 		 * signed one.
 		 */
 		LLVMValueRef tmp = out[3];
 		LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
 
 		/* First, recover the sign-extended signed integer value. */
 		if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
 			tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
@@ -6578,21 +6578,21 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
 		fprintf(f, "  part.vs.prolog.instance_divisors = {");
 		for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
 			fprintf(f, !i ? "%u" : ", %u",
 				key->part.vs.prolog.instance_divisors[i]);
 		fprintf(f, "}\n");
 		fprintf(f, "  part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id);
 		fprintf(f, "  as_es = %u\n", key->as_es);
 		fprintf(f, "  as_ls = %u\n", key->as_ls);
-		fprintf(f, "  mono.vs.fix_fetch = 0x%x\n", key->mono.vs.fix_fetch);
+		fprintf(f, "  mono.vs.fix_fetch = 0x%"PRIx64"\n", key->mono.vs.fix_fetch);
 		break;
 
 	case PIPE_SHADER_TESS_CTRL:
 		fprintf(f, "  part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
 		fprintf(f, "  mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy);
 		break;
 
 	case PIPE_SHADER_TESS_EVAL:
 		fprintf(f, "  part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id);
 		fprintf(f, "  as_es = %u\n", key->as_es);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1b5dec2..89f9628 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -418,22 +418,22 @@ struct si_shader_key {
 
 	/* These two are initially set according to the NEXT_SHADER property,
 	 * or guessed if the property doesn't seem correct.
 	 */
 	unsigned as_es:1; /* export shader */
 	unsigned as_ls:1; /* local shader */
 
 	/* Flags for monolithic compilation only. */
 	union {
 		struct {
-			/* One pair of bits for every input: SI_FIX_FETCH_* enums. */
-			uint32_t	fix_fetch;
+			/* One nibble for every input: SI_FIX_FETCH_* enums. */
+			uint64_t	fix_fetch;
 		} vs;
 		struct {
 			uint64_t	inputs_to_copy; /* for fixed-func TCS */
 		} tcs;
 	} mono;
 
 	/* Optimization flags for asynchronous compilation only. */
 	union {
 		struct {
 			uint64_t	kill_outputs; /* "get_unique_index" bits */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6e7d8da..fa78a56 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3356,26 +3356,26 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
 				   S_008F0C_NUM_FORMAT(num_format) |
 				   S_008F0C_DATA_FORMAT(data_format);
 		v->format_size[i] = desc->block.bits / 8;
 
 		/* The hardware always treats the 2-bit alpha channel as
 		 * unsigned, so a shader workaround is needed.
 		 */
 		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
 			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
-				v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
+				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
-				v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
+				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
 				/* This isn't actually used in OpenGL. */
-				v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
+				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
 			}
 		}
 
 		/* We work around the fact that 8_8_8 and 16_16_16 data formats
 		 * do not exist by using the corresponding 4-component formats.
 		 * This requires a fixup of the descriptor for bounds checks.
 		 */
 		if (desc->block.bits == 3 * 8 ||
 		    desc->block.bits == 3 * 16) {
 			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index a17dbc7..edc5b93 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,26 +92,26 @@ struct si_state_dsa {
 
 struct si_stencil_ref {
 	struct r600_atom		atom;
 	struct pipe_stencil_ref		state;
 	struct si_dsa_stencil_ref_part	dsa_part;
 };
 
 struct si_vertex_element
 {
 	unsigned			count;
-	uint32_t			fix_fetch;
 
 	/* Two bits per attribute indicating the size of each vector component
 	 * in bytes if the size 3-workaround must be applied.
 	 */
 	uint32_t			fix_size3;
+	uint64_t			fix_fetch;
 
 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
 	uint32_t			format_size[SI_MAX_ATTRIBS];
 	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_state_rasterizer	*rasterizer;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9967837..d2f04bc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -927,21 +927,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 	case PIPE_SHADER_VERTEX:
 		if (sctx->vertex_elements) {
 			unsigned count = MIN2(sel->info.num_inputs,
 					      sctx->vertex_elements->count);
 			for (i = 0; i < count; ++i)
 				key->part.vs.prolog.instance_divisors[i] =
 					sctx->vertex_elements->elements[i].instance_divisor;
 
 			key->mono.vs.fix_fetch =
 				sctx->vertex_elements->fix_fetch &
-				u_bit_consecutive(0, 2 * count);
+				u_bit_consecutive64(0, 4 * count);
 		}
 		if (sctx->tes_shader.cso)
 			key->as_ls = 1;
 		else if (sctx->gs_shader.cso)
 			key->as_es = 1;
 		else {
 			si_shader_selector_key_hw_vs(sctx, sel, key);
 
 			if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
 				key->part.vs.epilog.export_prim_id = 1;
diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index a5dfa1f..4f8b608 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -219,16 +219,25 @@ util_last_bit_signed(int i)
  */
 static inline unsigned
 u_bit_consecutive(unsigned start, unsigned count)
 {
    assert(start + count <= 32);
    if (count == 32)
       return ~0;
    return ((1u << count) - 1) << start;
 }
 
+/* 64-bit mask of "count" consecutive set bits starting at bit "start". */
+static inline uint64_t
+u_bit_consecutive64(unsigned start, unsigned count)
+{
+   assert(start + count <= 64);
+   if (count == 64) /* shifting a 64-bit value by 64 would be undefined */
+      return ~(uint64_t)0;
+   return (((uint64_t)1 << count) - 1) << start;
+}
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* BITSCAN_H */
-- 
2.7.4



More information about the mesa-dev mailing list