[Mesa-dev] [PATCH 01/18] radeonsi: make fix_fetch an array of uint8_t

Marek Olšák maraeo at gmail.com
Thu Feb 16 12:52:53 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Store one byte per vertex input instead of packing one nibble per input
into a uint64_t, so that we can add 3-component fallbacks.
---
 src/gallium/drivers/radeonsi/si_shader.c        |  8 +++++--
 src/gallium/drivers/radeonsi/si_shader.h        |  5 ++---
 src/gallium/drivers/radeonsi/si_state.c         | 28 ++++++++++++-------------
 src/gallium/drivers/radeonsi/si_state.h         |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c |  5 ++---
 5 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cfff54a..8b9fed9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -359,21 +359,21 @@ static void declare_input_vs(
 	t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
 
 	t_offset = lp_build_const_int32(gallivm, input_index);
 
 	t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
 
 	vertex_index = LLVMGetParam(ctx->main_fn,
 				    ctx->param_vertex_index0 +
 				    input_index);
 
-	fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
+	fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
 
 	/* Do multiple loads for double formats. */
 	if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
 		num_fetches = 3; /* 3 2-dword loads */
 		fetch_stride = 8;
 	} else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
 		num_fetches = 2; /* 2 4-dword loads */
 		fetch_stride = 16;
 	} else {
 		num_fetches = 1;
@@ -6263,21 +6263,25 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
 		fprintf(f, "  part.vs.prolog.instance_divisors = {");
 		for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
 			fprintf(f, !i ? "%u" : ", %u",
 				key->part.vs.prolog.instance_divisors[i]);
 		fprintf(f, "}\n");
 		fprintf(f, "  part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id);
 		fprintf(f, "  as_es = %u\n", key->as_es);
 		fprintf(f, "  as_ls = %u\n", key->as_ls);
-		fprintf(f, "  mono.vs.fix_fetch = 0x%"PRIx64"\n", key->mono.vs.fix_fetch);
+
+		fprintf(f, "  mono.vs.fix_fetch = {");
+		for (i = 0; i < SI_MAX_ATTRIBS; i++)
+			fprintf(f, !i ? "%u" : ", %u", key->mono.vs.fix_fetch[i]);
+		fprintf(f, "}\n");
 		break;
 
 	case PIPE_SHADER_TESS_CTRL:
 		fprintf(f, "  part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
 		fprintf(f, "  mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy);
 		break;
 
 	case PIPE_SHADER_TESS_EVAL:
 		fprintf(f, "  part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id);
 		fprintf(f, "  as_es = %u\n", key->as_es);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6398b39..4616190 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -243,21 +243,20 @@ enum {
 	SI_FIX_FETCH_RGBX_32_UNORM,
 	SI_FIX_FETCH_RGBA_32_SNORM,
 	SI_FIX_FETCH_RGBX_32_SNORM,
 	SI_FIX_FETCH_RGBA_32_USCALED,
 	SI_FIX_FETCH_RGBA_32_SSCALED,
 	SI_FIX_FETCH_RGBA_32_FIXED,
 	SI_FIX_FETCH_RGBX_32_FIXED,
 	SI_FIX_FETCH_RG_64_FLOAT,
 	SI_FIX_FETCH_RGB_64_FLOAT,
 	SI_FIX_FETCH_RGBA_64_FLOAT,
-	SI_FIX_FETCH_RESERVED_15, /* maximum */
 };
 
 struct si_shader;
 
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
 	/* Should only be used by si_init_shader_selector_async and
 	 * si_build_shader_variant if thread_index == -1 (non-threaded). */
 	LLVMTargetMachineRef		tm;
 
@@ -438,22 +437,22 @@ struct si_shader_key {
 
 	/* These two are initially set according to the NEXT_SHADER property,
 	 * or guessed if the property doesn't seem correct.
 	 */
 	unsigned as_es:1; /* export shader */
 	unsigned as_ls:1; /* local shader */
 
 	/* Flags for monolithic compilation only. */
 	union {
 		struct {
-			/* One nibble for every input: SI_FIX_FETCH_* enums. */
-			uint64_t	fix_fetch;
+			/* One byte for every input: SI_FIX_FETCH_* enums. */
+			uint8_t		fix_fetch[SI_MAX_ATTRIBS];
 		} vs;
 		struct {
 			uint64_t	inputs_to_copy; /* for fixed-func TCS */
 		} tcs;
 	} mono;
 
 	/* Optimization flags for asynchronous compilation only. */
 	union {
 		struct {
 			uint64_t	kill_outputs; /* "get_unique_index" bits */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4ccca52..d9b9f83 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3392,72 +3392,72 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
 		memcpy(swizzle, desc->swizzle, sizeof(swizzle));
 
 		v->format_size[i] = desc->block.bits / 8;
 
 		/* The hardware always treats the 2-bit alpha channel as
 		 * unsigned, so a shader workaround is needed.
 		 */
 		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
 			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
 				/* This isn't actually used in OpenGL. */
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
 			}
 		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
 			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
 			else
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
 		} else if (channel && channel->size == 32 && !channel->pure_integer) {
 			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
 				if (channel->normalized) {
 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
+						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM;
 					else
-						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
+						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM;
 				} else {
-					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
+					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED;
 				}
 			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
 				if (channel->normalized) {
 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
+						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM;
 					else
-						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
+						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM;
 				} else {
-					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
+					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED;
 				}
 			}
 		} else if (channel && channel->size == 64 &&
 			   channel->type == UTIL_FORMAT_TYPE_FLOAT) {
 			switch (desc->nr_channels) {
 			case 1:
 			case 2:
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
 				swizzle[0] = PIPE_SWIZZLE_X;
 				swizzle[1] = PIPE_SWIZZLE_Y;
 				swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
 				swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
 				break;
 			case 3:
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
 				swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
 				swizzle[1] = PIPE_SWIZZLE_Y;
 				swizzle[2] = PIPE_SWIZZLE_0;
 				swizzle[3] = PIPE_SWIZZLE_0;
 				break;
 			case 4:
-				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i);
+				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
 				swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
 				swizzle[1] = PIPE_SWIZZLE_Y;
 				swizzle[2] = PIPE_SWIZZLE_Z;
 				swizzle[3] = PIPE_SWIZZLE_W;
 				break;
 			default:
 				assert(0);
 			}
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 07b7d58..cd44ed1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -100,21 +100,21 @@ struct si_vertex_element
 {
 	unsigned			count;
 	unsigned			first_vb_use_mask;
 	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
 	unsigned			desc_list_byte_size;
 
 	/* Two bits per attribute indicating the size of each vector component
 	 * in bytes if the size 3-workaround must be applied.
 	 */
 	uint32_t			fix_size3;
-	uint64_t			fix_fetch;
+	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
 
 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
 	uint32_t			format_size[SI_MAX_ATTRIBS];
 	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_state_rasterizer	*rasterizer;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bde02f5..9570259 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -968,23 +968,22 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
 	switch (sel->type) {
 	case PIPE_SHADER_VERTEX:
 		if (sctx->vertex_elements) {
 			unsigned count = MIN2(sel->info.num_inputs,
 					      sctx->vertex_elements->count);
 			for (i = 0; i < count; ++i)
 				key->part.vs.prolog.instance_divisors[i] =
 					sctx->vertex_elements->elements[i].instance_divisor;
 
-			key->mono.vs.fix_fetch =
-				sctx->vertex_elements->fix_fetch &
-				u_bit_consecutive64(0, 4 * count);
+			memcpy(key->mono.vs.fix_fetch,
+			       sctx->vertex_elements->fix_fetch, count);
 		}
 		if (sctx->tes_shader.cso)
 			key->as_ls = 1;
 		else if (sctx->gs_shader.cso)
 			key->as_es = 1;
 		else {
 			si_shader_selector_key_hw_vs(sctx, sel, key);
 
 			if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
 				key->part.vs.epilog.export_prim_id = 1;
-- 
2.7.4



More information about the mesa-dev mailing list