[Mesa-dev] [PATCH 5/5] radeonsi: get rid of secondary input/output word

Nicolai Hähnle nhaehnle at gmail.com
Wed May 10 17:30:30 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

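By keeping track of fewer generics, all input/output semantics fit into
a single 64-bit mask, so the secondary words (outputs_written2,
inputs_read2, kill_outputs2) and si_shader_io_get_unique_index2() can be
removed.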
---
 src/gallium/drivers/radeonsi/si_shader.c        | 32 +++++++------------------
 src/gallium/drivers/radeonsi/si_shader.h        |  6 +----
 src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
 3 files changed, 13 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c12c8ea..837cc1c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 	case TGSI_SEMANTIC_CLIPDIST:
 		assert(index <= 1);
 		return 2 + index;
 	case TGSI_SEMANTIC_GENERIC:
 		if (index < SI_MAX_IO_GENERIC)
 			return 4 + index;
 
 		assert(!"invalid generic index");
 		return 0;
 
-	default:
-		assert(!"invalid semantic name");
-		return 0;
-	}
-}
-
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
-{
-	switch (name) {
 	case TGSI_SEMANTIC_FOG:
-		return 0;
+		return SI_MAX_IO_GENERIC + 4;
 	case TGSI_SEMANTIC_LAYER:
-		return 1;
+		return SI_MAX_IO_GENERIC + 5;
 	case TGSI_SEMANTIC_VIEWPORT_INDEX:
-		return 2;
+		return SI_MAX_IO_GENERIC + 6;
 	case TGSI_SEMANTIC_PRIMID:
-		return 3;
+		return SI_MAX_IO_GENERIC + 7;
 	case TGSI_SEMANTIC_COLOR: /* these alias */
 	case TGSI_SEMANTIC_BCOLOR:
-		return 4 + index;
+		assert(index < 2);
+		return SI_MAX_IO_GENERIC + 8 + index;
 	case TGSI_SEMANTIC_TEXCOORD:
 		assert(index < 8);
-		return 6 + index;
+		assert(SI_MAX_IO_GENERIC + 10 + index < 64);
+		return SI_MAX_IO_GENERIC + 10 + index;
 	default:
 		assert(!"invalid semantic name");
 		return 0;
 	}
 }
 
 /**
  * Get the value of a shader input parameter and extract a bitfield.
  */
 static LLVMValueRef unpack_param(struct si_shader_context *ctx,
@@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
 		case TGSI_SEMANTIC_POSITION: /* ignore these */
 		case TGSI_SEMANTIC_PSIZE:
 		case TGSI_SEMANTIC_CLIPVERTEX:
 		case TGSI_SEMANTIC_EDGEFLAG:
 			break;
 		case TGSI_SEMANTIC_GENERIC:
 			/* don't process indices the function can't handle */
 			if (semantic_index >= SI_MAX_IO_GENERIC)
 				break;
 			/* fall through */
-		case TGSI_SEMANTIC_CLIPDIST:
+		default:
 			if (shader->key.opt.hw_vs.kill_outputs &
 			    (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
 				export_param = false;
-			break;
-		default:
-			if (shader->key.opt.hw_vs.kill_outputs2 &
-			    (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
-				export_param = false;
-			break;
 		}
 
 		if (outputs[i].vertex_stream[0] != 0 &&
 		    outputs[i].vertex_stream[1] != 0 &&
 		    outputs[i].vertex_stream[2] != 0 &&
 		    outputs[i].vertex_stream[3] != 0)
 			export_param = false;
 
 handle_semantic:
 		/* Select the correct target */
@@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
 
 	default:
 		assert(0);
 	}
 
 	if ((processor == PIPE_SHADER_GEOMETRY ||
 	     processor == PIPE_SHADER_TESS_EVAL ||
 	     processor == PIPE_SHADER_VERTEX) &&
 	    !key->as_es && !key->as_ls) {
 		fprintf(f, "  opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
-		fprintf(f, "  opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
 		fprintf(f, "  opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
 	}
 }
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       struct si_screen *sscreen,
 			       LLVMTargetMachineRef tm)
 {
 	struct lp_build_tgsi_context *bld_base;
 	struct lp_build_tgsi_action tmpl = {};
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 3075900..1627de3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -354,25 +354,23 @@ struct si_shader_selector {
 	unsigned	db_shader_control;
 	/* Set 0xf or 0x0 (4 bits) per each written output.
 	 * ANDed with spi_shader_col_format.
 	 */
 	unsigned	colors_written_4bit;
 
 	/* CS parameters */
 	unsigned local_size;
 
 	uint64_t	outputs_written;	/* "get_unique_index" bits */
-	uint32_t	patch_outputs_written;	/* "get_unique_index" bits */
-	uint32_t	outputs_written2;	/* "get_unique_index2" bits */
+	uint32_t	patch_outputs_written;	/* "get_unique_index_patch" bits */
 
 	uint64_t	inputs_read;		/* "get_unique_index" bits */
-	uint32_t	inputs_read2;		/* "get_unique_index2" bits */
 };
 
 /* Valid shader configurations:
  *
  * API shaders       VS | TCS | TES | GS |pass| PS
  * are compiled as:     |     |     |    |thru|
  *                      |     |     |    |    |
  * Only VS & PS:     VS |     |     |    |    | PS
  * GFX6 - with GS:   ES |     |     | GS | VS | PS
  *      - with tess: LS | HS  | VS  |    |    | PS
@@ -498,21 +496,20 @@ struct si_shader_key {
 		uint8_t		vs_fix_fetch[SI_MAX_ATTRIBS];
 		uint64_t	ff_tcs_inputs_to_copy; /* for fixed-func TCS */
 		/* When PS needs PrimID and GS is disabled. */
 		unsigned	vs_export_prim_id:1;
 	} mono;
 
 	/* Optimization flags for asynchronous compilation only. */
 	struct {
 		struct {
 			uint64_t	kill_outputs; /* "get_unique_index" bits */
-			uint32_t	kill_outputs2; /* "get_unique_index2" bits */
 			unsigned	clip_disable:1;
 		} hw_vs; /* HW VS (it can be VS, TES, GS) */
 
 		/* For shaders where monolithic variants have better code.
 		 *
 		 * This is a flag that has no effect on code generation,
 		 * but forces monolithic shaders to be used as soon as
 		 * possible, because it's in the "opt" group.
 		 */
 		unsigned	prefer_mono:1;
@@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 			   LLVMTargetMachineRef tm,
 			   struct si_shader *shader,
 			   bool is_monolithic,
 			   struct pipe_debug_callback *debug);
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f, bool check_debug_option);
 void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
 				      unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_shader *shader,
 				    uint64_t scratch_va);
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
 				  struct si_shader_config *conf,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6020bec..5da6014 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1224,36 +1224,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 			ps_colormask &= ps->colors_written_4bit;
 
 		ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
 			      (!ps_colormask &&
 			       !ps_modifies_zs &&
 			       !ps->info.writes_memory);
 	}
 
 	/* Find out which VS outputs aren't used by the PS. */
 	uint64_t outputs_written = vs->outputs_written;
-	uint32_t outputs_written2 = vs->outputs_written2;
 	uint64_t inputs_read = 0;
-	uint32_t inputs_read2 = 0;
 
 	outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
 
 	if (!ps_disabled) {
 		inputs_read = ps->inputs_read;
-		inputs_read2 = ps->inputs_read2;
 	}
 
 	uint64_t linked = outputs_written & inputs_read;
-	uint32_t linked2 = outputs_written2 & inputs_read2;
 
 	key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
-	key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
 }
 
 /* Compute the key for the hw shader variant */
 static inline void si_shader_selector_key(struct pipe_context *ctx,
 					  struct si_shader_selector *sel,
 					  struct si_shader_key *key)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	memset(key, 0, sizeof(*key));
@@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
 				unsigned name = sel->info.output_semantic_name[i];
 				unsigned index = sel->info.output_semantic_index[i];
 				unsigned id;
 
 				switch (name) {
 				case TGSI_SEMANTIC_GENERIC:
 					/* don't process indices the function can't handle */
 					if (index >= SI_MAX_IO_GENERIC)
 						break;
 					/* fall through */
-				case TGSI_SEMANTIC_CLIPDIST:
+				default:
 					id = si_shader_io_get_unique_index(name, index);
 					sel->outputs_written &= ~(1ull << id);
 					break;
 				case TGSI_SEMANTIC_POSITION: /* ignore these */
 				case TGSI_SEMANTIC_PSIZE:
 				case TGSI_SEMANTIC_CLIPVERTEX:
 				case TGSI_SEMANTIC_EDGEFLAG:
 					break;
-				default:
-					id = si_shader_io_get_unique_index2(name, index);
-					sel->outputs_written2 &= ~(1u << id);
 				}
 			}
 		}
 	}
 
 	/* Pre-compilation. */
 	if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
 		struct si_shader_ctx_state state = {sel};
 		struct si_shader_key key;
 
@@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 			case TGSI_SEMANTIC_PATCH:
 				sel->patch_outputs_written |=
 					1llu << si_shader_io_get_unique_index_patch(name, index);
 				break;
 
 			case TGSI_SEMANTIC_GENERIC:
 				/* don't process indices the function can't handle */
 				if (index >= SI_MAX_IO_GENERIC)
 					break;
 				/* fall through */
-			case TGSI_SEMANTIC_POSITION:
-			case TGSI_SEMANTIC_PSIZE:
-			case TGSI_SEMANTIC_CLIPDIST:
+			default:
 				sel->outputs_written |=
 					1llu << si_shader_io_get_unique_index(name, index);
 				break;
 			case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
 			case TGSI_SEMANTIC_EDGEFLAG:
 				break;
-			default:
-				sel->outputs_written2 |=
-					1u << si_shader_io_get_unique_index2(name, index);
 			}
 		}
 		sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
 
 		/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
 		 * conflicts, i.e. each vertex will start at a different bank.
 		 */
 		if (sctx->b.chip_class >= GFX9)
 			sel->esgs_itemsize += 4;
 		break;
@@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
 			unsigned index = sel->info.input_semantic_index[i];
 
 			switch (name) {
 			case TGSI_SEMANTIC_GENERIC:
 				/* don't process indices the function can't handle */
 				if (index >= SI_MAX_IO_GENERIC)
 					break;
 				/* fall through */
-			case TGSI_SEMANTIC_CLIPDIST:
+			default:
 				sel->inputs_read |=
 					1llu << si_shader_io_get_unique_index(name, index);
 				break;
 			case TGSI_SEMANTIC_PCOORD: /* ignore this */
 				break;
-			default:
-				sel->inputs_read2 |=
-					1u << si_shader_io_get_unique_index2(name, index);
 			}
 		}
 
 		for (i = 0; i < 8; i++)
 			if (sel->info.colors_written & (1 << i))
 				sel->colors_written_4bit |= 0xf << (4 * i);
 
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
 				int index = sel->info.input_semantic_index[i];
-- 
2.9.3



More information about the mesa-dev mailing list