[Mesa-dev] [PATCH 5/5] radeonsi: get rid of secondary input/output word

Dieter Nützel Dieter at nuetzel-hh.de
Thu May 11 03:49:25 UTC 2017


For the series:

Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>

on radeonsi / RX580, 8 GB

Unigine_Heaven-4.0
Unigine_Valley-1.0
Unigine_Superposition-1.0

Nine (as Nicolai requested):
running wine-2.7_gallium_nine+staging

Steam: PES2015 + TS2017
LS2015 + LS2017 (!!!)

Dieter

Am 10.05.2017 19:30, schrieb Nicolai Hähnle:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> By keeping track of fewer generics, everything can fit into 64 bits.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 32 +++++++------------------
>  src/gallium/drivers/radeonsi/si_shader.h        |  6 +----
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
>  3 files changed, 13 insertions(+), 47 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c12c8ea..837cc1c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>  	case TGSI_SEMANTIC_CLIPDIST:
>  		assert(index <= 1);
>  		return 2 + index;
>  	case TGSI_SEMANTIC_GENERIC:
>  		if (index < SI_MAX_IO_GENERIC)
>  			return 4 + index;
> 
>  		assert(!"invalid generic index");
>  		return 0;
> 
> -	default:
> -		assert(!"invalid semantic name");
> -		return 0;
> -	}
> -}
> -
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
> -{
> -	switch (name) {
>  	case TGSI_SEMANTIC_FOG:
> -		return 0;
> +		return SI_MAX_IO_GENERIC + 4;
>  	case TGSI_SEMANTIC_LAYER:
> -		return 1;
> +		return SI_MAX_IO_GENERIC + 5;
>  	case TGSI_SEMANTIC_VIEWPORT_INDEX:
> -		return 2;
> +		return SI_MAX_IO_GENERIC + 6;
>  	case TGSI_SEMANTIC_PRIMID:
> -		return 3;
> +		return SI_MAX_IO_GENERIC + 7;
>  	case TGSI_SEMANTIC_COLOR: /* these alias */
>  	case TGSI_SEMANTIC_BCOLOR:
> -		return 4 + index;
> +		assert(index < 2);
> +		return SI_MAX_IO_GENERIC + 8 + index;
>  	case TGSI_SEMANTIC_TEXCOORD:
>  		assert(index < 8);
> -		return 6 + index;
> +		assert(SI_MAX_IO_GENERIC + 10 + index < 64);
> +		return SI_MAX_IO_GENERIC + 10 + index;
>  	default:
>  		assert(!"invalid semantic name");
>  		return 0;
>  	}
>  }
> 
>  /**
>   * Get the value of a shader input parameter and extract a bitfield.
>   */
>  static LLVMValueRef unpack_param(struct si_shader_context *ctx,
> @@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
>  		case TGSI_SEMANTIC_POSITION: /* ignore these */
>  		case TGSI_SEMANTIC_PSIZE:
>  		case TGSI_SEMANTIC_CLIPVERTEX:
>  		case TGSI_SEMANTIC_EDGEFLAG:
>  			break;
>  		case TGSI_SEMANTIC_GENERIC:
>  			/* don't process indices the function can't handle */
>  			if (semantic_index >= SI_MAX_IO_GENERIC)
>  				break;
>  			/* fall through */
> -		case TGSI_SEMANTIC_CLIPDIST:
> +		default:
>  			if (shader->key.opt.hw_vs.kill_outputs &
>  			    (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
>  				export_param = false;
> -			break;
> -		default:
> -			if (shader->key.opt.hw_vs.kill_outputs2 &
> -			    (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
> -				export_param = false;
> -			break;
>  		}
> 
>  		if (outputs[i].vertex_stream[0] != 0 &&
>  		    outputs[i].vertex_stream[1] != 0 &&
>  		    outputs[i].vertex_stream[2] != 0 &&
>  		    outputs[i].vertex_stream[3] != 0)
>  			export_param = false;
> 
>  handle_semantic:
>  		/* Select the correct target */
> @@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
> 
>  	default:
>  		assert(0);
>  	}
> 
>  	if ((processor == PIPE_SHADER_GEOMETRY ||
>  	     processor == PIPE_SHADER_TESS_EVAL ||
>  	     processor == PIPE_SHADER_VERTEX) &&
>  	    !key->as_es && !key->as_ls) {
>  		fprintf(f, "  opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
> -		fprintf(f, "  opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
>  		fprintf(f, "  opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
>  	}
>  }
> 
>  static void si_init_shader_ctx(struct si_shader_context *ctx,
>  			       struct si_screen *sscreen,
>  			       LLVMTargetMachineRef tm)
>  {
>  	struct lp_build_tgsi_context *bld_base;
>  	struct lp_build_tgsi_action tmpl = {};
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 3075900..1627de3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -354,25 +354,23 @@ struct si_shader_selector {
>  	unsigned	db_shader_control;
>  	/* Set 0xf or 0x0 (4 bits) per each written output.
>  	 * ANDed with spi_shader_col_format.
>  	 */
>  	unsigned	colors_written_4bit;
> 
>  	/* CS parameters */
>  	unsigned local_size;
> 
>  	uint64_t	outputs_written;	/* "get_unique_index" bits */
> -	uint32_t	patch_outputs_written;	/* "get_unique_index" bits */
> -	uint32_t	outputs_written2;	/* "get_unique_index2" bits */
> +	uint32_t	patch_outputs_written;	/* "get_unique_index_patch" bits */
> 
>  	uint64_t	inputs_read;		/* "get_unique_index" bits */
> -	uint32_t	inputs_read2;		/* "get_unique_index2" bits */
>  };
> 
>  /* Valid shader configurations:
>   *
>   * API shaders       VS | TCS | TES | GS |pass| PS
>   * are compiled as:     |     |     |    |thru|
>   *                      |     |     |    |    |
>   * Only VS & PS:     VS |     |     |    |    | PS
>   * GFX6 - with GS:   ES |     |     | GS | VS | PS
>   *      - with tess: LS | HS  | VS  |    |    | PS
> @@ -498,21 +496,20 @@ struct si_shader_key {
>  		uint8_t		vs_fix_fetch[SI_MAX_ATTRIBS];
>  		uint64_t	ff_tcs_inputs_to_copy; /* for fixed-func TCS */
>  		/* When PS needs PrimID and GS is disabled. */
>  		unsigned	vs_export_prim_id:1;
>  	} mono;
> 
>  	/* Optimization flags for asynchronous compilation only. */
>  	struct {
>  		struct {
>  			uint64_t	kill_outputs; /* "get_unique_index" bits */
> -			uint32_t	kill_outputs2; /* "get_unique_index2" bits */
>  			unsigned	clip_disable:1;
>  		} hw_vs; /* HW VS (it can be VS, TES, GS) */
> 
>  		/* For shaders where monolithic variants have better code.
>  		 *
>  		 * This is a flag that has no effect on code generation,
>  		 * but forces monolithic shaders to be used as soon as
>  		 * possible, because it's in the "opt" group.
>  		 */
>  		unsigned	prefer_mono:1;
> @@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
>  			   LLVMTargetMachineRef tm,
>  			   struct si_shader *shader,
>  			   bool is_monolithic,
>  			   struct pipe_debug_callback *debug);
>  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
>  		     struct si_shader *shader,
>  		     struct pipe_debug_callback *debug);
>  void si_shader_destroy(struct si_shader *shader);
>  unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
>  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
>  int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
>  void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
>  		    struct pipe_debug_callback *debug, unsigned processor,
>  		    FILE *f, bool check_debug_option);
>  void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>  				      unsigned *lds_size);
>  void si_shader_apply_scratch_relocs(struct si_shader *shader,
>  				    uint64_t scratch_va);
>  void si_shader_binary_read_config(struct ac_shader_binary *binary,
>  				  struct si_shader_config *conf,
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 6020bec..5da6014 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1224,36 +1224,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
>  			ps_colormask &= ps->colors_written_4bit;
> 
>  		ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
>  			      (!ps_colormask &&
>  			       !ps_modifies_zs &&
>  			       !ps->info.writes_memory);
>  	}
> 
>  	/* Find out which VS outputs aren't used by the PS. */
>  	uint64_t outputs_written = vs->outputs_written;
> -	uint32_t outputs_written2 = vs->outputs_written2;
>  	uint64_t inputs_read = 0;
> -	uint32_t inputs_read2 = 0;
> 
>  	outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
> 
>  	if (!ps_disabled) {
>  		inputs_read = ps->inputs_read;
> -		inputs_read2 = ps->inputs_read2;
>  	}
> 
>  	uint64_t linked = outputs_written & inputs_read;
> -	uint32_t linked2 = outputs_written2 & inputs_read2;
> 
>  	key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> -	key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>  }
> 
>  /* Compute the key for the hw shader variant */
>  static inline void si_shader_selector_key(struct pipe_context *ctx,
>  					  struct si_shader_selector *sel,
>  					  struct si_shader_key *key)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
> 
>  	memset(key, 0, sizeof(*key));
> @@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
>  				unsigned name = sel->info.output_semantic_name[i];
>  				unsigned index = sel->info.output_semantic_index[i];
>  				unsigned id;
> 
>  				switch (name) {
>  				case TGSI_SEMANTIC_GENERIC:
>  					/* don't process indices the function can't handle */
>  					if (index >= SI_MAX_IO_GENERIC)
>  						break;
>  					/* fall through */
> -				case TGSI_SEMANTIC_CLIPDIST:
> +				default:
>  					id = si_shader_io_get_unique_index(name, index);
>  					sel->outputs_written &= ~(1ull << id);
>  					break;
>  				case TGSI_SEMANTIC_POSITION: /* ignore these */
>  				case TGSI_SEMANTIC_PSIZE:
>  				case TGSI_SEMANTIC_CLIPVERTEX:
>  				case TGSI_SEMANTIC_EDGEFLAG:
>  					break;
> -				default:
> -					id = si_shader_io_get_unique_index2(name, index);
> -					sel->outputs_written2 &= ~(1u << id);
>  				}
>  			}
>  		}
>  	}
> 
>  	/* Pre-compilation. */
>  	if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>  		struct si_shader_ctx_state state = {sel};
>  		struct si_shader_key key;
> 
> @@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>  			case TGSI_SEMANTIC_PATCH:
>  				sel->patch_outputs_written |=
>  					1llu << si_shader_io_get_unique_index_patch(name, index);
>  				break;
> 
>  			case TGSI_SEMANTIC_GENERIC:
>  				/* don't process indices the function can't handle */
>  				if (index >= SI_MAX_IO_GENERIC)
>  					break;
>  				/* fall through */
> -			case TGSI_SEMANTIC_POSITION:
> -			case TGSI_SEMANTIC_PSIZE:
> -			case TGSI_SEMANTIC_CLIPDIST:
> +			default:
>  				sel->outputs_written |=
>  					1llu << si_shader_io_get_unique_index(name, index);
>  				break;
>  			case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
>  			case TGSI_SEMANTIC_EDGEFLAG:
>  				break;
> -			default:
> -				sel->outputs_written2 |=
> -					1u << si_shader_io_get_unique_index2(name, index);
>  			}
>  		}
>  		sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
> 
>  		/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
>  		 * conflicts, i.e. each vertex will start at a different bank.
>  		 */
>  		if (sctx->b.chip_class >= GFX9)
>  			sel->esgs_itemsize += 4;
>  		break;
> @@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>  		for (i = 0; i < sel->info.num_inputs; i++) {
>  			unsigned name = sel->info.input_semantic_name[i];
>  			unsigned index = sel->info.input_semantic_index[i];
> 
>  			switch (name) {
>  			case TGSI_SEMANTIC_GENERIC:
>  				/* don't process indices the function can't handle */
>  				if (index >= SI_MAX_IO_GENERIC)
>  					break;
>  				/* fall through */
> -			case TGSI_SEMANTIC_CLIPDIST:
> +			default:
>  				sel->inputs_read |=
>  					1llu << si_shader_io_get_unique_index(name, index);
>  				break;
>  			case TGSI_SEMANTIC_PCOORD: /* ignore this */
>  				break;
> -			default:
> -				sel->inputs_read2 |=
> -					1u << si_shader_io_get_unique_index2(name, index);
>  			}
>  		}
> 
>  		for (i = 0; i < 8; i++)
>  			if (sel->info.colors_written & (1 << i))
>  				sel->colors_written_4bit |= 0xf << (4 * i);
> 
>  		for (i = 0; i < sel->info.num_inputs; i++) {
>  			if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
>  				int index = sel->info.input_semantic_index[i];


More information about the mesa-dev mailing list