[Mesa-dev] [PATCH 5/5] radeonsi: get rid of secondary input/output word
Dieter Nützel
Dieter at nuetzel-hh.de
Thu May 11 03:49:25 UTC 2017
For the series:
Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
on radeonsi / RX580, 8 GB
Unigine_Heaven-4.0
Unigine_Valley-1.0
Unigine_Superposition-1.0
Nine (as Nicolai requested):
running wine-2.7_gallium_nine+staging
Steam: PES2015 + TS2017
LS2015 + LS2017 (!!!)
Dieter
Am 10.05.2017 19:30, schrieb Nicolai Hähnle:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> By keeping track of fewer generics, everything can fit into 64 bits.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 32 +++++++------------------
> src/gallium/drivers/radeonsi/si_shader.h | 6 +----
> src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
> 3 files changed, 13 insertions(+), 47 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index c12c8ea..837cc1c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned
> semantic_name, unsigned index)
> case TGSI_SEMANTIC_CLIPDIST:
> assert(index <= 1);
> return 2 + index;
> case TGSI_SEMANTIC_GENERIC:
> if (index < SI_MAX_IO_GENERIC)
> return 4 + index;
>
> assert(!"invalid generic index");
> return 0;
>
> - default:
> - assert(!"invalid semantic name");
> - return 0;
> - }
> -}
> -
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
> -{
> - switch (name) {
> case TGSI_SEMANTIC_FOG:
> - return 0;
> + return SI_MAX_IO_GENERIC + 4;
> case TGSI_SEMANTIC_LAYER:
> - return 1;
> + return SI_MAX_IO_GENERIC + 5;
> case TGSI_SEMANTIC_VIEWPORT_INDEX:
> - return 2;
> + return SI_MAX_IO_GENERIC + 6;
> case TGSI_SEMANTIC_PRIMID:
> - return 3;
> + return SI_MAX_IO_GENERIC + 7;
> case TGSI_SEMANTIC_COLOR: /* these alias */
> case TGSI_SEMANTIC_BCOLOR:
> - return 4 + index;
> + assert(index < 2);
> + return SI_MAX_IO_GENERIC + 8 + index;
> case TGSI_SEMANTIC_TEXCOORD:
> assert(index < 8);
> - return 6 + index;
> + assert(SI_MAX_IO_GENERIC + 10 + index < 64);
> + return SI_MAX_IO_GENERIC + 10 + index;
> default:
> assert(!"invalid semantic name");
> return 0;
> }
> }
>
> /**
> * Get the value of a shader input parameter and extract a bitfield.
> */
> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
> @@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct
> lp_build_tgsi_context *bld_base,
> case TGSI_SEMANTIC_POSITION: /* ignore these */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (semantic_index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> if (shader->key.opt.hw_vs.kill_outputs &
> (1ull << si_shader_io_get_unique_index(semantic_name,
> semantic_index)))
> export_param = false;
> - break;
> - default:
> - if (shader->key.opt.hw_vs.kill_outputs2 &
> - (1u << si_shader_io_get_unique_index2(semantic_name,
> semantic_index)))
> - export_param = false;
> - break;
> }
>
> if (outputs[i].vertex_stream[0] != 0 &&
> outputs[i].vertex_stream[1] != 0 &&
> outputs[i].vertex_stream[2] != 0 &&
> outputs[i].vertex_stream[3] != 0)
> export_param = false;
>
> handle_semantic:
> /* Select the correct target */
> @@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned
> processor, const struct si_shader *shade
>
> default:
> assert(0);
> }
>
> if ((processor == PIPE_SHADER_GEOMETRY ||
> processor == PIPE_SHADER_TESS_EVAL ||
> processor == PIPE_SHADER_VERTEX) &&
> !key->as_es && !key->as_ls) {
> fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n",
> key->opt.hw_vs.kill_outputs);
> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n",
> key->opt.hw_vs.kill_outputs2);
> fprintf(f, " opt.hw_vs.clip_disable = %u\n",
> key->opt.hw_vs.clip_disable);
> }
> }
>
> static void si_init_shader_ctx(struct si_shader_context *ctx,
> struct si_screen *sscreen,
> LLVMTargetMachineRef tm)
> {
> struct lp_build_tgsi_context *bld_base;
> struct lp_build_tgsi_action tmpl = {};
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 3075900..1627de3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -354,25 +354,23 @@ struct si_shader_selector {
> unsigned db_shader_control;
> /* Set 0xf or 0x0 (4 bits) per each written output.
> * ANDed with spi_shader_col_format.
> */
> unsigned colors_written_4bit;
>
> /* CS parameters */
> unsigned local_size;
>
> uint64_t outputs_written; /* "get_unique_index" bits */
> - uint32_t patch_outputs_written; /* "get_unique_index" bits */
> - uint32_t outputs_written2; /* "get_unique_index2" bits */
> + uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
>
> uint64_t inputs_read; /* "get_unique_index" bits */
> - uint32_t inputs_read2; /* "get_unique_index2" bits */
> };
>
> /* Valid shader configurations:
> *
> * API shaders VS | TCS | TES | GS |pass| PS
> * are compiled as: | | | |thru|
> * | | | | |
> * Only VS & PS: VS | | | | | PS
> * GFX6 - with GS: ES | | | GS | VS | PS
> * - with tess: LS | HS | VS | | | PS
> @@ -498,21 +496,20 @@ struct si_shader_key {
> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
> uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
> /* When PS needs PrimID and GS is disabled. */
> unsigned vs_export_prim_id:1;
> } mono;
>
> /* Optimization flags for asynchronous compilation only. */
> struct {
> struct {
> uint64_t kill_outputs; /* "get_unique_index" bits */
> - uint32_t kill_outputs2; /* "get_unique_index2" bits */
> unsigned clip_disable:1;
> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>
> /* For shaders where monolithic variants have better code.
> *
> * This is a flag that has no effect on code generation,
> * but forces monolithic shaders to be used as soon as
> * possible, because it's in the "opt" group.
> */
> unsigned prefer_mono:1;
> @@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen
> *sscreen,
> LLVMTargetMachineRef tm,
> struct si_shader *shader,
> bool is_monolithic,
> struct pipe_debug_callback *debug);
> int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef
> tm,
> struct si_shader *shader,
> struct pipe_debug_callback *debug);
> void si_shader_destroy(struct si_shader *shader);
> unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name,
> unsigned index);
> unsigned si_shader_io_get_unique_index(unsigned semantic_name,
> unsigned index);
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned
> index);
> int si_shader_binary_upload(struct si_screen *sscreen, struct
> si_shader *shader);
> void si_shader_dump(struct si_screen *sscreen, const struct si_shader
> *shader,
> struct pipe_debug_callback *debug, unsigned processor,
> FILE *f, bool check_debug_option);
> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
> unsigned *lds_size);
> void si_shader_apply_scratch_relocs(struct si_shader *shader,
> uint64_t scratch_va);
> void si_shader_binary_read_config(struct ac_shader_binary *binary,
> struct si_shader_config *conf,
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 6020bec..5da6014 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1224,36 +1224,31 @@ static void
> si_shader_selector_key_hw_vs(struct si_context *sctx,
> ps_colormask &= ps->colors_written_4bit;
>
> ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
> (!ps_colormask &&
> !ps_modifies_zs &&
> !ps->info.writes_memory);
> }
>
> /* Find out which VS outputs aren't used by the PS. */
> uint64_t outputs_written = vs->outputs_written;
> - uint32_t outputs_written2 = vs->outputs_written2;
> uint64_t inputs_read = 0;
> - uint32_t inputs_read2 = 0;
>
> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>
> if (!ps_disabled) {
> inputs_read = ps->inputs_read;
> - inputs_read2 = ps->inputs_read2;
> }
>
> uint64_t linked = outputs_written & inputs_read;
> - uint32_t linked2 = outputs_written2 & inputs_read2;
>
> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
> }
>
> /* Compute the key for the hw shader variant */
> static inline void si_shader_selector_key(struct pipe_context *ctx,
> struct si_shader_selector *sel,
> struct si_shader_key *key)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> memset(key, 0, sizeof(*key));
> @@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job,
> int thread_index)
> unsigned name = sel->info.output_semantic_name[i];
> unsigned index = sel->info.output_semantic_index[i];
> unsigned id;
>
> switch (name) {
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> id = si_shader_io_get_unique_index(name, index);
> sel->outputs_written &= ~(1ull << id);
> break;
> case TGSI_SEMANTIC_POSITION: /* ignore these */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> - default:
> - id = si_shader_io_get_unique_index2(name, index);
> - sel->outputs_written2 &= ~(1u << id);
> }
> }
> }
> }
>
> /* Pre-compilation. */
> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
> struct si_shader_ctx_state state = {sel};
> struct si_shader_key key;
>
> @@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> case TGSI_SEMANTIC_PATCH:
> sel->patch_outputs_written |=
> 1llu << si_shader_io_get_unique_index_patch(name, index);
> break;
>
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_POSITION:
> - case TGSI_SEMANTIC_PSIZE:
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> sel->outputs_written |=
> 1llu << si_shader_io_get_unique_index(name, index);
> break;
> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> - default:
> - sel->outputs_written2 |=
> - 1u << si_shader_io_get_unique_index2(name, index);
> }
> }
> sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
>
> /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
> * conflicts, i.e. each vertex will start at a different bank.
> */
> if (sctx->b.chip_class >= GFX9)
> sel->esgs_itemsize += 4;
> break;
> @@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> for (i = 0; i < sel->info.num_inputs; i++) {
> unsigned name = sel->info.input_semantic_name[i];
> unsigned index = sel->info.input_semantic_index[i];
>
> switch (name) {
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> sel->inputs_read |=
> 1llu << si_shader_io_get_unique_index(name, index);
> break;
> case TGSI_SEMANTIC_PCOORD: /* ignore this */
> break;
> - default:
> - sel->inputs_read2 |=
> - 1u << si_shader_io_get_unique_index2(name, index);
> }
> }
>
> for (i = 0; i < 8; i++)
> if (sel->info.colors_written & (1 << i))
> sel->colors_written_4bit |= 0xf << (4 * i);
>
> for (i = 0; i < sel->info.num_inputs; i++) {
> if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
> int index = sel->info.input_semantic_index[i];
More information about the mesa-dev mailing list