[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word
Marek Olšák
maraeo at gmail.com
Fri May 5 17:35:52 UTC 2017
Hi Nicolai,
This might break Nine, because it uses GENERIC indices greater than
31. The idea is that we support 32 varyings, but allow GENERIC indices up to
59 (the current "index <= 63-4" check) as long as the number of declared
varyings is <= 32.
Axel can probably tell us the maximum GENERIC index we can normally expect from Nine.
Marek
On Wed, May 3, 2017 at 3:54 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> We only advertise a maximum of 32 inputs and outputs in each shader stage,
> so everything fits into 64 bits.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++---------------
> src/gallium/drivers/radeonsi/si_shader.h | 6 +---
> src/gallium/drivers/radeonsi/si_state_shaders.c | 40 ++++---------------------
> 3 files changed, 17 insertions(+), 64 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index a48a552..67d62c3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -135,48 +135,41 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
> {
> switch (semantic_name) {
> case TGSI_SEMANTIC_POSITION:
> return 0;
> case TGSI_SEMANTIC_PSIZE:
> return 1;
> case TGSI_SEMANTIC_CLIPDIST:
> assert(index <= 1);
> return 2 + index;
> case TGSI_SEMANTIC_GENERIC:
> - if (index <= 63-4)
> + if (index < 32)
> return 4 + index;
>
> assert(!"invalid generic index");
> return 0;
>
> - default:
> - assert(!"invalid semantic name");
> - return 0;
> - }
> -}
> -
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
> -{
> - switch (name) {
> case TGSI_SEMANTIC_FOG:
> - return 0;
> + return 36;
> case TGSI_SEMANTIC_LAYER:
> - return 1;
> + return 37;
> case TGSI_SEMANTIC_VIEWPORT_INDEX:
> - return 2;
> + return 38;
> case TGSI_SEMANTIC_PRIMID:
> - return 3;
> + return 39;
> case TGSI_SEMANTIC_COLOR: /* these alias */
> case TGSI_SEMANTIC_BCOLOR:
> - return 4 + index;
> + assert(index < 2);
> + return 40 + index;
> case TGSI_SEMANTIC_TEXCOORD:
> - return 6 + index;
> + assert(index < 8);
> + return 42 + index;
> default:
> assert(!"invalid semantic name");
> return 0;
> }
> }
>
> /**
> * Get the value of a shader input parameter and extract a bitfield.
> */
> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
> @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
> semantic_name = outputs[i].semantic_name;
> semantic_index = outputs[i].semantic_index;
> bool export_param = true;
>
> switch (semantic_name) {
> case TGSI_SEMANTIC_POSITION: /* ignore these */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> - case TGSI_SEMANTIC_GENERIC:
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> if (shader->key.opt.hw_vs.kill_outputs &
> (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
> export_param = false;
> - break;
> - default:
> - if (shader->key.opt.hw_vs.kill_outputs2 &
> - (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
> - export_param = false;
> - break;
> }
>
> if (outputs[i].vertex_stream[0] != 0 &&
> outputs[i].vertex_stream[1] != 0 &&
> outputs[i].vertex_stream[2] != 0 &&
> outputs[i].vertex_stream[3] != 0)
> export_param = false;
>
> handle_semantic:
> /* Select the correct target */
> @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader,
>
> default:
> assert(0);
> }
>
> if ((processor == PIPE_SHADER_GEOMETRY ||
> processor == PIPE_SHADER_TESS_EVAL ||
> processor == PIPE_SHADER_VERTEX) &&
> !key->as_es && !key->as_ls) {
> fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
> fprintf(f, " opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
> }
> }
>
> static void si_init_shader_ctx(struct si_shader_context *ctx,
> struct si_screen *sscreen,
> LLVMTargetMachineRef tm)
> {
> struct lp_build_tgsi_context *bld_base;
> struct lp_build_tgsi_action tmpl = {};
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index cb8a902..5e43b4c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -349,25 +349,23 @@ struct si_shader_selector {
> unsigned db_shader_control;
> /* Set 0xf or 0x0 (4 bits) per each written output.
> * ANDed with spi_shader_col_format.
> */
> unsigned colors_written_4bit;
>
> /* CS parameters */
> unsigned local_size;
>
> uint64_t outputs_written; /* "get_unique_index" bits */
> - uint32_t patch_outputs_written; /* "get_unique_index" bits */
> - uint32_t outputs_written2; /* "get_unique_index2" bits */
> + uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
>
> uint64_t inputs_read; /* "get_unique_index" bits */
> - uint32_t inputs_read2; /* "get_unique_index2" bits */
> };
>
> /* Valid shader configurations:
> *
> * API shaders VS | TCS | TES | GS |pass| PS
> * are compiled as: | | | |thru|
> * | | | | |
> * Only VS & PS: VS | | | | | PS
> * GFX6 - with GS: ES | | | GS | VS | PS
> * - with tess: LS | HS | VS | | | PS
> @@ -493,21 +491,20 @@ struct si_shader_key {
> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
> uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
> /* When PS needs PrimID and GS is disabled. */
> unsigned vs_export_prim_id:1;
> } mono;
>
> /* Optimization flags for asynchronous compilation only. */
> struct {
> struct {
> uint64_t kill_outputs; /* "get_unique_index" bits */
> - uint32_t kill_outputs2; /* "get_unique_index2" bits */
> unsigned clip_disable:1;
> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>
> /* For shaders where monolithic variants have better code.
> *
> * This is a flag that has no effect on code generation,
> * but forces monolithic shaders to be used as soon as
> * possible, because it's in the "opt" group.
> */
> unsigned prefer_mono:1;
> @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
> struct ac_shader_binary *binary,
> struct si_shader_config *conf,
> LLVMTargetMachineRef tm,
> LLVMModuleRef mod,
> struct pipe_debug_callback *debug,
> unsigned processor,
> const char *name);
> void si_shader_destroy(struct si_shader *shader);
> unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
> unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
> int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
> void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
> struct pipe_debug_callback *debug, unsigned processor,
> FILE *f, bool check_debug_option);
> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
> unsigned *lds_size);
> void si_shader_apply_scratch_relocs(struct si_context *sctx,
> struct si_shader *shader,
> struct si_shader_config *config,
> uint64_t scratch_va);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 68f4d21..cf0c11f 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1225,36 +1225,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
> ps_colormask &= ps->colors_written_4bit;
>
> ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
> (!ps_colormask &&
> !ps_modifies_zs &&
> !ps->info.writes_memory);
> }
>
> /* Find out which VS outputs aren't used by the PS. */
> uint64_t outputs_written = vs->outputs_written;
> - uint32_t outputs_written2 = vs->outputs_written2;
> uint64_t inputs_read = 0;
> - uint32_t inputs_read2 = 0;
>
> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>
> if (!ps_disabled) {
> inputs_read = ps->inputs_read;
> - inputs_read2 = ps->inputs_read2;
> }
>
> uint64_t linked = outputs_written & inputs_read;
> - uint32_t linked2 = outputs_written2 & inputs_read2;
>
> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
> }
>
> /* Compute the key for the hw shader variant */
> static inline void si_shader_selector_key(struct pipe_context *ctx,
> struct si_shader_selector *sel,
> struct si_shader_key *key)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> memset(key, 0, sizeof(*key));
> @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
> unsigned offset = shader->info.vs_output_param_offset[i];
>
> if (offset <= AC_EXP_PARAM_OFFSET_31)
> continue;
>
> unsigned name = sel->info.output_semantic_name[i];
> unsigned index = sel->info.output_semantic_index[i];
> unsigned id;
>
> switch (name) {
> - case TGSI_SEMANTIC_GENERIC:
> - /* don't process indices the function can't handle */
> - if (index >= 60)
> - break;
> - /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> - id = si_shader_io_get_unique_index(name, index);
> - sel->outputs_written &= ~(1ull << id);
> - break;
> case TGSI_SEMANTIC_POSITION: /* ignore these */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> default:
> - id = si_shader_io_get_unique_index2(name, index);
> - sel->outputs_written2 &= ~(1u << id);
> + id = si_shader_io_get_unique_index(name, index);
> + sel->outputs_written &= ~(1ull << id);
> + break;
> }
> }
> }
> }
>
> /* Pre-compilation. */
> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
> struct si_shader_ctx_state state = {sel};
> struct si_shader_key key;
>
> @@ -1992,64 +1979,49 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
> unsigned index = sel->info.output_semantic_index[i];
>
> switch (name) {
> case TGSI_SEMANTIC_TESSINNER:
> case TGSI_SEMANTIC_TESSOUTER:
> case TGSI_SEMANTIC_PATCH:
> sel->patch_outputs_written |=
> 1llu << si_shader_io_get_unique_index_patch(name, index);
> break;
>
> - case TGSI_SEMANTIC_GENERIC:
> - /* don't process indices the function can't handle */
> - if (index >= 60)
> - break;
> - /* fall through */
> - case TGSI_SEMANTIC_POSITION:
> - case TGSI_SEMANTIC_PSIZE:
> - case TGSI_SEMANTIC_CLIPDIST:
> - sel->outputs_written |=
> - 1llu << si_shader_io_get_unique_index(name, index);
> - break;
> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> default:
> - sel->outputs_written2 |=
> - 1u << si_shader_io_get_unique_index2(name, index);
> + sel->outputs_written |=
> + 1llu << si_shader_io_get_unique_index(name, index);
> }
> }
> sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
>
> /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
> * conflicts, i.e. each vertex will start at a different bank.
> */
> if (sctx->b.chip_class >= GFX9)
> sel->esgs_itemsize += 4;
> break;
>
> case PIPE_SHADER_FRAGMENT:
> for (i = 0; i < sel->info.num_inputs; i++) {
> unsigned name = sel->info.input_semantic_name[i];
> unsigned index = sel->info.input_semantic_index[i];
>
> switch (name) {
> - case TGSI_SEMANTIC_CLIPDIST:
> - case TGSI_SEMANTIC_GENERIC:
> + default:
> sel->inputs_read |=
> 1llu << si_shader_io_get_unique_index(name, index);
> break;
> case TGSI_SEMANTIC_PCOORD: /* ignore this */
> break;
> - default:
> - sel->inputs_read2 |=
> - 1u << si_shader_io_get_unique_index2(name, index);
> }
> }
>
> for (i = 0; i < 8; i++)
> if (sel->info.colors_written & (1 << i))
> sel->colors_written_4bit |= 0xf << (4 * i);
>
> for (i = 0; i < sel->info.num_inputs; i++) {
> if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
> int index = sel->info.input_semantic_index[i];
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list