[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word
Nicolai Hähnle
nhaehnle at gmail.com
Wed May 3 13:54:16 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
We only advertise a maximum of 32 inputs and outputs in each shader stage,
so everything fits into 64 bits.
---
src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++---------------
src/gallium/drivers/radeonsi/si_shader.h | 6 +---
src/gallium/drivers/radeonsi/si_state_shaders.c | 40 ++++---------------------
3 files changed, 17 insertions(+), 64 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a48a552..67d62c3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -135,48 +135,41 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
{
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
return 0;
case TGSI_SEMANTIC_PSIZE:
return 1;
case TGSI_SEMANTIC_CLIPDIST:
assert(index <= 1);
return 2 + index;
case TGSI_SEMANTIC_GENERIC:
- if (index <= 63-4)
+ if (index < 32)
return 4 + index;
assert(!"invalid generic index");
return 0;
- default:
- assert(!"invalid semantic name");
- return 0;
- }
-}
-
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
-{
- switch (name) {
case TGSI_SEMANTIC_FOG:
- return 0;
+ return 36;
case TGSI_SEMANTIC_LAYER:
- return 1;
+ return 37;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
- return 2;
+ return 38;
case TGSI_SEMANTIC_PRIMID:
- return 3;
+ return 39;
case TGSI_SEMANTIC_COLOR: /* these alias */
case TGSI_SEMANTIC_BCOLOR:
- return 4 + index;
+ assert(index < 2);
+ return 40 + index;
case TGSI_SEMANTIC_TEXCOORD:
- return 6 + index;
+ assert(index < 8);
+ return 42 + index;
default:
assert(!"invalid semantic name");
return 0;
}
}
/**
* Get the value of a shader input parameter and extract a bitfield.
*/
static LLVMValueRef unpack_param(struct si_shader_context *ctx,
@@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
semantic_name = outputs[i].semantic_name;
semantic_index = outputs[i].semantic_index;
bool export_param = true;
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION: /* ignore these */
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_CLIPDIST:
+ default:
if (shader->key.opt.hw_vs.kill_outputs &
(1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
export_param = false;
- break;
- default:
- if (shader->key.opt.hw_vs.kill_outputs2 &
- (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
- export_param = false;
- break;
}
if (outputs[i].vertex_stream[0] != 0 &&
outputs[i].vertex_stream[1] != 0 &&
outputs[i].vertex_stream[2] != 0 &&
outputs[i].vertex_stream[3] != 0)
export_param = false;
handle_semantic:
/* Select the correct target */
@@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader,
default:
assert(0);
}
if ((processor == PIPE_SHADER_GEOMETRY ||
processor == PIPE_SHADER_TESS_EVAL ||
processor == PIPE_SHADER_VERTEX) &&
!key->as_es && !key->as_ls) {
fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
- fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
fprintf(f, " opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
}
}
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
struct lp_build_tgsi_action tmpl = {};
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index cb8a902..5e43b4c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -349,25 +349,23 @@ struct si_shader_selector {
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
*/
unsigned colors_written_4bit;
/* CS parameters */
unsigned local_size;
uint64_t outputs_written; /* "get_unique_index" bits */
- uint32_t patch_outputs_written; /* "get_unique_index" bits */
- uint32_t outputs_written2; /* "get_unique_index2" bits */
+ uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
uint64_t inputs_read; /* "get_unique_index" bits */
- uint32_t inputs_read2; /* "get_unique_index2" bits */
};
/* Valid shader configurations:
*
* API shaders VS | TCS | TES | GS |pass| PS
* are compiled as: | | | |thru|
* | | | | |
* Only VS & PS: VS | | | | | PS
* GFX6 - with GS: ES | | | GS | VS | PS
* - with tess: LS | HS | VS | | | PS
@@ -493,21 +491,20 @@ struct si_shader_key {
uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
/* When PS needs PrimID and GS is disabled. */
unsigned vs_export_prim_id:1;
} mono;
/* Optimization flags for asynchronous compilation only. */
struct {
struct {
uint64_t kill_outputs; /* "get_unique_index" bits */
- uint32_t kill_outputs2; /* "get_unique_index2" bits */
unsigned clip_disable:1;
} hw_vs; /* HW VS (it can be VS, TES, GS) */
/* For shaders where monolithic variants have better code.
*
* This is a flag that has no effect on code generation,
* but forces monolithic shaders to be used as soon as
* possible, because it's in the "opt" group.
*/
unsigned prefer_mono:1;
@@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
struct ac_shader_binary *binary,
struct si_shader_config *conf,
LLVMTargetMachineRef tm,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
unsigned processor,
const char *name);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f, bool check_debug_option);
void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
struct si_shader_config *config,
uint64_t scratch_va);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 68f4d21..cf0c11f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1225,36 +1225,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
ps_colormask &= ps->colors_written_4bit;
ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
(!ps_colormask &&
!ps_modifies_zs &&
!ps->info.writes_memory);
}
/* Find out which VS outputs aren't used by the PS. */
uint64_t outputs_written = vs->outputs_written;
- uint32_t outputs_written2 = vs->outputs_written2;
uint64_t inputs_read = 0;
- uint32_t inputs_read2 = 0;
outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
if (!ps_disabled) {
inputs_read = ps->inputs_read;
- inputs_read2 = ps->inputs_read2;
}
uint64_t linked = outputs_written & inputs_read;
- uint32_t linked2 = outputs_written2 & inputs_read2;
key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
- key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
}
/* Compute the key for the hw shader variant */
static inline void si_shader_selector_key(struct pipe_context *ctx,
struct si_shader_selector *sel,
struct si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
memset(key, 0, sizeof(*key));
@@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
unsigned offset = shader->info.vs_output_param_offset[i];
if (offset <= AC_EXP_PARAM_OFFSET_31)
continue;
unsigned name = sel->info.output_semantic_name[i];
unsigned index = sel->info.output_semantic_index[i];
unsigned id;
switch (name) {
- case TGSI_SEMANTIC_GENERIC:
- /* don't process indices the function can't handle */
- if (index >= 60)
- break;
- /* fall through */
- case TGSI_SEMANTIC_CLIPDIST:
- id = si_shader_io_get_unique_index(name, index);
- sel->outputs_written &= ~(1ull << id);
- break;
case TGSI_SEMANTIC_POSITION: /* ignore these */
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
default:
- id = si_shader_io_get_unique_index2(name, index);
- sel->outputs_written2 &= ~(1u << id);
+ id = si_shader_io_get_unique_index(name, index);
+ sel->outputs_written &= ~(1ull << id);
+ break;
}
}
}
}
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
struct si_shader_key key;
@@ -1992,64 +1979,49 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
unsigned index = sel->info.output_semantic_index[i];
switch (name) {
case TGSI_SEMANTIC_TESSINNER:
case TGSI_SEMANTIC_TESSOUTER:
case TGSI_SEMANTIC_PATCH:
sel->patch_outputs_written |=
1llu << si_shader_io_get_unique_index_patch(name, index);
break;
- case TGSI_SEMANTIC_GENERIC:
- /* don't process indices the function can't handle */
- if (index >= 60)
- break;
- /* fall through */
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_CLIPDIST:
- sel->outputs_written |=
- 1llu << si_shader_io_get_unique_index(name, index);
- break;
case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
case TGSI_SEMANTIC_EDGEFLAG:
break;
default:
- sel->outputs_written2 |=
- 1u << si_shader_io_get_unique_index2(name, index);
+ sel->outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start at a different bank.
*/
if (sctx->b.chip_class >= GFX9)
sel->esgs_itemsize += 4;
break;
case PIPE_SHADER_FRAGMENT:
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
unsigned index = sel->info.input_semantic_index[i];
switch (name) {
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_GENERIC:
+ default:
sel->inputs_read |=
1llu << si_shader_io_get_unique_index(name, index);
break;
case TGSI_SEMANTIC_PCOORD: /* ignore this */
break;
- default:
- sel->inputs_read2 |=
- 1u << si_shader_io_get_unique_index2(name, index);
}
}
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
for (i = 0; i < sel->info.num_inputs; i++) {
if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
int index = sel->info.input_semantic_index[i];
--
2.9.3
More information about the mesa-dev
mailing list