[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word
Axel Davy
axel.davy at normalesup.org
Wed May 10 06:58:34 UTC 2017
Hi Nicolai and Marek,
Gallium Nine associates a unique generic index with every D3D usage/index pair.
These indices should fit in a 16-bit integer, but they do not fit within 32 values.
We could fit within 32 indices by recompiling vertex and pixel shaders to match,
but one advantage of Gallium Nine is that we don't need much recompiling.
My understanding is that handling the matching of the generic unique
indices is easier on the driver side.
Yours,
Axel
On 10/05/2017 08:48, Nicolai Hähnle wrote:
> Hi Axel,
>
> Any idea about how many GENERIC indices nine uses? It would be nice to
> be able to remove the second bitfield in radeonsi, and we still have
> some slack, so could support more than the 32 that are exposed for
> OpenGL.
>
> Thanks,
> Nicolai
>
> On 05.05.2017 19:35, Marek Olšák wrote:
>> Hi Nicolai,
>>
>> This might break Nine, because it uses GENERIC indices greater than
>> 31. The idea is that we support 32, but allow indices up to 60 as long
>> as the number of declared varyings is <= 32.
>>
>> Axel can probably answer which maximum GENERIC index we can normally
>> expect.
>>
>> Marek
>>
>> On Wed, May 3, 2017 at 3:54 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>> wrote:
>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>
>>> We only advertise a maximum of 32 inputs and outputs in each shader
>>> stage,
>>> so everything fits into 64 bits.
>>> ---
>>> src/gallium/drivers/radeonsi/si_shader.c | 35
>>> +++++++---------------
>>> src/gallium/drivers/radeonsi/si_shader.h | 6 +---
>>> src/gallium/drivers/radeonsi/si_state_shaders.c | 40
>>> ++++---------------------
>>> 3 files changed, 17 insertions(+), 64 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>> index a48a552..67d62c3 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>> @@ -135,48 +135,41 @@ unsigned
>>> si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>>> {
>>> switch (semantic_name) {
>>> case TGSI_SEMANTIC_POSITION:
>>> return 0;
>>> case TGSI_SEMANTIC_PSIZE:
>>> return 1;
>>> case TGSI_SEMANTIC_CLIPDIST:
>>> assert(index <= 1);
>>> return 2 + index;
>>> case TGSI_SEMANTIC_GENERIC:
>>> - if (index <= 63-4)
>>> + if (index < 32)
>>> return 4 + index;
>>>
>>> assert(!"invalid generic index");
>>> return 0;
>>>
>>> - default:
>>> - assert(!"invalid semantic name");
>>> - return 0;
>>> - }
>>> -}
>>> -
>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
>>> -{
>>> - switch (name) {
>>> case TGSI_SEMANTIC_FOG:
>>> - return 0;
>>> + return 36;
>>> case TGSI_SEMANTIC_LAYER:
>>> - return 1;
>>> + return 37;
>>> case TGSI_SEMANTIC_VIEWPORT_INDEX:
>>> - return 2;
>>> + return 38;
>>> case TGSI_SEMANTIC_PRIMID:
>>> - return 3;
>>> + return 39;
>>> case TGSI_SEMANTIC_COLOR: /* these alias */
>>> case TGSI_SEMANTIC_BCOLOR:
>>> - return 4 + index;
>>> + assert(index < 2);
>>> + return 40 + index;
>>> case TGSI_SEMANTIC_TEXCOORD:
>>> - return 6 + index;
>>> + assert(index < 8);
>>> + return 42 + index;
>>> default:
>>> assert(!"invalid semantic name");
>>> return 0;
>>> }
>>> }
>>>
>>> /**
>>> * Get the value of a shader input parameter and extract a bitfield.
>>> */
>>> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
>>> @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct
>>> lp_build_tgsi_context *bld_base,
>>> semantic_name = outputs[i].semantic_name;
>>> semantic_index = outputs[i].semantic_index;
>>> bool export_param = true;
>>>
>>> switch (semantic_name) {
>>> case TGSI_SEMANTIC_POSITION: /* ignore these */
>>> case TGSI_SEMANTIC_PSIZE:
>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>> case TGSI_SEMANTIC_EDGEFLAG:
>>> break;
>>> - case TGSI_SEMANTIC_GENERIC:
>>> - case TGSI_SEMANTIC_CLIPDIST:
>>> + default:
>>> if (shader->key.opt.hw_vs.kill_outputs &
>>> (1ull <<
>>> si_shader_io_get_unique_index(semantic_name, semantic_index)))
>>> export_param = false;
>>> - break;
>>> - default:
>>> - if (shader->key.opt.hw_vs.kill_outputs2 &
>>> - (1u <<
>>> si_shader_io_get_unique_index2(semantic_name, semantic_index)))
>>> - export_param = false;
>>> - break;
>>> }
>>>
>>> if (outputs[i].vertex_stream[0] != 0 &&
>>> outputs[i].vertex_stream[1] != 0 &&
>>> outputs[i].vertex_stream[2] != 0 &&
>>> outputs[i].vertex_stream[3] != 0)
>>> export_param = false;
>>>
>>> handle_semantic:
>>> /* Select the correct target */
>>> @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned
>>> processor, struct si_shader *shader,
>>>
>>> default:
>>> assert(0);
>>> }
>>>
>>> if ((processor == PIPE_SHADER_GEOMETRY ||
>>> processor == PIPE_SHADER_TESS_EVAL ||
>>> processor == PIPE_SHADER_VERTEX) &&
>>> !key->as_es && !key->as_ls) {
>>> fprintf(f, " opt.hw_vs.kill_outputs =
>>> 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
>>> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n",
>>> key->opt.hw_vs.kill_outputs2);
>>> fprintf(f, " opt.hw_vs.clip_disable = %u\n",
>>> key->opt.hw_vs.clip_disable);
>>> }
>>> }
>>>
>>> static void si_init_shader_ctx(struct si_shader_context *ctx,
>>> struct si_screen *sscreen,
>>> LLVMTargetMachineRef tm)
>>> {
>>> struct lp_build_tgsi_context *bld_base;
>>> struct lp_build_tgsi_action tmpl = {};
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>>> b/src/gallium/drivers/radeonsi/si_shader.h
>>> index cb8a902..5e43b4c 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>>> @@ -349,25 +349,23 @@ struct si_shader_selector {
>>> unsigned db_shader_control;
>>> /* Set 0xf or 0x0 (4 bits) per each written output.
>>> * ANDed with spi_shader_col_format.
>>> */
>>> unsigned colors_written_4bit;
>>>
>>> /* CS parameters */
>>> unsigned local_size;
>>>
>>> uint64_t outputs_written; /*
>>> "get_unique_index" bits */
>>> - uint32_t patch_outputs_written; /*
>>> "get_unique_index" bits */
>>> - uint32_t outputs_written2; /*
>>> "get_unique_index2" bits */
>>> + uint32_t patch_outputs_written; /*
>>> "get_unique_index_patch" bits */
>>>
>>> uint64_t inputs_read; /*
>>> "get_unique_index" bits */
>>> - uint32_t inputs_read2; /*
>>> "get_unique_index2" bits */
>>> };
>>>
>>> /* Valid shader configurations:
>>> *
>>> * API shaders VS | TCS | TES | GS |pass| PS
>>> * are compiled as: | | | |thru|
>>> * | | | | |
>>> * Only VS & PS: VS | | | | | PS
>>> * GFX6 - with GS: ES | | | GS | VS | PS
>>> * - with tess: LS | HS | VS | | | PS
>>> @@ -493,21 +491,20 @@ struct si_shader_key {
>>> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
>>> uint64_t ff_tcs_inputs_to_copy; /* for
>>> fixed-func TCS */
>>> /* When PS needs PrimID and GS is disabled. */
>>> unsigned vs_export_prim_id:1;
>>> } mono;
>>>
>>> /* Optimization flags for asynchronous compilation only. */
>>> struct {
>>> struct {
>>> uint64_t kill_outputs; /*
>>> "get_unique_index" bits */
>>> - uint32_t kill_outputs2; /*
>>> "get_unique_index2" bits */
>>> unsigned clip_disable:1;
>>> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>>>
>>> /* For shaders where monolithic variants have better
>>> code.
>>> *
>>> * This is a flag that has no effect on code
>>> generation,
>>> * but forces monolithic shaders to be used as soon as
>>> * possible, because it's in the "opt" group.
>>> */
>>> unsigned prefer_mono:1;
>>> @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>>> struct ac_shader_binary *binary,
>>> struct si_shader_config *conf,
>>> LLVMTargetMachineRef tm,
>>> LLVMModuleRef mod,
>>> struct pipe_debug_callback *debug,
>>> unsigned processor,
>>> const char *name);
>>> void si_shader_destroy(struct si_shader *shader);
>>> unsigned si_shader_io_get_unique_index_patch(unsigned
>>> semantic_name, unsigned index);
>>> unsigned si_shader_io_get_unique_index(unsigned semantic_name,
>>> unsigned index);
>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned
>>> index);
>>> int si_shader_binary_upload(struct si_screen *sscreen, struct
>>> si_shader *shader);
>>> void si_shader_dump(struct si_screen *sscreen, struct si_shader
>>> *shader,
>>> struct pipe_debug_callback *debug, unsigned
>>> processor,
>>> FILE *f, bool check_debug_option);
>>> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>>> unsigned *lds_size);
>>> void si_shader_apply_scratch_relocs(struct si_context *sctx,
>>> struct si_shader *shader,
>>> struct si_shader_config *config,
>>> uint64_t scratch_va);
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> index 68f4d21..cf0c11f 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> @@ -1225,36 +1225,31 @@ static void
>>> si_shader_selector_key_hw_vs(struct si_context *sctx,
>>> ps_colormask &= ps->colors_written_4bit;
>>>
>>> ps_disabled =
>>> sctx->queued.named.rasterizer->rasterizer_discard ||
>>> (!ps_colormask &&
>>> !ps_modifies_zs &&
>>> !ps->info.writes_memory);
>>> }
>>>
>>> /* Find out which VS outputs aren't used by the PS. */
>>> uint64_t outputs_written = vs->outputs_written;
>>> - uint32_t outputs_written2 = vs->outputs_written2;
>>> uint64_t inputs_read = 0;
>>> - uint32_t inputs_read2 = 0;
>>>
>>> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>>>
>>> if (!ps_disabled) {
>>> inputs_read = ps->inputs_read;
>>> - inputs_read2 = ps->inputs_read2;
>>> }
>>>
>>> uint64_t linked = outputs_written & inputs_read;
>>> - uint32_t linked2 = outputs_written2 & inputs_read2;
>>>
>>> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
>>> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>>> }
>>>
>>> /* Compute the key for the hw shader variant */
>>> static inline void si_shader_selector_key(struct pipe_context *ctx,
>>> struct si_shader_selector
>>> *sel,
>>> struct si_shader_key *key)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>>
>>> memset(key, 0, sizeof(*key));
>>> @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void
>>> *job, int thread_index)
>>> unsigned offset =
>>> shader->info.vs_output_param_offset[i];
>>>
>>> if (offset <= AC_EXP_PARAM_OFFSET_31)
>>> continue;
>>>
>>> unsigned name =
>>> sel->info.output_semantic_name[i];
>>> unsigned index =
>>> sel->info.output_semantic_index[i];
>>> unsigned id;
>>>
>>> switch (name) {
>>> - case TGSI_SEMANTIC_GENERIC:
>>> - /* don't process indices the
>>> function can't handle */
>>> - if (index >= 60)
>>> - break;
>>> - /* fall through */
>>> - case TGSI_SEMANTIC_CLIPDIST:
>>> - id =
>>> si_shader_io_get_unique_index(name, index);
>>> - sel->outputs_written &= ~(1ull << id);
>>> - break;
>>> case TGSI_SEMANTIC_POSITION: /*
>>> ignore these */
>>> case TGSI_SEMANTIC_PSIZE:
>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>> case TGSI_SEMANTIC_EDGEFLAG:
>>> break;
>>> default:
>>> - id =
>>> si_shader_io_get_unique_index2(name, index);
>>> - sel->outputs_written2 &= ~(1u << id);
>>> + id =
>>> si_shader_io_get_unique_index(name, index);
>>> + sel->outputs_written &= ~(1ull << id);
>>> + break;
>>> }
>>> }
>>> }
>>> }
>>>
>>> /* Pre-compilation. */
>>> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>>> struct si_shader_ctx_state state = {sel};
>>> struct si_shader_key key;
>>>
>>> @@ -1992,64 +1979,49 @@ static void
>>> *si_create_shader_selector(struct pipe_context *ctx,
>>> unsigned index =
>>> sel->info.output_semantic_index[i];
>>>
>>> switch (name) {
>>> case TGSI_SEMANTIC_TESSINNER:
>>> case TGSI_SEMANTIC_TESSOUTER:
>>> case TGSI_SEMANTIC_PATCH:
>>> sel->patch_outputs_written |=
>>> 1llu <<
>>> si_shader_io_get_unique_index_patch(name, index);
>>> break;
>>>
>>> - case TGSI_SEMANTIC_GENERIC:
>>> - /* don't process indices the
>>> function can't handle */
>>> - if (index >= 60)
>>> - break;
>>> - /* fall through */
>>> - case TGSI_SEMANTIC_POSITION:
>>> - case TGSI_SEMANTIC_PSIZE:
>>> - case TGSI_SEMANTIC_CLIPDIST:
>>> - sel->outputs_written |=
>>> - 1llu <<
>>> si_shader_io_get_unique_index(name, index);
>>> - break;
>>> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore
>>> these */
>>> case TGSI_SEMANTIC_EDGEFLAG:
>>> break;
>>> default:
>>> - sel->outputs_written2 |=
>>> - 1u <<
>>> si_shader_io_get_unique_index2(name, index);
>>> + sel->outputs_written |=
>>> + 1llu <<
>>> si_shader_io_get_unique_index(name, index);
>>> }
>>> }
>>> sel->esgs_itemsize =
>>> util_last_bit64(sel->outputs_written) * 16;
>>>
>>> /* For the ESGS ring in LDS, add 1 dword to reduce
>>> LDS bank
>>> * conflicts, i.e. each vertex will start at a
>>> different bank.
>>> */
>>> if (sctx->b.chip_class >= GFX9)
>>> sel->esgs_itemsize += 4;
>>> break;
>>>
>>> case PIPE_SHADER_FRAGMENT:
>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>> unsigned name =
>>> sel->info.input_semantic_name[i];
>>> unsigned index =
>>> sel->info.input_semantic_index[i];
>>>
>>> switch (name) {
>>> - case TGSI_SEMANTIC_CLIPDIST:
>>> - case TGSI_SEMANTIC_GENERIC:
>>> + default:
>>> sel->inputs_read |=
>>> 1llu <<
>>> si_shader_io_get_unique_index(name, index);
>>> break;
>>> case TGSI_SEMANTIC_PCOORD: /* ignore this */
>>> break;
>>> - default:
>>> - sel->inputs_read2 |=
>>> - 1u <<
>>> si_shader_io_get_unique_index2(name, index);
>>> }
>>> }
>>>
>>> for (i = 0; i < 8; i++)
>>> if (sel->info.colors_written & (1 << i))
>>> sel->colors_written_4bit |= 0xf <<
>>> (4 * i);
>>>
>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>> if (sel->info.input_semantic_name[i] ==
>>> TGSI_SEMANTIC_COLOR) {
>>> int index =
>>> sel->info.input_semantic_index[i];
>>> --
>>> 2.9.3
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev mailing list