[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word
Axel Davy
axel.davy at normalesup.org
Wed May 10 07:35:29 UTC 2017
On 10/05/2017 09:17, Nicolai Hähnle wrote:
> On 10.05.2017 08:58, Axel Davy wrote:
>> Hi Nicolai and Marek,
>>
>> Gallium nine associates to every d3d usage/index a generic unique index.
>>
>> That should fit on a 16bits integer, but not for 32 values.
>
> Hold on, you're saying the semantic index could be as high as 65535? I
> guess you're saved by the fact that nine only supports VS-PS pipelines...
>
> I do think the patch can be salvaged, by adjusting the guards where we
> "don't process indices that si_shader_io_get_unique_index can't
> handle". This would reduce the scope of the VS output optimizations,
> but that's probably an acceptable trade-off, as long as we just allow
> as many generics as we have bits left to play with.
>
> Cheers,
> Nicolai
Indeed, because the d3d index is 8 bits, with 15 usages. Max is thus
255*15+14 = 3839.
Axel
>
>>
>> We could fit in 32 indexes by recompiling vertex and pixel shaders to
>> match,
>> but one advantage of gallium nine is we don't need much recompiling.
>> My understanding is that handling the matching of the generic unique
>> indices is easier driver side.
>>
>> Yours,
>>
>> Axel
>>
>>
>> On 10/05/2017 08:48, Nicolai Hähnle wrote:
>>> Hi Axel,
>>>
>>> Any idea about how many GENERIC indices nine uses? It would be nice to
>>> be able to remove the second bitfield in radeonsi, and we still have
>>> some slack, so could support more than the 32 that are exposed for
>>> OpenGL.
>>>
>>> Thanks,
>>> Nicolai
>>>
>>> On 05.05.2017 19:35, Marek Olšák wrote:
>>>> Hi Nicolai,
>>>>
>>>> This might break Nine, because it uses GENERIC indices greater than
>>>> 31. The idea is that we support 32, but allow indices up to 60 as long
>>>> as the number of declared varyings is <= 32.
>>>>
>>>> Axel can probably answer which maximum GENERIC index we can normally
>>>> expect.
>>>>
>>>> Marek
>>>>
>>>> On Wed, May 3, 2017 at 3:54 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>>>> wrote:
>>>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>>>
>>>>> We only advertise a maximum of 32 inputs and outputs in each shader
>>>>> stage,
>>>>> so everything fits into 64 bits.
>>>>> ---
>>>>> src/gallium/drivers/radeonsi/si_shader.c | 35
>>>>> +++++++---------------
>>>>> src/gallium/drivers/radeonsi/si_shader.h | 6 +---
>>>>> src/gallium/drivers/radeonsi/si_state_shaders.c | 40
>>>>> ++++---------------------
>>>>> 3 files changed, 17 insertions(+), 64 deletions(-)
>>>>>
>>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>>> index a48a552..67d62c3 100644
>>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>>> @@ -135,48 +135,41 @@ unsigned
>>>>> si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>>>>> {
>>>>> switch (semantic_name) {
>>>>> case TGSI_SEMANTIC_POSITION:
>>>>> return 0;
>>>>> case TGSI_SEMANTIC_PSIZE:
>>>>> return 1;
>>>>> case TGSI_SEMANTIC_CLIPDIST:
>>>>> assert(index <= 1);
>>>>> return 2 + index;
>>>>> case TGSI_SEMANTIC_GENERIC:
>>>>> - if (index <= 63-4)
>>>>> + if (index < 32)
>>>>> return 4 + index;
>>>>>
>>>>> assert(!"invalid generic index");
>>>>> return 0;
>>>>>
>>>>> - default:
>>>>> - assert(!"invalid semantic name");
>>>>> - return 0;
>>>>> - }
>>>>> -}
>>>>> -
>>>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned
>>>>> index)
>>>>> -{
>>>>> - switch (name) {
>>>>> case TGSI_SEMANTIC_FOG:
>>>>> - return 0;
>>>>> + return 36;
>>>>> case TGSI_SEMANTIC_LAYER:
>>>>> - return 1;
>>>>> + return 37;
>>>>> case TGSI_SEMANTIC_VIEWPORT_INDEX:
>>>>> - return 2;
>>>>> + return 38;
>>>>> case TGSI_SEMANTIC_PRIMID:
>>>>> - return 3;
>>>>> + return 39;
>>>>> case TGSI_SEMANTIC_COLOR: /* these alias */
>>>>> case TGSI_SEMANTIC_BCOLOR:
>>>>> - return 4 + index;
>>>>> + assert(index < 2);
>>>>> + return 40 + index;
>>>>> case TGSI_SEMANTIC_TEXCOORD:
>>>>> - return 6 + index;
>>>>> + assert(index < 8);
>>>>> + return 42 + index;
>>>>> default:
>>>>> assert(!"invalid semantic name");
>>>>> return 0;
>>>>> }
>>>>> }
>>>>>
>>>>> /**
>>>>> * Get the value of a shader input parameter and extract a bitfield.
>>>>> */
>>>>> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
>>>>> @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct
>>>>> lp_build_tgsi_context *bld_base,
>>>>> semantic_name = outputs[i].semantic_name;
>>>>> semantic_index = outputs[i].semantic_index;
>>>>> bool export_param = true;
>>>>>
>>>>> switch (semantic_name) {
>>>>> case TGSI_SEMANTIC_POSITION: /* ignore these */
>>>>> case TGSI_SEMANTIC_PSIZE:
>>>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>>> break;
>>>>> - case TGSI_SEMANTIC_GENERIC:
>>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>>> + default:
>>>>> if (shader->key.opt.hw_vs.kill_outputs &
>>>>> (1ull <<
>>>>> si_shader_io_get_unique_index(semantic_name, semantic_index)))
>>>>> export_param = false;
>>>>> - break;
>>>>> - default:
>>>>> - if (shader->key.opt.hw_vs.kill_outputs2 &
>>>>> - (1u <<
>>>>> si_shader_io_get_unique_index2(semantic_name, semantic_index)))
>>>>> - export_param = false;
>>>>> - break;
>>>>> }
>>>>>
>>>>> if (outputs[i].vertex_stream[0] != 0 &&
>>>>> outputs[i].vertex_stream[1] != 0 &&
>>>>> outputs[i].vertex_stream[2] != 0 &&
>>>>> outputs[i].vertex_stream[3] != 0)
>>>>> export_param = false;
>>>>>
>>>>> handle_semantic:
>>>>> /* Select the correct target */
>>>>> @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned
>>>>> processor, struct si_shader *shader,
>>>>>
>>>>> default:
>>>>> assert(0);
>>>>> }
>>>>>
>>>>> if ((processor == PIPE_SHADER_GEOMETRY ||
>>>>> processor == PIPE_SHADER_TESS_EVAL ||
>>>>> processor == PIPE_SHADER_VERTEX) &&
>>>>> !key->as_es && !key->as_ls) {
>>>>> fprintf(f, " opt.hw_vs.kill_outputs =
>>>>> 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
>>>>> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n",
>>>>> key->opt.hw_vs.kill_outputs2);
>>>>> fprintf(f, " opt.hw_vs.clip_disable = %u\n",
>>>>> key->opt.hw_vs.clip_disable);
>>>>> }
>>>>> }
>>>>>
>>>>> static void si_init_shader_ctx(struct si_shader_context *ctx,
>>>>> struct si_screen *sscreen,
>>>>> LLVMTargetMachineRef tm)
>>>>> {
>>>>> struct lp_build_tgsi_context *bld_base;
>>>>> struct lp_build_tgsi_action tmpl = {};
>>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>>>>> b/src/gallium/drivers/radeonsi/si_shader.h
>>>>> index cb8a902..5e43b4c 100644
>>>>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>>>>> @@ -349,25 +349,23 @@ struct si_shader_selector {
>>>>> unsigned db_shader_control;
>>>>> /* Set 0xf or 0x0 (4 bits) per each written output.
>>>>> * ANDed with spi_shader_col_format.
>>>>> */
>>>>> unsigned colors_written_4bit;
>>>>>
>>>>> /* CS parameters */
>>>>> unsigned local_size;
>>>>>
>>>>> uint64_t outputs_written; /*
>>>>> "get_unique_index" bits */
>>>>> - uint32_t patch_outputs_written; /*
>>>>> "get_unique_index" bits */
>>>>> - uint32_t outputs_written2; /*
>>>>> "get_unique_index2" bits */
>>>>> + uint32_t patch_outputs_written; /*
>>>>> "get_unique_index_patch" bits */
>>>>>
>>>>> uint64_t inputs_read; /*
>>>>> "get_unique_index" bits */
>>>>> - uint32_t inputs_read2; /*
>>>>> "get_unique_index2" bits */
>>>>> };
>>>>>
>>>>> /* Valid shader configurations:
>>>>> *
>>>>> * API shaders VS | TCS | TES | GS |pass| PS
>>>>> * are compiled as: | | | |thru|
>>>>> * | | | | |
>>>>> * Only VS & PS: VS | | | | | PS
>>>>> * GFX6 - with GS: ES | | | GS | VS | PS
>>>>> * - with tess: LS | HS | VS | | | PS
>>>>> @@ -493,21 +491,20 @@ struct si_shader_key {
>>>>> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
>>>>> uint64_t ff_tcs_inputs_to_copy; /* for
>>>>> fixed-func TCS */
>>>>> /* When PS needs PrimID and GS is disabled. */
>>>>> unsigned vs_export_prim_id:1;
>>>>> } mono;
>>>>>
>>>>> /* Optimization flags for asynchronous compilation only. */
>>>>> struct {
>>>>> struct {
>>>>> uint64_t kill_outputs; /*
>>>>> "get_unique_index" bits */
>>>>> - uint32_t kill_outputs2; /*
>>>>> "get_unique_index2" bits */
>>>>> unsigned clip_disable:1;
>>>>> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>>>>>
>>>>> /* For shaders where monolithic variants have better
>>>>> code.
>>>>> *
>>>>> * This is a flag that has no effect on code
>>>>> generation,
>>>>> * but forces monolithic shaders to be used as
>>>>> soon as
>>>>> * possible, because it's in the "opt" group.
>>>>> */
>>>>> unsigned prefer_mono:1;
>>>>> @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>>>>> struct ac_shader_binary *binary,
>>>>> struct si_shader_config *conf,
>>>>> LLVMTargetMachineRef tm,
>>>>> LLVMModuleRef mod,
>>>>> struct pipe_debug_callback *debug,
>>>>> unsigned processor,
>>>>> const char *name);
>>>>> void si_shader_destroy(struct si_shader *shader);
>>>>> unsigned si_shader_io_get_unique_index_patch(unsigned
>>>>> semantic_name, unsigned index);
>>>>> unsigned si_shader_io_get_unique_index(unsigned semantic_name,
>>>>> unsigned index);
>>>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned
>>>>> index);
>>>>> int si_shader_binary_upload(struct si_screen *sscreen, struct
>>>>> si_shader *shader);
>>>>> void si_shader_dump(struct si_screen *sscreen, struct si_shader
>>>>> *shader,
>>>>> struct pipe_debug_callback *debug, unsigned
>>>>> processor,
>>>>> FILE *f, bool check_debug_option);
>>>>> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>>>>> unsigned *lds_size);
>>>>> void si_shader_apply_scratch_relocs(struct si_context *sctx,
>>>>> struct si_shader *shader,
>>>>> struct si_shader_config *config,
>>>>> uint64_t scratch_va);
>>>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>>> index 68f4d21..cf0c11f 100644
>>>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>>> @@ -1225,36 +1225,31 @@ static void
>>>>> si_shader_selector_key_hw_vs(struct si_context *sctx,
>>>>> ps_colormask &= ps->colors_written_4bit;
>>>>>
>>>>> ps_disabled =
>>>>> sctx->queued.named.rasterizer->rasterizer_discard ||
>>>>> (!ps_colormask &&
>>>>> !ps_modifies_zs &&
>>>>> !ps->info.writes_memory);
>>>>> }
>>>>>
>>>>> /* Find out which VS outputs aren't used by the PS. */
>>>>> uint64_t outputs_written = vs->outputs_written;
>>>>> - uint32_t outputs_written2 = vs->outputs_written2;
>>>>> uint64_t inputs_read = 0;
>>>>> - uint32_t inputs_read2 = 0;
>>>>>
>>>>> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>>>>>
>>>>> if (!ps_disabled) {
>>>>> inputs_read = ps->inputs_read;
>>>>> - inputs_read2 = ps->inputs_read2;
>>>>> }
>>>>>
>>>>> uint64_t linked = outputs_written & inputs_read;
>>>>> - uint32_t linked2 = outputs_written2 & inputs_read2;
>>>>>
>>>>> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
>>>>> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>>>>> }
>>>>>
>>>>> /* Compute the key for the hw shader variant */
>>>>> static inline void si_shader_selector_key(struct pipe_context *ctx,
>>>>> struct si_shader_selector
>>>>> *sel,
>>>>> struct si_shader_key *key)
>>>>> {
>>>>> struct si_context *sctx = (struct si_context *)ctx;
>>>>>
>>>>> memset(key, 0, sizeof(*key));
>>>>> @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void
>>>>> *job, int thread_index)
>>>>> unsigned offset =
>>>>> shader->info.vs_output_param_offset[i];
>>>>>
>>>>> if (offset <= AC_EXP_PARAM_OFFSET_31)
>>>>> continue;
>>>>>
>>>>> unsigned name =
>>>>> sel->info.output_semantic_name[i];
>>>>> unsigned index =
>>>>> sel->info.output_semantic_index[i];
>>>>> unsigned id;
>>>>>
>>>>> switch (name) {
>>>>> - case TGSI_SEMANTIC_GENERIC:
>>>>> - /* don't process indices the
>>>>> function can't handle */
>>>>> - if (index >= 60)
>>>>> - break;
>>>>> - /* fall through */
>>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>>> - id =
>>>>> si_shader_io_get_unique_index(name, index);
>>>>> - sel->outputs_written &= ~(1ull << id);
>>>>> - break;
>>>>> case TGSI_SEMANTIC_POSITION: /*
>>>>> ignore these */
>>>>> case TGSI_SEMANTIC_PSIZE:
>>>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>>> break;
>>>>> default:
>>>>> - id =
>>>>> si_shader_io_get_unique_index2(name, index);
>>>>> - sel->outputs_written2 &= ~(1u << id);
>>>>> + id =
>>>>> si_shader_io_get_unique_index(name, index);
>>>>> + sel->outputs_written &= ~(1ull << id);
>>>>> + break;
>>>>> }
>>>>> }
>>>>> }
>>>>> }
>>>>>
>>>>> /* Pre-compilation. */
>>>>> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>>>>> struct si_shader_ctx_state state = {sel};
>>>>> struct si_shader_key key;
>>>>>
>>>>> @@ -1992,64 +1979,49 @@ static void
>>>>> *si_create_shader_selector(struct pipe_context *ctx,
>>>>> unsigned index =
>>>>> sel->info.output_semantic_index[i];
>>>>>
>>>>> switch (name) {
>>>>> case TGSI_SEMANTIC_TESSINNER:
>>>>> case TGSI_SEMANTIC_TESSOUTER:
>>>>> case TGSI_SEMANTIC_PATCH:
>>>>> sel->patch_outputs_written |=
>>>>> 1llu <<
>>>>> si_shader_io_get_unique_index_patch(name, index);
>>>>> break;
>>>>>
>>>>> - case TGSI_SEMANTIC_GENERIC:
>>>>> - /* don't process indices the
>>>>> function can't handle */
>>>>> - if (index >= 60)
>>>>> - break;
>>>>> - /* fall through */
>>>>> - case TGSI_SEMANTIC_POSITION:
>>>>> - case TGSI_SEMANTIC_PSIZE:
>>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>>> - sel->outputs_written |=
>>>>> - 1llu <<
>>>>> si_shader_io_get_unique_index(name, index);
>>>>> - break;
>>>>> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore
>>>>> these */
>>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>>> break;
>>>>> default:
>>>>> - sel->outputs_written2 |=
>>>>> - 1u <<
>>>>> si_shader_io_get_unique_index2(name, index);
>>>>> + sel->outputs_written |=
>>>>> + 1llu <<
>>>>> si_shader_io_get_unique_index(name, index);
>>>>> }
>>>>> }
>>>>> sel->esgs_itemsize =
>>>>> util_last_bit64(sel->outputs_written) * 16;
>>>>>
>>>>> /* For the ESGS ring in LDS, add 1 dword to reduce
>>>>> LDS bank
>>>>> * conflicts, i.e. each vertex will start at a
>>>>> different bank.
>>>>> */
>>>>> if (sctx->b.chip_class >= GFX9)
>>>>> sel->esgs_itemsize += 4;
>>>>> break;
>>>>>
>>>>> case PIPE_SHADER_FRAGMENT:
>>>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>>>> unsigned name =
>>>>> sel->info.input_semantic_name[i];
>>>>> unsigned index =
>>>>> sel->info.input_semantic_index[i];
>>>>>
>>>>> switch (name) {
>>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>>> - case TGSI_SEMANTIC_GENERIC:
>>>>> + default:
>>>>> sel->inputs_read |=
>>>>> 1llu <<
>>>>> si_shader_io_get_unique_index(name, index);
>>>>> break;
>>>>> case TGSI_SEMANTIC_PCOORD: /* ignore this */
>>>>> break;
>>>>> - default:
>>>>> - sel->inputs_read2 |=
>>>>> - 1u <<
>>>>> si_shader_io_get_unique_index2(name, index);
>>>>> }
>>>>> }
>>>>>
>>>>> for (i = 0; i < 8; i++)
>>>>> if (sel->info.colors_written & (1 << i))
>>>>> sel->colors_written_4bit |= 0xf <<
>>>>> (4 * i);
>>>>>
>>>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>>>> if (sel->info.input_semantic_name[i] ==
>>>>> TGSI_SEMANTIC_COLOR) {
>>>>> int index =
>>>>> sel->info.input_semantic_index[i];
>>>>> --
>>>>> 2.9.3
>>>>>
>>>>> _______________________________________________
>>>>> mesa-dev mailing list
>>>>> mesa-dev at lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>
>
More information about the mesa-dev
mailing list