[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word
Nicolai Hähnle
nicolai.haehnle at amd.com
Wed May 10 07:17:29 UTC 2017
On 10.05.2017 08:58, Axel Davy wrote:
> Hi Nicolai and Marek,
>
> Gallium nine associates to every d3d usage/index a generic unique index.
>
> That should fit on a 16bits integer, but not for 32 values.
Hold on, you're saying the semantic index could be as high as 65535? I
guess you're saved by the fact that nine only supports VS-PS pipelines...
I do think the patch can be salvaged, by adjusting the guards where we
"don't process indices that si_shader_io_get_unique_index can't handle".
This would reduce the scope of the VS output optimizations, but that's
probably an acceptable trade-off, as long as we just allow as many
generics as we have bits left to play with.
Cheers,
Nicolai
>
> We could fit in 32 indexes by recompiling vertex and pixel shaders to
> match,
> but one advantage of gallium nine is we don't need much recompiling.
> My understanding is that handling the matching of the generic unique
> indices is easier driver side.
>
> Yours,
>
> Axel
>
>
> On 10/05/2017 08:48, Nicolai Hähnle wrote:
>> Hi Axel,
>>
>> Any idea about how many GENERIC indices nine uses? It would be nice to
>> be able to remove the second bitfield in radeonsi, and we still have
>> some slack, so could support more than the 32 that are exposed for
>> OpenGL.
>>
>> Thanks,
>> Nicolai
>>
>> On 05.05.2017 19:35, Marek Olšák wrote:
>>> Hi Nicolai,
>>>
>>> This might break Nine, because it uses GENERIC indices greater than
>>> 31. The idea is that we support 32, but allow indices up to 60 as long
>>> as the number of declared varyings is <= 32.
>>>
>>> Axel can probably answer which maximum GENERIC index we can normally
>>> expect.
>>>
>>> Marek
>>>
>>> On Wed, May 3, 2017 at 3:54 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>>> wrote:
>>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>>
>>>> We only advertise a maximum of 32 inputs and outputs in each shader
>>>> stage,
>>>> so everything fits into 64 bits.
>>>> ---
>>>> src/gallium/drivers/radeonsi/si_shader.c | 35
>>>> +++++++---------------
>>>> src/gallium/drivers/radeonsi/si_shader.h | 6 +---
>>>> src/gallium/drivers/radeonsi/si_state_shaders.c | 40
>>>> ++++---------------------
>>>> 3 files changed, 17 insertions(+), 64 deletions(-)
>>>>
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>> index a48a552..67d62c3 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>> @@ -135,48 +135,41 @@ unsigned
>>>> si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>>>> {
>>>> switch (semantic_name) {
>>>> case TGSI_SEMANTIC_POSITION:
>>>> return 0;
>>>> case TGSI_SEMANTIC_PSIZE:
>>>> return 1;
>>>> case TGSI_SEMANTIC_CLIPDIST:
>>>> assert(index <= 1);
>>>> return 2 + index;
>>>> case TGSI_SEMANTIC_GENERIC:
>>>> - if (index <= 63-4)
>>>> + if (index < 32)
>>>> return 4 + index;
>>>>
>>>> assert(!"invalid generic index");
>>>> return 0;
>>>>
>>>> - default:
>>>> - assert(!"invalid semantic name");
>>>> - return 0;
>>>> - }
>>>> -}
>>>> -
>>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
>>>> -{
>>>> - switch (name) {
>>>> case TGSI_SEMANTIC_FOG:
>>>> - return 0;
>>>> + return 36;
>>>> case TGSI_SEMANTIC_LAYER:
>>>> - return 1;
>>>> + return 37;
>>>> case TGSI_SEMANTIC_VIEWPORT_INDEX:
>>>> - return 2;
>>>> + return 38;
>>>> case TGSI_SEMANTIC_PRIMID:
>>>> - return 3;
>>>> + return 39;
>>>> case TGSI_SEMANTIC_COLOR: /* these alias */
>>>> case TGSI_SEMANTIC_BCOLOR:
>>>> - return 4 + index;
>>>> + assert(index < 2);
>>>> + return 40 + index;
>>>> case TGSI_SEMANTIC_TEXCOORD:
>>>> - return 6 + index;
>>>> + assert(index < 8);
>>>> + return 42 + index;
>>>> default:
>>>> assert(!"invalid semantic name");
>>>> return 0;
>>>> }
>>>> }
>>>>
>>>> /**
>>>> * Get the value of a shader input parameter and extract a bitfield.
>>>> */
>>>> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
>>>> @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct
>>>> lp_build_tgsi_context *bld_base,
>>>> semantic_name = outputs[i].semantic_name;
>>>> semantic_index = outputs[i].semantic_index;
>>>> bool export_param = true;
>>>>
>>>> switch (semantic_name) {
>>>> case TGSI_SEMANTIC_POSITION: /* ignore these */
>>>> case TGSI_SEMANTIC_PSIZE:
>>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>> break;
>>>> - case TGSI_SEMANTIC_GENERIC:
>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>> + default:
>>>> if (shader->key.opt.hw_vs.kill_outputs &
>>>> (1ull <<
>>>> si_shader_io_get_unique_index(semantic_name, semantic_index)))
>>>> export_param = false;
>>>> - break;
>>>> - default:
>>>> - if (shader->key.opt.hw_vs.kill_outputs2 &
>>>> - (1u <<
>>>> si_shader_io_get_unique_index2(semantic_name, semantic_index)))
>>>> - export_param = false;
>>>> - break;
>>>> }
>>>>
>>>> if (outputs[i].vertex_stream[0] != 0 &&
>>>> outputs[i].vertex_stream[1] != 0 &&
>>>> outputs[i].vertex_stream[2] != 0 &&
>>>> outputs[i].vertex_stream[3] != 0)
>>>> export_param = false;
>>>>
>>>> handle_semantic:
>>>> /* Select the correct target */
>>>> @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned
>>>> processor, struct si_shader *shader,
>>>>
>>>> default:
>>>> assert(0);
>>>> }
>>>>
>>>> if ((processor == PIPE_SHADER_GEOMETRY ||
>>>> processor == PIPE_SHADER_TESS_EVAL ||
>>>> processor == PIPE_SHADER_VERTEX) &&
>>>> !key->as_es && !key->as_ls) {
>>>> fprintf(f, " opt.hw_vs.kill_outputs =
>>>> 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
>>>> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n",
>>>> key->opt.hw_vs.kill_outputs2);
>>>> fprintf(f, " opt.hw_vs.clip_disable = %u\n",
>>>> key->opt.hw_vs.clip_disable);
>>>> }
>>>> }
>>>>
>>>> static void si_init_shader_ctx(struct si_shader_context *ctx,
>>>> struct si_screen *sscreen,
>>>> LLVMTargetMachineRef tm)
>>>> {
>>>> struct lp_build_tgsi_context *bld_base;
>>>> struct lp_build_tgsi_action tmpl = {};
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>>>> b/src/gallium/drivers/radeonsi/si_shader.h
>>>> index cb8a902..5e43b4c 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>>>> @@ -349,25 +349,23 @@ struct si_shader_selector {
>>>> unsigned db_shader_control;
>>>> /* Set 0xf or 0x0 (4 bits) per each written output.
>>>> * ANDed with spi_shader_col_format.
>>>> */
>>>> unsigned colors_written_4bit;
>>>>
>>>> /* CS parameters */
>>>> unsigned local_size;
>>>>
>>>> uint64_t outputs_written; /*
>>>> "get_unique_index" bits */
>>>> - uint32_t patch_outputs_written; /*
>>>> "get_unique_index" bits */
>>>> - uint32_t outputs_written2; /*
>>>> "get_unique_index2" bits */
>>>> + uint32_t patch_outputs_written; /*
>>>> "get_unique_index_patch" bits */
>>>>
>>>> uint64_t inputs_read; /*
>>>> "get_unique_index" bits */
>>>> - uint32_t inputs_read2; /*
>>>> "get_unique_index2" bits */
>>>> };
>>>>
>>>> /* Valid shader configurations:
>>>> *
>>>> * API shaders VS | TCS | TES | GS |pass| PS
>>>> * are compiled as: | | | |thru|
>>>> * | | | | |
>>>> * Only VS & PS: VS | | | | | PS
>>>> * GFX6 - with GS: ES | | | GS | VS | PS
>>>> * - with tess: LS | HS | VS | | | PS
>>>> @@ -493,21 +491,20 @@ struct si_shader_key {
>>>> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
>>>> uint64_t ff_tcs_inputs_to_copy; /* for
>>>> fixed-func TCS */
>>>> /* When PS needs PrimID and GS is disabled. */
>>>> unsigned vs_export_prim_id:1;
>>>> } mono;
>>>>
>>>> /* Optimization flags for asynchronous compilation only. */
>>>> struct {
>>>> struct {
>>>> uint64_t kill_outputs; /*
>>>> "get_unique_index" bits */
>>>> - uint32_t kill_outputs2; /*
>>>> "get_unique_index2" bits */
>>>> unsigned clip_disable:1;
>>>> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>>>>
>>>> /* For shaders where monolithic variants have better
>>>> code.
>>>> *
>>>> * This is a flag that has no effect on code
>>>> generation,
>>>> * but forces monolithic shaders to be used as soon as
>>>> * possible, because it's in the "opt" group.
>>>> */
>>>> unsigned prefer_mono:1;
>>>> @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>>>> struct ac_shader_binary *binary,
>>>> struct si_shader_config *conf,
>>>> LLVMTargetMachineRef tm,
>>>> LLVMModuleRef mod,
>>>> struct pipe_debug_callback *debug,
>>>> unsigned processor,
>>>> const char *name);
>>>> void si_shader_destroy(struct si_shader *shader);
>>>> unsigned si_shader_io_get_unique_index_patch(unsigned
>>>> semantic_name, unsigned index);
>>>> unsigned si_shader_io_get_unique_index(unsigned semantic_name,
>>>> unsigned index);
>>>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned
>>>> index);
>>>> int si_shader_binary_upload(struct si_screen *sscreen, struct
>>>> si_shader *shader);
>>>> void si_shader_dump(struct si_screen *sscreen, struct si_shader
>>>> *shader,
>>>> struct pipe_debug_callback *debug, unsigned
>>>> processor,
>>>> FILE *f, bool check_debug_option);
>>>> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>>>> unsigned *lds_size);
>>>> void si_shader_apply_scratch_relocs(struct si_context *sctx,
>>>> struct si_shader *shader,
>>>> struct si_shader_config *config,
>>>> uint64_t scratch_va);
>>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>> index 68f4d21..cf0c11f 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>>> @@ -1225,36 +1225,31 @@ static void
>>>> si_shader_selector_key_hw_vs(struct si_context *sctx,
>>>> ps_colormask &= ps->colors_written_4bit;
>>>>
>>>> ps_disabled =
>>>> sctx->queued.named.rasterizer->rasterizer_discard ||
>>>> (!ps_colormask &&
>>>> !ps_modifies_zs &&
>>>> !ps->info.writes_memory);
>>>> }
>>>>
>>>> /* Find out which VS outputs aren't used by the PS. */
>>>> uint64_t outputs_written = vs->outputs_written;
>>>> - uint32_t outputs_written2 = vs->outputs_written2;
>>>> uint64_t inputs_read = 0;
>>>> - uint32_t inputs_read2 = 0;
>>>>
>>>> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>>>>
>>>> if (!ps_disabled) {
>>>> inputs_read = ps->inputs_read;
>>>> - inputs_read2 = ps->inputs_read2;
>>>> }
>>>>
>>>> uint64_t linked = outputs_written & inputs_read;
>>>> - uint32_t linked2 = outputs_written2 & inputs_read2;
>>>>
>>>> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
>>>> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>>>> }
>>>>
>>>> /* Compute the key for the hw shader variant */
>>>> static inline void si_shader_selector_key(struct pipe_context *ctx,
>>>> struct si_shader_selector
>>>> *sel,
>>>> struct si_shader_key *key)
>>>> {
>>>> struct si_context *sctx = (struct si_context *)ctx;
>>>>
>>>> memset(key, 0, sizeof(*key));
>>>> @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void
>>>> *job, int thread_index)
>>>> unsigned offset =
>>>> shader->info.vs_output_param_offset[i];
>>>>
>>>> if (offset <= AC_EXP_PARAM_OFFSET_31)
>>>> continue;
>>>>
>>>> unsigned name =
>>>> sel->info.output_semantic_name[i];
>>>> unsigned index =
>>>> sel->info.output_semantic_index[i];
>>>> unsigned id;
>>>>
>>>> switch (name) {
>>>> - case TGSI_SEMANTIC_GENERIC:
>>>> - /* don't process indices the
>>>> function can't handle */
>>>> - if (index >= 60)
>>>> - break;
>>>> - /* fall through */
>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>> - id =
>>>> si_shader_io_get_unique_index(name, index);
>>>> - sel->outputs_written &= ~(1ull << id);
>>>> - break;
>>>> case TGSI_SEMANTIC_POSITION: /*
>>>> ignore these */
>>>> case TGSI_SEMANTIC_PSIZE:
>>>> case TGSI_SEMANTIC_CLIPVERTEX:
>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>> break;
>>>> default:
>>>> - id =
>>>> si_shader_io_get_unique_index2(name, index);
>>>> - sel->outputs_written2 &= ~(1u << id);
>>>> + id =
>>>> si_shader_io_get_unique_index(name, index);
>>>> + sel->outputs_written &= ~(1ull << id);
>>>> + break;
>>>> }
>>>> }
>>>> }
>>>> }
>>>>
>>>> /* Pre-compilation. */
>>>> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>>>> struct si_shader_ctx_state state = {sel};
>>>> struct si_shader_key key;
>>>>
>>>> @@ -1992,64 +1979,49 @@ static void
>>>> *si_create_shader_selector(struct pipe_context *ctx,
>>>> unsigned index =
>>>> sel->info.output_semantic_index[i];
>>>>
>>>> switch (name) {
>>>> case TGSI_SEMANTIC_TESSINNER:
>>>> case TGSI_SEMANTIC_TESSOUTER:
>>>> case TGSI_SEMANTIC_PATCH:
>>>> sel->patch_outputs_written |=
>>>> 1llu <<
>>>> si_shader_io_get_unique_index_patch(name, index);
>>>> break;
>>>>
>>>> - case TGSI_SEMANTIC_GENERIC:
>>>> - /* don't process indices the
>>>> function can't handle */
>>>> - if (index >= 60)
>>>> - break;
>>>> - /* fall through */
>>>> - case TGSI_SEMANTIC_POSITION:
>>>> - case TGSI_SEMANTIC_PSIZE:
>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>> - sel->outputs_written |=
>>>> - 1llu <<
>>>> si_shader_io_get_unique_index(name, index);
>>>> - break;
>>>> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore
>>>> these */
>>>> case TGSI_SEMANTIC_EDGEFLAG:
>>>> break;
>>>> default:
>>>> - sel->outputs_written2 |=
>>>> - 1u <<
>>>> si_shader_io_get_unique_index2(name, index);
>>>> + sel->outputs_written |=
>>>> + 1llu <<
>>>> si_shader_io_get_unique_index(name, index);
>>>> }
>>>> }
>>>> sel->esgs_itemsize =
>>>> util_last_bit64(sel->outputs_written) * 16;
>>>>
>>>> /* For the ESGS ring in LDS, add 1 dword to reduce
>>>> LDS bank
>>>> * conflicts, i.e. each vertex will start at a
>>>> different bank.
>>>> */
>>>> if (sctx->b.chip_class >= GFX9)
>>>> sel->esgs_itemsize += 4;
>>>> break;
>>>>
>>>> case PIPE_SHADER_FRAGMENT:
>>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>>> unsigned name =
>>>> sel->info.input_semantic_name[i];
>>>> unsigned index =
>>>> sel->info.input_semantic_index[i];
>>>>
>>>> switch (name) {
>>>> - case TGSI_SEMANTIC_CLIPDIST:
>>>> - case TGSI_SEMANTIC_GENERIC:
>>>> + default:
>>>> sel->inputs_read |=
>>>> 1llu <<
>>>> si_shader_io_get_unique_index(name, index);
>>>> break;
>>>> case TGSI_SEMANTIC_PCOORD: /* ignore this */
>>>> break;
>>>> - default:
>>>> - sel->inputs_read2 |=
>>>> - 1u <<
>>>> si_shader_io_get_unique_index2(name, index);
>>>> }
>>>> }
>>>>
>>>> for (i = 0; i < 8; i++)
>>>> if (sel->info.colors_written & (1 << i))
>>>> sel->colors_written_4bit |= 0xf <<
>>>> (4 * i);
>>>>
>>>> for (i = 0; i < sel->info.num_inputs; i++) {
>>>> if (sel->info.input_semantic_name[i] ==
>>>> TGSI_SEMANTIC_COLOR) {
>>>> int index =
>>>> sel->info.input_semantic_index[i];
>>>> --
>>>> 2.9.3
>>>>
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
More information about the mesa-dev
mailing list