[Mesa-dev] [PATCH 2/2] radeonsi: get rid of secondary input/output word

Nicolai Hähnle nicolai.haehnle at amd.com
Wed May 10 06:48:37 UTC 2017


Hi Axel,

Any idea how many GENERIC indices Nine uses? It would be nice to 
be able to remove the second bitfield in radeonsi, and we still have 
some slack, so we could support more than the 32 that are exposed for OpenGL.

Thanks,
Nicolai

On 05.05.2017 19:35, Marek Olšák wrote:
> Hi Nicolai,
>
> This might break Nine, because it uses GENERIC indices greater than
> 31. The idea is that we support 32, but allow indices up to 60 as long
> as the number of declared varyings is <= 32.
>
> Axel can probably answer which maximum GENERIC index we can normally expect.
>
> Marek
>
> On Wed, May 3, 2017 at 3:54 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>
>> We only advertise a maximum of 32 inputs and outputs in each shader stage,
>> so everything fits into 64 bits.
>> ---
>>  src/gallium/drivers/radeonsi/si_shader.c        | 35 +++++++---------------
>>  src/gallium/drivers/radeonsi/si_shader.h        |  6 +---
>>  src/gallium/drivers/radeonsi/si_state_shaders.c | 40 ++++---------------------
>>  3 files changed, 17 insertions(+), 64 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
>> index a48a552..67d62c3 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -135,48 +135,41 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>>  {
>>         switch (semantic_name) {
>>         case TGSI_SEMANTIC_POSITION:
>>                 return 0;
>>         case TGSI_SEMANTIC_PSIZE:
>>                 return 1;
>>         case TGSI_SEMANTIC_CLIPDIST:
>>                 assert(index <= 1);
>>                 return 2 + index;
>>         case TGSI_SEMANTIC_GENERIC:
>> -               if (index <= 63-4)
>> +               if (index < 32)
>>                         return 4 + index;
>>
>>                 assert(!"invalid generic index");
>>                 return 0;
>>
>> -       default:
>> -               assert(!"invalid semantic name");
>> -               return 0;
>> -       }
>> -}
>> -
>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
>> -{
>> -       switch (name) {
>>         case TGSI_SEMANTIC_FOG:
>> -               return 0;
>> +               return 36;
>>         case TGSI_SEMANTIC_LAYER:
>> -               return 1;
>> +               return 37;
>>         case TGSI_SEMANTIC_VIEWPORT_INDEX:
>> -               return 2;
>> +               return 38;
>>         case TGSI_SEMANTIC_PRIMID:
>> -               return 3;
>> +               return 39;
>>         case TGSI_SEMANTIC_COLOR: /* these alias */
>>         case TGSI_SEMANTIC_BCOLOR:
>> -               return 4 + index;
>> +               assert(index < 2);
>> +               return 40 + index;
>>         case TGSI_SEMANTIC_TEXCOORD:
>> -               return 6 + index;
>> +               assert(index < 8);
>> +               return 42 + index;
>>         default:
>>                 assert(!"invalid semantic name");
>>                 return 0;
>>         }
>>  }
>>
>>  /**
>>   * Get the value of a shader input parameter and extract a bitfield.
>>   */
>>  static LLVMValueRef unpack_param(struct si_shader_context *ctx,
>> @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
>>                 semantic_name = outputs[i].semantic_name;
>>                 semantic_index = outputs[i].semantic_index;
>>                 bool export_param = true;
>>
>>                 switch (semantic_name) {
>>                 case TGSI_SEMANTIC_POSITION: /* ignore these */
>>                 case TGSI_SEMANTIC_PSIZE:
>>                 case TGSI_SEMANTIC_CLIPVERTEX:
>>                 case TGSI_SEMANTIC_EDGEFLAG:
>>                         break;
>> -               case TGSI_SEMANTIC_GENERIC:
>> -               case TGSI_SEMANTIC_CLIPDIST:
>> +               default:
>>                         if (shader->key.opt.hw_vs.kill_outputs &
>>                             (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
>>                                 export_param = false;
>> -                       break;
>> -               default:
>> -                       if (shader->key.opt.hw_vs.kill_outputs2 &
>> -                           (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
>> -                               export_param = false;
>> -                       break;
>>                 }
>>
>>                 if (outputs[i].vertex_stream[0] != 0 &&
>>                     outputs[i].vertex_stream[1] != 0 &&
>>                     outputs[i].vertex_stream[2] != 0 &&
>>                     outputs[i].vertex_stream[3] != 0)
>>                         export_param = false;
>>
>>  handle_semantic:
>>                 /* Select the correct target */
>> @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader,
>>
>>         default:
>>                 assert(0);
>>         }
>>
>>         if ((processor == PIPE_SHADER_GEOMETRY ||
>>              processor == PIPE_SHADER_TESS_EVAL ||
>>              processor == PIPE_SHADER_VERTEX) &&
>>             !key->as_es && !key->as_ls) {
>>                 fprintf(f, "  opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
>> -               fprintf(f, "  opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
>>                 fprintf(f, "  opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
>>         }
>>  }
>>
>>  static void si_init_shader_ctx(struct si_shader_context *ctx,
>>                                struct si_screen *sscreen,
>>                                LLVMTargetMachineRef tm)
>>  {
>>         struct lp_build_tgsi_context *bld_base;
>>         struct lp_build_tgsi_action tmpl = {};
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
>> index cb8a902..5e43b4c 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -349,25 +349,23 @@ struct si_shader_selector {
>>         unsigned        db_shader_control;
>>         /* Set 0xf or 0x0 (4 bits) per each written output.
>>          * ANDed with spi_shader_col_format.
>>          */
>>         unsigned        colors_written_4bit;
>>
>>         /* CS parameters */
>>         unsigned local_size;
>>
>>         uint64_t        outputs_written;        /* "get_unique_index" bits */
>> -       uint32_t        patch_outputs_written;  /* "get_unique_index" bits */
>> -       uint32_t        outputs_written2;       /* "get_unique_index2" bits */
>> +       uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
>>
>>         uint64_t        inputs_read;            /* "get_unique_index" bits */
>> -       uint32_t        inputs_read2;           /* "get_unique_index2" bits */
>>  };
>>
>>  /* Valid shader configurations:
>>   *
>>   * API shaders       VS | TCS | TES | GS |pass| PS
>>   * are compiled as:     |     |     |    |thru|
>>   *                      |     |     |    |    |
>>   * Only VS & PS:     VS |     |     |    |    | PS
>>   * GFX6 - with GS:   ES |     |     | GS | VS | PS
>>   *      - with tess: LS | HS  | VS  |    |    | PS
>> @@ -493,21 +491,20 @@ struct si_shader_key {
>>                 uint8_t         vs_fix_fetch[SI_MAX_ATTRIBS];
>>                 uint64_t        ff_tcs_inputs_to_copy; /* for fixed-func TCS */
>>                 /* When PS needs PrimID and GS is disabled. */
>>                 unsigned        vs_export_prim_id:1;
>>         } mono;
>>
>>         /* Optimization flags for asynchronous compilation only. */
>>         struct {
>>                 struct {
>>                         uint64_t        kill_outputs; /* "get_unique_index" bits */
>> -                       uint32_t        kill_outputs2; /* "get_unique_index2" bits */
>>                         unsigned        clip_disable:1;
>>                 } hw_vs; /* HW VS (it can be VS, TES, GS) */
>>
>>                 /* For shaders where monolithic variants have better code.
>>                  *
>>                  * This is a flag that has no effect on code generation,
>>                  * but forces monolithic shaders to be used as soon as
>>                  * possible, because it's in the "opt" group.
>>                  */
>>                 unsigned        prefer_mono:1;
>> @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>>                     struct ac_shader_binary *binary,
>>                     struct si_shader_config *conf,
>>                     LLVMTargetMachineRef tm,
>>                     LLVMModuleRef mod,
>>                     struct pipe_debug_callback *debug,
>>                     unsigned processor,
>>                     const char *name);
>>  void si_shader_destroy(struct si_shader *shader);
>>  unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
>>  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
>> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
>>  int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
>>  void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
>>                     struct pipe_debug_callback *debug, unsigned processor,
>>                     FILE *f, bool check_debug_option);
>>  void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>>                                       unsigned *lds_size);
>>  void si_shader_apply_scratch_relocs(struct si_context *sctx,
>>                         struct si_shader *shader,
>>                         struct si_shader_config *config,
>>                         uint64_t scratch_va);
>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> index 68f4d21..cf0c11f 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> @@ -1225,36 +1225,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
>>                         ps_colormask &= ps->colors_written_4bit;
>>
>>                 ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
>>                               (!ps_colormask &&
>>                                !ps_modifies_zs &&
>>                                !ps->info.writes_memory);
>>         }
>>
>>         /* Find out which VS outputs aren't used by the PS. */
>>         uint64_t outputs_written = vs->outputs_written;
>> -       uint32_t outputs_written2 = vs->outputs_written2;
>>         uint64_t inputs_read = 0;
>> -       uint32_t inputs_read2 = 0;
>>
>>         outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>>
>>         if (!ps_disabled) {
>>                 inputs_read = ps->inputs_read;
>> -               inputs_read2 = ps->inputs_read2;
>>         }
>>
>>         uint64_t linked = outputs_written & inputs_read;
>> -       uint32_t linked2 = outputs_written2 & inputs_read2;
>>
>>         key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
>> -       key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>>  }
>>
>>  /* Compute the key for the hw shader variant */
>>  static inline void si_shader_selector_key(struct pipe_context *ctx,
>>                                           struct si_shader_selector *sel,
>>                                           struct si_shader_key *key)
>>  {
>>         struct si_context *sctx = (struct si_context *)ctx;
>>
>>         memset(key, 0, sizeof(*key));
>> @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
>>                                 unsigned offset = shader->info.vs_output_param_offset[i];
>>
>>                                 if (offset <= AC_EXP_PARAM_OFFSET_31)
>>                                         continue;
>>
>>                                 unsigned name = sel->info.output_semantic_name[i];
>>                                 unsigned index = sel->info.output_semantic_index[i];
>>                                 unsigned id;
>>
>>                                 switch (name) {
>> -                               case TGSI_SEMANTIC_GENERIC:
>> -                                       /* don't process indices the function can't handle */
>> -                                       if (index >= 60)
>> -                                               break;
>> -                                       /* fall through */
>> -                               case TGSI_SEMANTIC_CLIPDIST:
>> -                                       id = si_shader_io_get_unique_index(name, index);
>> -                                       sel->outputs_written &= ~(1ull << id);
>> -                                       break;
>>                                 case TGSI_SEMANTIC_POSITION: /* ignore these */
>>                                 case TGSI_SEMANTIC_PSIZE:
>>                                 case TGSI_SEMANTIC_CLIPVERTEX:
>>                                 case TGSI_SEMANTIC_EDGEFLAG:
>>                                         break;
>>                                 default:
>> -                                       id = si_shader_io_get_unique_index2(name, index);
>> -                                       sel->outputs_written2 &= ~(1u << id);
>> +                                       id = si_shader_io_get_unique_index(name, index);
>> +                                       sel->outputs_written &= ~(1ull << id);
>> +                                       break;
>>                                 }
>>                         }
>>                 }
>>         }
>>
>>         /* Pre-compilation. */
>>         if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>>                 struct si_shader_ctx_state state = {sel};
>>                 struct si_shader_key key;
>>
>> @@ -1992,64 +1979,49 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>>                         unsigned index = sel->info.output_semantic_index[i];
>>
>>                         switch (name) {
>>                         case TGSI_SEMANTIC_TESSINNER:
>>                         case TGSI_SEMANTIC_TESSOUTER:
>>                         case TGSI_SEMANTIC_PATCH:
>>                                 sel->patch_outputs_written |=
>>                                         1llu << si_shader_io_get_unique_index_patch(name, index);
>>                                 break;
>>
>> -                       case TGSI_SEMANTIC_GENERIC:
>> -                               /* don't process indices the function can't handle */
>> -                               if (index >= 60)
>> -                                       break;
>> -                               /* fall through */
>> -                       case TGSI_SEMANTIC_POSITION:
>> -                       case TGSI_SEMANTIC_PSIZE:
>> -                       case TGSI_SEMANTIC_CLIPDIST:
>> -                               sel->outputs_written |=
>> -                                       1llu << si_shader_io_get_unique_index(name, index);
>> -                               break;
>>                         case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
>>                         case TGSI_SEMANTIC_EDGEFLAG:
>>                                 break;
>>                         default:
>> -                               sel->outputs_written2 |=
>> -                                       1u << si_shader_io_get_unique_index2(name, index);
>> +                               sel->outputs_written |=
>> +                                       1llu << si_shader_io_get_unique_index(name, index);
>>                         }
>>                 }
>>                 sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
>>
>>                 /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
>>                  * conflicts, i.e. each vertex will start at a different bank.
>>                  */
>>                 if (sctx->b.chip_class >= GFX9)
>>                         sel->esgs_itemsize += 4;
>>                 break;
>>
>>         case PIPE_SHADER_FRAGMENT:
>>                 for (i = 0; i < sel->info.num_inputs; i++) {
>>                         unsigned name = sel->info.input_semantic_name[i];
>>                         unsigned index = sel->info.input_semantic_index[i];
>>
>>                         switch (name) {
>> -                       case TGSI_SEMANTIC_CLIPDIST:
>> -                       case TGSI_SEMANTIC_GENERIC:
>> +                       default:
>>                                 sel->inputs_read |=
>>                                         1llu << si_shader_io_get_unique_index(name, index);
>>                                 break;
>>                         case TGSI_SEMANTIC_PCOORD: /* ignore this */
>>                                 break;
>> -                       default:
>> -                               sel->inputs_read2 |=
>> -                                       1u << si_shader_io_get_unique_index2(name, index);
>>                         }
>>                 }
>>
>>                 for (i = 0; i < 8; i++)
>>                         if (sel->info.colors_written & (1 << i))
>>                                 sel->colors_written_4bit |= 0xf << (4 * i);
>>
>>                 for (i = 0; i < sel->info.num_inputs; i++) {
>>                         if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
>>                                 int index = sel->info.input_semantic_index[i];
>> --
>> 2.9.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list