[Mesa-dev] [PATCH 5/5] radeonsi: get rid of secondary input/output word

Marek Olšák maraeo at gmail.com
Thu May 11 16:26:23 UTC 2017


For the series:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Wed, May 10, 2017 at 7:30 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> By keeping track of fewer generics, everything can fit into 64 bits.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 32 +++++++------------------
>  src/gallium/drivers/radeonsi/si_shader.h        |  6 +----
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
>  3 files changed, 13 insertions(+), 47 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c12c8ea..837cc1c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
>         case TGSI_SEMANTIC_CLIPDIST:
>                 assert(index <= 1);
>                 return 2 + index;
>         case TGSI_SEMANTIC_GENERIC:
>                 if (index < SI_MAX_IO_GENERIC)
>                         return 4 + index;
>
>                 assert(!"invalid generic index");
>                 return 0;
>
> -       default:
> -               assert(!"invalid semantic name");
> -               return 0;
> -       }
> -}
> -
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
> -{
> -       switch (name) {
>         case TGSI_SEMANTIC_FOG:
> -               return 0;
> +               return SI_MAX_IO_GENERIC + 4;
>         case TGSI_SEMANTIC_LAYER:
> -               return 1;
> +               return SI_MAX_IO_GENERIC + 5;
>         case TGSI_SEMANTIC_VIEWPORT_INDEX:
> -               return 2;
> +               return SI_MAX_IO_GENERIC + 6;
>         case TGSI_SEMANTIC_PRIMID:
> -               return 3;
> +               return SI_MAX_IO_GENERIC + 7;
>         case TGSI_SEMANTIC_COLOR: /* these alias */
>         case TGSI_SEMANTIC_BCOLOR:
> -               return 4 + index;
> +               assert(index < 2);
> +               return SI_MAX_IO_GENERIC + 8 + index;
>         case TGSI_SEMANTIC_TEXCOORD:
>                 assert(index < 8);
> -               return 6 + index;
> +               assert(SI_MAX_IO_GENERIC + 10 + index < 64);
> +               return SI_MAX_IO_GENERIC + 10 + index;
>         default:
>                 assert(!"invalid semantic name");
>                 return 0;
>         }
>  }
>
>  /**
>   * Get the value of a shader input parameter and extract a bitfield.
>   */
>  static LLVMValueRef unpack_param(struct si_shader_context *ctx,
> @@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
>                 case TGSI_SEMANTIC_POSITION: /* ignore these */
>                 case TGSI_SEMANTIC_PSIZE:
>                 case TGSI_SEMANTIC_CLIPVERTEX:
>                 case TGSI_SEMANTIC_EDGEFLAG:
>                         break;
>                 case TGSI_SEMANTIC_GENERIC:
>                         /* don't process indices the function can't handle */
>                         if (semantic_index >= SI_MAX_IO_GENERIC)
>                                 break;
>                         /* fall through */
> -               case TGSI_SEMANTIC_CLIPDIST:
> +               default:
>                         if (shader->key.opt.hw_vs.kill_outputs &
>                             (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
>                                 export_param = false;
> -                       break;
> -               default:
> -                       if (shader->key.opt.hw_vs.kill_outputs2 &
> -                           (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
> -                               export_param = false;
> -                       break;
>                 }
>
>                 if (outputs[i].vertex_stream[0] != 0 &&
>                     outputs[i].vertex_stream[1] != 0 &&
>                     outputs[i].vertex_stream[2] != 0 &&
>                     outputs[i].vertex_stream[3] != 0)
>                         export_param = false;
>
>  handle_semantic:
>                 /* Select the correct target */
> @@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
>
>         default:
>                 assert(0);
>         }
>
>         if ((processor == PIPE_SHADER_GEOMETRY ||
>              processor == PIPE_SHADER_TESS_EVAL ||
>              processor == PIPE_SHADER_VERTEX) &&
>             !key->as_es && !key->as_ls) {
>                 fprintf(f, "  opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
> -               fprintf(f, "  opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
>                 fprintf(f, "  opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
>         }
>  }
>
>  static void si_init_shader_ctx(struct si_shader_context *ctx,
>                                struct si_screen *sscreen,
>                                LLVMTargetMachineRef tm)
>  {
>         struct lp_build_tgsi_context *bld_base;
>         struct lp_build_tgsi_action tmpl = {};
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 3075900..1627de3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -354,25 +354,23 @@ struct si_shader_selector {
>         unsigned        db_shader_control;
>         /* Set 0xf or 0x0 (4 bits) per each written output.
>          * ANDed with spi_shader_col_format.
>          */
>         unsigned        colors_written_4bit;
>
>         /* CS parameters */
>         unsigned local_size;
>
>         uint64_t        outputs_written;        /* "get_unique_index" bits */
> -       uint32_t        patch_outputs_written;  /* "get_unique_index" bits */
> -       uint32_t        outputs_written2;       /* "get_unique_index2" bits */
> +       uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
>
>         uint64_t        inputs_read;            /* "get_unique_index" bits */
> -       uint32_t        inputs_read2;           /* "get_unique_index2" bits */
>  };
>
>  /* Valid shader configurations:
>   *
>   * API shaders       VS | TCS | TES | GS |pass| PS
>   * are compiled as:     |     |     |    |thru|
>   *                      |     |     |    |    |
>   * Only VS & PS:     VS |     |     |    |    | PS
>   * GFX6 - with GS:   ES |     |     | GS | VS | PS
>   *      - with tess: LS | HS  | VS  |    |    | PS
> @@ -498,21 +496,20 @@ struct si_shader_key {
>                 uint8_t         vs_fix_fetch[SI_MAX_ATTRIBS];
>                 uint64_t        ff_tcs_inputs_to_copy; /* for fixed-func TCS */
>                 /* When PS needs PrimID and GS is disabled. */
>                 unsigned        vs_export_prim_id:1;
>         } mono;
>
>         /* Optimization flags for asynchronous compilation only. */
>         struct {
>                 struct {
>                         uint64_t        kill_outputs; /* "get_unique_index" bits */
> -                       uint32_t        kill_outputs2; /* "get_unique_index2" bits */
>                         unsigned        clip_disable:1;
>                 } hw_vs; /* HW VS (it can be VS, TES, GS) */
>
>                 /* For shaders where monolithic variants have better code.
>                  *
>                  * This is a flag that has no effect on code generation,
>                  * but forces monolithic shaders to be used as soon as
>                  * possible, because it's in the "opt" group.
>                  */
>                 unsigned        prefer_mono:1;
> @@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
>                            LLVMTargetMachineRef tm,
>                            struct si_shader *shader,
>                            bool is_monolithic,
>                            struct pipe_debug_callback *debug);
>  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
>                      struct si_shader *shader,
>                      struct pipe_debug_callback *debug);
>  void si_shader_destroy(struct si_shader *shader);
>  unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
>  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
>  int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
>  void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
>                     struct pipe_debug_callback *debug, unsigned processor,
>                     FILE *f, bool check_debug_option);
>  void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
>                                       unsigned *lds_size);
>  void si_shader_apply_scratch_relocs(struct si_shader *shader,
>                                     uint64_t scratch_va);
>  void si_shader_binary_read_config(struct ac_shader_binary *binary,
>                                   struct si_shader_config *conf,
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 6020bec..5da6014 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1224,36 +1224,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
>                         ps_colormask &= ps->colors_written_4bit;
>
>                 ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
>                               (!ps_colormask &&
>                                !ps_modifies_zs &&
>                                !ps->info.writes_memory);
>         }
>
>         /* Find out which VS outputs aren't used by the PS. */
>         uint64_t outputs_written = vs->outputs_written;
> -       uint32_t outputs_written2 = vs->outputs_written2;
>         uint64_t inputs_read = 0;
> -       uint32_t inputs_read2 = 0;
>
>         outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>
>         if (!ps_disabled) {
>                 inputs_read = ps->inputs_read;
> -               inputs_read2 = ps->inputs_read2;
>         }
>
>         uint64_t linked = outputs_written & inputs_read;
> -       uint32_t linked2 = outputs_written2 & inputs_read2;
>
>         key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> -       key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
>  }
>
>  /* Compute the key for the hw shader variant */
>  static inline void si_shader_selector_key(struct pipe_context *ctx,
>                                           struct si_shader_selector *sel,
>                                           struct si_shader_key *key)
>  {
>         struct si_context *sctx = (struct si_context *)ctx;
>
>         memset(key, 0, sizeof(*key));
> @@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
>                                 unsigned name = sel->info.output_semantic_name[i];
>                                 unsigned index = sel->info.output_semantic_index[i];
>                                 unsigned id;
>
>                                 switch (name) {
>                                 case TGSI_SEMANTIC_GENERIC:
>                                         /* don't process indices the function can't handle */
>                                         if (index >= SI_MAX_IO_GENERIC)
>                                                 break;
>                                         /* fall through */
> -                               case TGSI_SEMANTIC_CLIPDIST:
> +                               default:
>                                         id = si_shader_io_get_unique_index(name, index);
>                                         sel->outputs_written &= ~(1ull << id);
>                                         break;
>                                 case TGSI_SEMANTIC_POSITION: /* ignore these */
>                                 case TGSI_SEMANTIC_PSIZE:
>                                 case TGSI_SEMANTIC_CLIPVERTEX:
>                                 case TGSI_SEMANTIC_EDGEFLAG:
>                                         break;
> -                               default:
> -                                       id = si_shader_io_get_unique_index2(name, index);
> -                                       sel->outputs_written2 &= ~(1u << id);
>                                 }
>                         }
>                 }
>         }
>
>         /* Pre-compilation. */
>         if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
>                 struct si_shader_ctx_state state = {sel};
>                 struct si_shader_key key;
>
> @@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>                         case TGSI_SEMANTIC_PATCH:
>                                 sel->patch_outputs_written |=
>                                         1llu << si_shader_io_get_unique_index_patch(name, index);
>                                 break;
>
>                         case TGSI_SEMANTIC_GENERIC:
>                                 /* don't process indices the function can't handle */
>                                 if (index >= SI_MAX_IO_GENERIC)
>                                         break;
>                                 /* fall through */
> -                       case TGSI_SEMANTIC_POSITION:
> -                       case TGSI_SEMANTIC_PSIZE:
> -                       case TGSI_SEMANTIC_CLIPDIST:
> +                       default:
>                                 sel->outputs_written |=
>                                         1llu << si_shader_io_get_unique_index(name, index);
>                                 break;
>                         case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
>                         case TGSI_SEMANTIC_EDGEFLAG:
>                                 break;
> -                       default:
> -                               sel->outputs_written2 |=
> -                                       1u << si_shader_io_get_unique_index2(name, index);
>                         }
>                 }
>                 sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
>
>                 /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
>                  * conflicts, i.e. each vertex will start at a different bank.
>                  */
>                 if (sctx->b.chip_class >= GFX9)
>                         sel->esgs_itemsize += 4;
>                 break;
> @@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>                 for (i = 0; i < sel->info.num_inputs; i++) {
>                         unsigned name = sel->info.input_semantic_name[i];
>                         unsigned index = sel->info.input_semantic_index[i];
>
>                         switch (name) {
>                         case TGSI_SEMANTIC_GENERIC:
>                                 /* don't process indices the function can't handle */
>                                 if (index >= SI_MAX_IO_GENERIC)
>                                         break;
>                                 /* fall through */
> -                       case TGSI_SEMANTIC_CLIPDIST:
> +                       default:
>                                 sel->inputs_read |=
>                                         1llu << si_shader_io_get_unique_index(name, index);
>                                 break;
>                         case TGSI_SEMANTIC_PCOORD: /* ignore this */
>                                 break;
> -                       default:
> -                               sel->inputs_read2 |=
> -                                       1u << si_shader_io_get_unique_index2(name, index);
>                         }
>                 }
>
>                 for (i = 0; i < 8; i++)
>                         if (sel->info.colors_written & (1 << i))
>                                 sel->colors_written_4bit |= 0xf << (4 * i);
>
>                 for (i = 0; i < sel->info.num_inputs; i++) {
>                         if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
>                                 int index = sel->info.input_semantic_index[i];
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list