[Mesa-dev] [PATCH] radeonsi/gfx9: fix VM fault with fetched instance divisors

Marek Olšák maraeo at gmail.com
Fri Nov 17 15:55:31 UTC 2017


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Wed, Nov 15, 2017 at 11:23 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> We need to account for SGPR locations in merged shaders.
>
> This case is exercised by KHR-GL45.enhanced_layouts.vertex_attrib_locations
>
> Fixes: 79c2e7388c7f ("radeonsi/gfx9: use SPI_SHADER_USER_DATA_COMMON")
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 13 +++++++++++--
>  src/gallium/drivers/radeonsi/si_shader.h |  4 +---
>  2 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 3293dd44c63..281449e9eeb 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5878,25 +5878,27 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
>                                  unsigned num_input_sgprs,
>                                  const struct si_vs_prolog_bits *prolog_key,
>                                  struct si_shader *shader_out,
>                                  union si_shader_part_key *key)
>  {
>         memset(key, 0, sizeof(*key));
>         key->vs_prolog.states = *prolog_key;
>         key->vs_prolog.num_input_sgprs = num_input_sgprs;
>         key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
>         key->vs_prolog.as_ls = shader_out->key.as_ls;
> +       key->vs_prolog.as_es = shader_out->key.as_es;
>
>         if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
>                 key->vs_prolog.as_ls = 1;
>                 key->vs_prolog.num_merged_next_stage_vgprs = 2;
>         } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
> +               key->vs_prolog.as_es = 1;
>                 key->vs_prolog.num_merged_next_stage_vgprs = 5;
>         }
>
>         /* Enable loading the InstanceID VGPR. */
>         uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
>
>         if ((key->vs_prolog.states.instance_divisor_is_one |
>              key->vs_prolog.states.instance_divisor_is_fetched) & input_mask)
>                 shader_out->info.uses_instanceid = true;
>  }
> @@ -6763,20 +6765,22 @@ si_get_shader_part(struct si_screen *sscreen,
>
>         struct si_shader shader = {};
>         struct si_shader_context ctx;
>
>         si_init_shader_ctx(&ctx, sscreen, tm);
>         ctx.shader = &shader;
>         ctx.type = type;
>
>         switch (type) {
>         case PIPE_SHADER_VERTEX:
> +               shader.key.as_ls = key->vs_prolog.as_ls;
> +               shader.key.as_es = key->vs_prolog.as_es;
>                 break;
>         case PIPE_SHADER_TESS_CTRL:
>                 assert(!prolog);
>                 shader.key.part.tcs.epilog = key->tcs_epilog.states;
>                 break;
>         case PIPE_SHADER_GEOMETRY:
>                 assert(prolog);
>                 break;
>         case PIPE_SHADER_FRAGMENT:
>                 if (prolog)
> @@ -6805,24 +6809,29 @@ si_get_shader_part(struct si_screen *sscreen,
>
>  out:
>         si_llvm_dispose(&ctx);
>         mtx_unlock(&sscreen->shader_parts_mutex);
>         return result;
>  }
>
>  static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
>  {
>         LLVMValueRef ptr[2], list;
> +       bool is_merged_shader =
> +               ctx->screen->b.chip_class >= GFX9 &&
> +               (ctx->type == PIPE_SHADER_TESS_CTRL ||
> +                ctx->type == PIPE_SHADER_GEOMETRY ||
> +                ctx->shader->key.as_ls || ctx->shader->key.as_es);
>
>         /* Get the pointer to rw buffers. */
> -       ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
> -       ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
> +       ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
> +       ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI);
>         list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
>         list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
>         list = LLVMBuildIntToPtr(ctx->ac.builder, list,
>                                  si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
>         return list;
>  }
>
>  /**
>   * Build the vertex shader prolog function.
>   *
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 41851627a87..148356b87ff 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -142,23 +142,20 @@ struct nir_shader;
>
>  #define SI_MAX_VS_OUTPUTS      40
>
>  /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
>   * index smaller than this.
>   */
>  #define SI_MAX_IO_GENERIC       46
>
>  /* SGPR user data indices */
>  enum {
> -       /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
> -        * and these two are used for other purposes.
> -        */
>         SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
>         SI_SGPR_RW_BUFFERS_HI,
>         SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>         SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
>         SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
>         SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
>         SI_SGPR_SAMPLERS_AND_IMAGES,
>         SI_SGPR_SAMPLERS_AND_IMAGES_HI,
>         SI_NUM_RESOURCE_SGPRS,
>
> @@ -448,20 +445,21 @@ struct si_ps_epilog_bits {
>  };
>
>  union si_shader_part_key {
>         struct {
>                 struct si_vs_prolog_bits states;
>                 unsigned        num_input_sgprs:6;
>                 /* For merged stages such as LS-HS, HS input VGPRs are first. */
>                 unsigned        num_merged_next_stage_vgprs:3;
>                 unsigned        last_input:4;
>                 unsigned        as_ls:1;
> +               unsigned        as_es:1;
>                 /* Prologs for monolithic shaders shouldn't set EXEC. */
>                 unsigned        is_monolithic:1;
>         } vs_prolog;
>         struct {
>                 struct si_tcs_epilog_bits states;
>         } tcs_epilog;
>         struct {
>                 struct si_gs_prolog_bits states;
>                 /* Prologs of monolithic shaders shouldn't set EXEC. */
>                 unsigned        is_monolithic:1;
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list