[Mesa-dev] [PATCH] radeonsi: fix vertex fetches for 2_10_10_10 formats

Marek Olšák maraeo at gmail.com
Fri Nov 4 11:21:51 UTC 2016


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Thu, Nov 3, 2016 at 11:16 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> The hardware always treats the 2-bit alpha channel of 2_10_10_10 vertex
> formats as unsigned, so add a shader workaround for the signed variants
> (SNORM, SSCALED, SINT). This is rare enough that we'll just build a
> monolithic vertex shader.
>
> The SINT case cannot actually happen in OpenGL, but I've included it for
> completeness since it's just a mix of the other cases.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 54 ++++++++++++++++++++++---
>  src/gallium/drivers/radeonsi/si_shader.h        | 11 +++++
>  src/gallium/drivers/radeonsi/si_state.c         | 14 +++++++
>  src/gallium/drivers/radeonsi/si_state.h         |  1 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c |  4 ++
>  5 files changed, 78 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 28a8b1f..b170eb9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -362,67 +362,105 @@ static LLVMValueRef get_instance_index_for_fetch(
>         /* The division must be done before START_INSTANCE is added. */
>         if (divisor > 1)
>                 result = LLVMBuildUDiv(gallivm->builder, result,
>                                 lp_build_const_int32(gallivm, divisor), "");
>
>         return LLVMBuildAdd(gallivm->builder, result,
>                             LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
>  }
>
>  static void declare_input_vs(
> -       struct si_shader_context *radeon_bld,
> +       struct si_shader_context *ctx,
>         unsigned input_index,
>         const struct tgsi_full_declaration *decl,
>         LLVMValueRef out[4])
>  {
> -       struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
> +       struct lp_build_context *base = &ctx->soa.bld_base.base;
>         struct gallivm_state *gallivm = base->gallivm;
> -       struct si_shader_context *ctx =
> -               si_shader_context(&radeon_bld->soa.bld_base);
>
>         unsigned chan;
> +       unsigned fix_fetch;
>
>         LLVMValueRef t_list_ptr;
>         LLVMValueRef t_offset;
>         LLVMValueRef t_list;
>         LLVMValueRef attribute_offset;
>         LLVMValueRef buffer_index;
>         LLVMValueRef args[3];
>         LLVMValueRef input;
>
>         /* Load the T list */
>         t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
>
>         t_offset = lp_build_const_int32(gallivm, input_index);
>
>         t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
>
>         /* Build the attribute offset */
>         attribute_offset = lp_build_const_int32(gallivm, 0);
>
> -       buffer_index = LLVMGetParam(radeon_bld->main_fn,
> +       buffer_index = LLVMGetParam(ctx->main_fn,
>                                     ctx->param_vertex_index0 +
>                                     input_index);
>
>         args[0] = t_list;
>         args[1] = attribute_offset;
>         args[2] = buffer_index;
>         input = lp_build_intrinsic(gallivm->builder,
>                 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
>                 LLVMReadNoneAttribute);
>
>         /* Break up the vec4 into individual components */
>         for (chan = 0; chan < 4; chan++) {
>                 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
>                 out[chan] = LLVMBuildExtractElement(gallivm->builder,
>                                                     input, llvm_chan, "");
>         }
> +
> +       fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3;
> +       if (fix_fetch) {
> +               /* The hardware returns an unsigned value; convert it to a
> +                * signed one.
> +                */
> +               LLVMValueRef tmp = out[3];
> +               LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
> +
> +               /* First, recover the sign-extended signed integer value. */
> +               if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
> +                       tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
> +               else
> +                       tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, "");
> +
> +               /* For the integer-like cases, do a natural sign extension.
> +                *
> +                * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
> +                * and happen to contain 0, 1, 2, 3 as the two LSBs of the
> +                * exponent.
> +                */
> +               tmp = LLVMBuildShl(gallivm->builder, tmp,
> +                                  fix_fetch == SI_FIX_FETCH_A2_SNORM ?
> +                                  LLVMConstInt(ctx->i32, 7, 0) : c30, "");
> +               tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, "");
> +
> +               /* Convert back to the right type. */
> +               if (fix_fetch == SI_FIX_FETCH_A2_SNORM) {
> +                       LLVMValueRef clamp;
> +                       LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
> +                       tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
> +                       clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
> +                       tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
> +               } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
> +                       tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
> +               }
> +
> +               out[3] = tmp;
> +       }
>  }
>
>  static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
>                                      unsigned swizzle)
>  {
>         struct si_shader_context *ctx = si_shader_context(bld_base);
>
>         if (swizzle > 0)
>                 return bld_base->uint_bld.zero;
>
> @@ -8095,25 +8133,29 @@ static void si_fix_num_sgprs(struct si_shader *shader)
>  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
>                      struct si_shader *shader,
>                      struct pipe_debug_callback *debug)
>  {
>         struct si_shader_selector *sel = shader->selector;
>         struct si_shader *mainp = sel->main_shader_part;
>         int r;
>
>         /* LS, ES, VS are compiled on demand if the main part hasn't been
>          * compiled for that stage.
> +        *
> +        * Vertex shaders are compiled on demand when a vertex fetch
> +        * workaround must be applied.
>          */
>         if (!mainp ||
>             (sel->type == PIPE_SHADER_VERTEX &&
>              (shader->key.vs.as_es != mainp->key.vs.as_es ||
> -             shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
> +             shader->key.vs.as_ls != mainp->key.vs.as_ls ||
> +             shader->key.vs.fix_fetch)) ||
>             (sel->type == PIPE_SHADER_TESS_EVAL &&
>              shader->key.tes.as_es != mainp->key.tes.as_es) ||
>             (sel->type == PIPE_SHADER_TESS_CTRL &&
>              shader->key.tcs.epilog.inputs_to_copy) ||
>             sel->type == PIPE_SHADER_COMPUTE) {
>                 /* Monolithic shader (compiled as a whole, has many variants,
>                  * may take a long time to compile).
>                  */
>                 r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
>                 if (r)
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index d8ab2a4..59e7bfb 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -226,20 +226,28 @@ enum {
>
>         SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
>  };
>
>  /* SI-specific system values. */
>  enum {
>         TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
>         TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
>  };
>
> +/* For VS shader key fix_fetch. */
> +enum {
> +       SI_FIX_FETCH_NONE = 0,
> +       SI_FIX_FETCH_A2_SNORM = 1,
> +       SI_FIX_FETCH_A2_SSCALED = 2,
> +       SI_FIX_FETCH_A2_SINT = 3,
> +};
> +
>  struct si_shader;
>
>  /* A shader selector is a gallium CSO and contains shader variants and
>   * binaries for one TGSI program. This can be shared by multiple contexts.
>   */
>  struct si_shader_selector {
>         struct si_screen        *screen;
>         struct util_queue_fence ready;
>
>         /* Should only be used by si_init_shader_selector_async
> @@ -393,20 +401,23 @@ union si_shader_part_key {
>  union si_shader_key {
>         struct {
>                 struct si_ps_prolog_bits prolog;
>                 struct si_ps_epilog_bits epilog;
>         } ps;
>         struct {
>                 struct si_vs_prolog_bits prolog;
>                 struct si_vs_epilog_bits epilog;
>                 unsigned        as_es:1; /* export shader */
>                 unsigned        as_ls:1; /* local shader */
> +
> +               /* One pair of bits for every input: SI_FIX_FETCH_* enums. */
> +               uint32_t        fix_fetch;
>         } vs;
>         struct {
>                 struct si_tcs_epilog_bits epilog;
>         } tcs; /* tessellation control shader */
>         struct {
>                 struct si_vs_epilog_bits epilog; /* same as VS */
>                 unsigned        as_es:1; /* export shader */
>         } tes; /* tessellation evaluation shader */
>         struct {
>                 struct si_gs_prolog_bits prolog;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 642ce79..24c7b10 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3274,20 +3274,34 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>                 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
>                 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
>
>                 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
>                                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
>                                    S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
>                                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
>                                    S_008F0C_NUM_FORMAT(num_format) |
>                                    S_008F0C_DATA_FORMAT(data_format);
>                 v->format_size[i] = desc->block.bits / 8;
> +
> +               /* The hardware always treats the 2-bit alpha channel as
> +                * unsigned, so a shader workaround is needed.
> +                */
> +               if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
> +                       if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
> +                               v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
> +                       } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
> +                               v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
> +                       } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
> +                               /* This isn't actually used in OpenGL. */
> +                               v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
> +                       }
> +               }
>         }
>         memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
>
>         return v;
>  }
>
>  static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
>  {
>         struct si_context *sctx = (struct si_context *)ctx;
>         struct si_vertex_element *v = (struct si_vertex_element*)state;
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 3ebf578..c444a69 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -92,20 +92,21 @@ struct si_state_dsa {
>
>  struct si_stencil_ref {
>         struct r600_atom                atom;
>         struct pipe_stencil_ref         state;
>         struct si_dsa_stencil_ref_part  dsa_part;
>  };
>
>  struct si_vertex_element
>  {
>         unsigned                        count;
> +       uint32_t                        fix_fetch;
>         uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
>         uint32_t                        format_size[SI_MAX_ATTRIBS];
>         struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
>  };
>
>  union si_state {
>         struct {
>                 struct si_state_blend           *blend;
>                 struct si_state_rasterizer      *rasterizer;
>                 struct si_state_dsa             *dsa;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 2a41bf1..9e95fea 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -865,20 +865,24 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>         memset(key, 0, sizeof(*key));
>
>         switch (sel->type) {
>         case PIPE_SHADER_VERTEX:
>                 if (sctx->vertex_elements) {
>                         unsigned count = MIN2(sel->info.num_inputs,
>                                               sctx->vertex_elements->count);
>                         for (i = 0; i < count; ++i)
>                                 key->vs.prolog.instance_divisors[i] =
>                                         sctx->vertex_elements->elements[i].instance_divisor;
> +
> +                       key->vs.fix_fetch =
> +                               sctx->vertex_elements->fix_fetch &
> +                               u_bit_consecutive(0, 2 * count);
>                 }
>                 if (sctx->tes_shader.cso)
>                         key->vs.as_ls = 1;
>                 else if (sctx->gs_shader.cso)
>                         key->vs.as_es = 1;
>
>                 if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
>                     sctx->ps_shader.cso->info.uses_primid)
>                         key->vs.epilog.export_prim_id = 1;
>                 break;
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list