[Mesa-dev] [PATCH] radeonsi: fix vertex fetches for 2_10_10_10 formats
Marek Olšák
maraeo at gmail.com
Fri Nov 4 11:21:51 UTC 2016
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Thu, Nov 3, 2016 at 11:16 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> The hardware always treats the alpha channel as unsigned, so add a shader
> workaround. This is rare enough that we'll just build a monolithic vertex
> shader.
>
> The SINT case cannot actually happen in OpenGL, but I've included it for
> completeness since it's just a mix of the other cases.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 54 ++++++++++++++++++++++---
> src/gallium/drivers/radeonsi/si_shader.h | 11 +++++
> src/gallium/drivers/radeonsi/si_state.c | 14 +++++++
> src/gallium/drivers/radeonsi/si_state.h | 1 +
> src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++
> 5 files changed, 78 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 28a8b1f..b170eb9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -362,67 +362,105 @@ static LLVMValueRef get_instance_index_for_fetch(
> /* The division must be done before START_INSTANCE is added. */
> if (divisor > 1)
> result = LLVMBuildUDiv(gallivm->builder, result,
> lp_build_const_int32(gallivm, divisor), "");
>
> return LLVMBuildAdd(gallivm->builder, result,
> LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
> }
>
> static void declare_input_vs(
> - struct si_shader_context *radeon_bld,
> + struct si_shader_context *ctx,
> unsigned input_index,
> const struct tgsi_full_declaration *decl,
> LLVMValueRef out[4])
> {
> - struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
> + struct lp_build_context *base = &ctx->soa.bld_base.base;
> struct gallivm_state *gallivm = base->gallivm;
> - struct si_shader_context *ctx =
> - si_shader_context(&radeon_bld->soa.bld_base);
>
> unsigned chan;
> + unsigned fix_fetch;
>
> LLVMValueRef t_list_ptr;
> LLVMValueRef t_offset;
> LLVMValueRef t_list;
> LLVMValueRef attribute_offset;
> LLVMValueRef buffer_index;
> LLVMValueRef args[3];
> LLVMValueRef input;
>
> /* Load the T list */
> t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
>
> t_offset = lp_build_const_int32(gallivm, input_index);
>
> t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
>
> /* Build the attribute offset */
> attribute_offset = lp_build_const_int32(gallivm, 0);
>
> - buffer_index = LLVMGetParam(radeon_bld->main_fn,
> + buffer_index = LLVMGetParam(ctx->main_fn,
> ctx->param_vertex_index0 +
> input_index);
>
> args[0] = t_list;
> args[1] = attribute_offset;
> args[2] = buffer_index;
> input = lp_build_intrinsic(gallivm->builder,
> "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> LLVMReadNoneAttribute);
>
> /* Break up the vec4 into individual components */
> for (chan = 0; chan < 4; chan++) {
> LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
> out[chan] = LLVMBuildExtractElement(gallivm->builder,
> input, llvm_chan, "");
> }
> +
> + fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3;
> + if (fix_fetch) {
> + /* The hardware returns an unsigned value; convert it to a
> + * signed one.
> + */
> + LLVMValueRef tmp = out[3];
> + LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
> +
> + /* First, recover the sign-extended signed integer value. */
> + if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
> + tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
> + else
> + tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, "");
> +
> + /* For the integer-like cases, do a natural sign extension.
> + *
> + * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
> + * and happen to contain 0, 1, 2, 3 as the two LSBs of the
> + * exponent (float bits 24:23), hence the left shift by 7.
> + */
> + tmp = LLVMBuildShl(gallivm->builder, tmp,
> + fix_fetch == SI_FIX_FETCH_A2_SNORM ?
> + LLVMConstInt(ctx->i32, 7, 0) : c30, "");
> + tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, "");
> +
> + /* Convert back to the right type. */
> + if (fix_fetch == SI_FIX_FETCH_A2_SNORM) {
> + LLVMValueRef clamp;
> + LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
> + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
> + clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
> + tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
> + } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
> + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
> + }
> +
> + out[3] = tmp;
> + }
> }
>
> static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
> unsigned swizzle)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
>
> if (swizzle > 0)
> return bld_base->uint_bld.zero;
>
> @@ -8095,25 +8133,29 @@ static void si_fix_num_sgprs(struct si_shader *shader)
> int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
> struct si_shader *shader,
> struct pipe_debug_callback *debug)
> {
> struct si_shader_selector *sel = shader->selector;
> struct si_shader *mainp = sel->main_shader_part;
> int r;
>
> /* LS, ES, VS are compiled on demand if the main part hasn't been
> * compiled for that stage.
> + *
> + * Vertex shaders are compiled on demand when a vertex fetch
> + * workaround must be applied.
> */
> if (!mainp ||
> (sel->type == PIPE_SHADER_VERTEX &&
> (shader->key.vs.as_es != mainp->key.vs.as_es ||
> - shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
> + shader->key.vs.as_ls != mainp->key.vs.as_ls ||
> + shader->key.vs.fix_fetch)) ||
> (sel->type == PIPE_SHADER_TESS_EVAL &&
> shader->key.tes.as_es != mainp->key.tes.as_es) ||
> (sel->type == PIPE_SHADER_TESS_CTRL &&
> shader->key.tcs.epilog.inputs_to_copy) ||
> sel->type == PIPE_SHADER_COMPUTE) {
> /* Monolithic shader (compiled as a whole, has many variants,
> * may take a long time to compile).
> */
> r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
> if (r)
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index d8ab2a4..59e7bfb 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -226,20 +226,28 @@ enum {
>
> SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
> };
>
> /* SI-specific system values. */
> enum {
> TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
> TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
> };
>
> +/* For VS shader key fix_fetch. */
> +enum {
> + SI_FIX_FETCH_NONE = 0,
> + SI_FIX_FETCH_A2_SNORM = 1,
> + SI_FIX_FETCH_A2_SSCALED = 2,
> + SI_FIX_FETCH_A2_SINT = 3,
> +};
> +
> struct si_shader;
>
> /* A shader selector is a gallium CSO and contains shader variants and
> * binaries for one TGSI program. This can be shared by multiple contexts.
> */
> struct si_shader_selector {
> struct si_screen *screen;
> struct util_queue_fence ready;
>
> /* Should only be used by si_init_shader_selector_async
> @@ -393,20 +401,23 @@ union si_shader_part_key {
> union si_shader_key {
> struct {
> struct si_ps_prolog_bits prolog;
> struct si_ps_epilog_bits epilog;
> } ps;
> struct {
> struct si_vs_prolog_bits prolog;
> struct si_vs_epilog_bits epilog;
> unsigned as_es:1; /* export shader */
> unsigned as_ls:1; /* local shader */
> +
> + /* One pair of bits for every input: SI_FIX_FETCH_* enums. */
> + uint32_t fix_fetch;
> } vs;
> struct {
> struct si_tcs_epilog_bits epilog;
> } tcs; /* tessellation control shader */
> struct {
> struct si_vs_epilog_bits epilog; /* same as VS */
> unsigned as_es:1; /* export shader */
> } tes; /* tessellation evaluation shader */
> struct {
> struct si_gs_prolog_bits prolog;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 642ce79..24c7b10 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3274,20 +3274,34 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
> data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
> num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
>
> v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
> S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
> S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
> S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
> S_008F0C_NUM_FORMAT(num_format) |
> S_008F0C_DATA_FORMAT(data_format);
> v->format_size[i] = desc->block.bits / 8;
> +
> + /* The hardware always treats the 2-bit alpha channel as
> + * unsigned, so a shader workaround is needed.
> + */
> + if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
> + if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
> + v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
> + } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
> + v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
> + } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
> + /* This isn't actually used in OpenGL. */
> + v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
> + }
> + }
> }
> memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
>
> return v;
> }
>
> static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct si_vertex_element *v = (struct si_vertex_element*)state;
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 3ebf578..c444a69 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -92,20 +92,21 @@ struct si_state_dsa {
>
> struct si_stencil_ref {
> struct r600_atom atom;
> struct pipe_stencil_ref state;
> struct si_dsa_stencil_ref_part dsa_part;
> };
>
> struct si_vertex_element
> {
> unsigned count;
> + uint32_t fix_fetch;
> uint32_t rsrc_word3[SI_MAX_ATTRIBS];
> uint32_t format_size[SI_MAX_ATTRIBS];
> struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
> };
>
> union si_state {
> struct {
> struct si_state_blend *blend;
> struct si_state_rasterizer *rasterizer;
> struct si_state_dsa *dsa;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 2a41bf1..9e95fea 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -865,20 +865,24 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
> memset(key, 0, sizeof(*key));
>
> switch (sel->type) {
> case PIPE_SHADER_VERTEX:
> if (sctx->vertex_elements) {
> unsigned count = MIN2(sel->info.num_inputs,
> sctx->vertex_elements->count);
> for (i = 0; i < count; ++i)
> key->vs.prolog.instance_divisors[i] =
> sctx->vertex_elements->elements[i].instance_divisor;
> +
> + key->vs.fix_fetch =
> + sctx->vertex_elements->fix_fetch &
> + u_bit_consecutive(0, 2 * count);
> }
> if (sctx->tes_shader.cso)
> key->vs.as_ls = 1;
> else if (sctx->gs_shader.cso)
> key->vs.as_es = 1;
>
> if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
> sctx->ps_shader.cso->info.uses_primid)
> key->vs.epilog.export_prim_id = 1;
> break;
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list