[Mesa-dev] [PATCH 08/14] radeonsi: Store inputs to memory when not using a TCS.
Nicolai Hähnle
nhaehnle at gmail.com
Tue May 10 16:40:06 UTC 2016
On 10.05.2016 05:52, Bas Nieuwenhuizen wrote:
> We need to copy the VS outputs to memory. I decided to do this
> using a shader key, as the value depends on other shaders.
>
> I also switch the fixed function TCS over to monolithic, as
> otherwise many of the user SGPRs need to be passed to the
> epilog, which increases register pressure, or complexity to
> avoid that. The main body of the fixed function TCS is not
> that interesting to precompile anyway, since we do it on
> demand and it is very small.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 45 +++++++++++++++++++++++++
> src/gallium/drivers/radeonsi/si_shader.h | 1 +
> src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++
> 3 files changed, 49 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 90830ee..50c48bf 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2423,6 +2423,48 @@ handle_semantic:
> }
> }
>
> +static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
> +{
> + struct si_shader_context *ctx = si_shader_context(bld_base);
> + struct gallivm_state *gallivm = bld_base->base.gallivm;
> + LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
> + LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
> + unsigned num_outputs, i;
> +
> + invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
> +
> + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
> + buffer = build_indexed_load_const(ctx, rw_buffers,
> + lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));
> +
> + buffer_offset = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);
> +
> + lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
> + lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
> + lds_vertex_stride, "");
> + lds_base = get_tcs_in_current_patch_offset(ctx);
> + lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
> +
> + num_outputs = util_last_bit64(ctx->shader->key.tcs.epilog.inputs_to_copy);
> + for (i = 0; i < num_outputs; i++) {
> + if (!((1llu << i) & ctx->shader->key.tcs.epilog.inputs_to_copy))
> + continue;
Use u_bit_scan64 to iterate over the set bits of the mask, please.
Nicolai
> +
> + LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
> + lp_build_const_int32(gallivm, 4 * i),
> + "");
> +
> + LLVMValueRef buffer_addr = get_buffer_address(ctx, invocation_id,
> + lp_build_const_int32(gallivm, i));
> +
> + LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
> + lds_ptr);
> +
> + build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr,
> + buffer_offset, 0);
> + }
> +}
> +
> static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
> LLVMValueRef rel_patch_id,
> LLVMValueRef invocation_id,
> @@ -2564,6 +2606,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
> return;
> }
>
> + si_copy_tcs_inputs(bld_base);
> si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
> }
>
> @@ -7374,6 +7417,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
> shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
> (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
> shader->key.tes.as_es != mainp->key.tes.as_es) ||
> + (shader->selector->type == PIPE_SHADER_TESS_CTRL &&
> + shader->key.tcs.epilog.inputs_to_copy) ||
> shader->selector->type == PIPE_SHADER_COMPUTE) {
> /* Monolithic shader (compiled as a whole, has many variants,
> * may take a long time to compile).
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 26be25e..67b457b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -304,6 +304,7 @@ struct si_vs_epilog_bits {
> /* Common TCS bits between the shader key and the epilog key. */
> struct si_tcs_epilog_bits {
> unsigned prim_mode:3;
> + uint64_t inputs_to_copy;
> };
>
> /* Common PS bits between the shader key and the prolog key. */
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 32ac95d..f48582a 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -841,6 +841,9 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
> case PIPE_SHADER_TESS_CTRL:
> key->tcs.epilog.prim_mode =
> sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
> +
> + if (sel == sctx->fixed_func_tcs_shader.cso)
> + key->tcs.epilog.inputs_to_copy = sctx->vs_shader.cso->outputs_written;
> break;
> case PIPE_SHADER_TESS_EVAL:
> if (sctx->gs_shader.cso)
>
More information about the mesa-dev
mailing list