[Mesa-dev] [PATCH 08/14] radeonsi: Store inputs to memory when not using a TCS.

Nicolai Hähnle nhaehnle at gmail.com
Tue May 10 16:40:06 UTC 2016


On 10.05.2016 05:52, Bas Nieuwenhuizen wrote:
> We need to copy the VS outputs to memory. I decided to do this
> using a shader key, as the value depends on other shaders.
>
> I also switch the fixed function TCS over to monolithic, as
> otherwise many of the user SGPRs need to be passed to the
> epilog, which increases register pressure, or complexity to
> avoid that. The main body of the fixed function TCS is not
> that interesting to precompile anyway, since we do it on
> demand and it is very small.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>   src/gallium/drivers/radeonsi/si_shader.c        | 45 +++++++++++++++++++++++++
>   src/gallium/drivers/radeonsi/si_shader.h        |  1 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c |  3 ++
>   3 files changed, 49 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 90830ee..50c48bf 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2423,6 +2423,48 @@ handle_semantic:
>   	}
>   }
>
> +static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
> +{
> +	struct si_shader_context *ctx = si_shader_context(bld_base);
> +	struct gallivm_state *gallivm = bld_base->base.gallivm;
> +	LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
> +	LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
> +	unsigned num_outputs, i;
> +
> +	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
> +
> +	rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
> +	buffer = build_indexed_load_const(ctx, rw_buffers,
> +	                lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));
> +
> +	buffer_offset = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);
> +
> +	lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
> +	lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
> +	                                 lds_vertex_stride, "");
> +	lds_base = get_tcs_in_current_patch_offset(ctx);
> +	lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
> +
> +	num_outputs = util_last_bit64(ctx->shader->key.tcs.epilog.inputs_to_copy);
> +	for (i = 0; i < num_outputs; i++) {
> +		if (!((1llu << i) & ctx->shader->key.tcs.epilog.inputs_to_copy))
> +			continue;

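Use u_bit_scan64 to iterate over the set bits of inputs_to_copy instead of
looping up to util_last_bit64 and testing each bit by hand, please.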

Nicolai

> +
> +		LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
> +		                            lp_build_const_int32(gallivm, 4 * i),
> +		                             "");
> +
> +		LLVMValueRef buffer_addr = get_buffer_address(ctx, invocation_id,
> +		                              lp_build_const_int32(gallivm, i));
> +
> +		LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
> +		                              lds_ptr);
> +
> +		build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr,
> +		                           buffer_offset, 0);
> +	}
> +}
> +
>   static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
>   				  LLVMValueRef rel_patch_id,
>   				  LLVMValueRef invocation_id,
> @@ -2564,6 +2606,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
>   		return;
>   	}
>
> +	si_copy_tcs_inputs(bld_base);
>   	si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
>   }
>
> @@ -7374,6 +7417,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
>   	      shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
>   	    (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
>   	     shader->key.tes.as_es != mainp->key.tes.as_es) ||
> +	    (shader->selector->type == PIPE_SHADER_TESS_CTRL &&
> +	     shader->key.tcs.epilog.inputs_to_copy) ||
>   	    shader->selector->type == PIPE_SHADER_COMPUTE) {
>   		/* Monolithic shader (compiled as a whole, has many variants,
>   		 * may take a long time to compile).
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 26be25e..67b457b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -304,6 +304,7 @@ struct si_vs_epilog_bits {
>   /* Common TCS bits between the shader key and the epilog key. */
>   struct si_tcs_epilog_bits {
>   	unsigned	prim_mode:3;
> +	uint64_t	inputs_to_copy;
>   };
>
>   /* Common PS bits between the shader key and the prolog key. */
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 32ac95d..f48582a 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -841,6 +841,9 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>   	case PIPE_SHADER_TESS_CTRL:
>   		key->tcs.epilog.prim_mode =
>   			sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
> +
> +		if (sel == sctx->fixed_func_tcs_shader.cso)
> +			key->tcs.epilog.inputs_to_copy = sctx->vs_shader.cso->outputs_written;
>   		break;
>   	case PIPE_SHADER_TESS_EVAL:
>   		if (sctx->gs_shader.cso)
>


More information about the mesa-dev mailing list