[Mesa-stable] [PATCH 2/5] radeonsi/gfx9: always wrap GS and TCS in an if-block
Marek Olšák
maraeo at gmail.com
Tue Jul 25 18:47:54 UTC 2017
On Mon, Jul 17, 2017 at 12:57 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> With merged ESGS shaders, the GS part of a wave may be empty, and the
> hardware gets confused if any GS messages are sent from that wave. Since
> S_SENDMSG is executed even when EXEC = 0, we have to wrap even
> non-monolithic GS shaders in an if-block, so that the entire shader and
> hence the S_SENDMSG instructions are skipped in empty waves.
>
> This change is not required for TCS/HS, but applying it there as well
> simplifies the code a bit.
>
> Fixes GL45-CTS.geometry_shader.rendering.rendering.*
>
> Cc: mesa-stable at lists.freedesktop.org
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 74 +++++++++++++----------
> src/gallium/drivers/radeonsi/si_shader_internal.h | 3 +
> 2 files changed, 45 insertions(+), 32 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 7a44e61..9aeda49 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2713,20 +2713,23 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
> }
>
> /* This only writes the tessellation factor levels. */
> static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
>
> si_copy_tcs_inputs(bld_base);
>
> + if (ctx->screen->b.chip_class >= GFX9)
> + lp_build_endif(&ctx->merged_wrap_if_state);
> +
> rel_patch_id = get_rel_patch_id(ctx);
> invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
> tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
>
> /* Return epilog parameters from this function. */
> LLVMBuilderRef builder = ctx->gallivm.builder;
> LLVMValueRef ret = ctx->return_value;
> unsigned vgpr;
>
> if (ctx->screen->b.chip_class >= GFX9) {
> @@ -2946,20 +2949,23 @@ static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
> else
> return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id);
> }
>
> static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
>
> ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
> si_get_gs_wave_id(ctx));
> +
> + if (ctx->screen->b.chip_class >= GFX9)
> + lp_build_endif(&ctx->merged_wrap_if_state);
> }
>
> static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = &ctx->gallivm;
> struct tgsi_shader_info *info = &ctx->shader->selector->info;
> struct si_shader_output_values *outputs = NULL;
> int i,j;
>
> @@ -5523,39 +5529,55 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
> break;
> default:
> assert(!"Unsupported shader type");
> return false;
> }
>
> create_function(ctx);
> preload_ring_buffers(ctx);
>
> /* For GFX9 merged shaders:
> - * - Set EXEC. If the prolog is present, set EXEC there instead.
> + * - Set EXEC for the first shader. If the prolog is present, set
> + * EXEC there instead.
> * - Add a barrier before the second shader.
> + * - In the second shader, reset EXEC to ~0 and wrap the main part in
> + * an if-statement. This is required for correctness in geometry
> + * shaders, to ensure that empty GS waves do not send GS_EMIT and
> + * GS_CUT messages.
> *
> - * The same thing for monolithic shaders is done in
> - * si_build_wrapper_function.
> + * For monolithic merged shaders, the first shader is wrapped in an
> + * if-block together with its prolog in si_build_wrapper_function.
> */
> - if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
> - if (sel->info.num_instructions > 1 && /* not empty shader */
> + if (ctx->screen->b.chip_class >= GFX9) {
> + if (!is_monolithic &&
> + sel->info.num_instructions > 1 && /* not empty shader */
> (shader->key.as_es || shader->key.as_ls) &&
> (ctx->type == PIPE_SHADER_TESS_EVAL ||
> (ctx->type == PIPE_SHADER_VERTEX &&
> !sel->vs_needs_prolog))) {
> si_init_exec_from_input(ctx,
> ctx->param_merged_wave_info, 0);
> } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
> ctx->type == PIPE_SHADER_GEOMETRY) {
> - si_init_exec_from_input(ctx,
> - ctx->param_merged_wave_info, 8);
> + if (!is_monolithic)
> + si_init_exec_full_mask(ctx);
The TCS epilog will execute with the full EXEC mask, which might cause issues.
Marek
More information about the mesa-stable
mailing list