[Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last
Nicolai Hähnle
nhaehnle at gmail.com
Tue Jul 19 13:43:30 UTC 2016
Patches 1, 3 & 4 are
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 18.07.2016 14:14, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This effectively removes s_waitcnt instructions after FP16 exports.
>
> Before:
>
> v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
> v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
> exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100
> s_waitcnt expcnt(0) ; BF8C0F0F
> v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
> v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06
> exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100
> s_waitcnt expcnt(0) ; BF8C0F0F
> v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308
> v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
> exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100
> s_waitcnt expcnt(0) ; BF8C0F0F
> v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
> v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
> exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100
> s_endpgm ; BF810000
>
> After:
>
> v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
> v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
> v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04
> v_cvt_pkrtz_f16_f32_e32 v3, v6, v7 ; 5E060F06
> exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100
> v_cvt_pkrtz_f16_f32_e32 v4, v8, v9 ; 5E081308
> v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
> exp 15, 1, 1, 0, 0, v2, v3, v0, v0 ; F800041F 00000302
> v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
> v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
> exp 15, 2, 1, 0, 0, v4, v5, v0, v0 ; F800042F 00000504
> exp 15, 3, 1, 1, 1, v6, v7, v0, v0 ; F8001C3F 00000706
> s_endpgm ; BF810000
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 44 ++++++++++++++++++++++----------
> 1 file changed, 31 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 41bcbd4..adf706c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
> FREE(outputs);
> }
>
> +struct si_ps_exports {
> + unsigned num;
> + LLVMValueRef args[10][9];
> +};
> +
> static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
> - LLVMValueRef depth, LLVMValueRef stencil,
> - LLVMValueRef samplemask)
> + LLVMValueRef depth, LLVMValueRef stencil,
> + LLVMValueRef samplemask, struct si_ps_exports *exp)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct lp_build_context *base = &bld_base->base;
> @@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
> /* Specify which components to enable */
> args[0] = lp_build_const_int32(base->gallivm, mask);
>
> - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> - ctx->voidt, args, 9, 0);
> + memcpy(exp->args[exp->num++], args, sizeof(args));
> }
>
> static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
> LLVMValueRef *color, unsigned index,
> unsigned samplemask_param,
> - bool is_last)
> + bool is_last, struct si_ps_exports *exp)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct lp_build_context *base = &bld_base->base;
> @@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
> } else if (args[c][0] == bld_base->uint_bld.zero)
> continue; /* unnecessary NULL export */
>
> - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> - ctx->voidt, args[c], 9, 0);
> + memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
> }
> } else {
> LLVMValueRef args[9];
> @@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
> } else if (args[0] == bld_base->uint_bld.zero)
> return; /* unnecessary NULL export */
>
> - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
> - ctx->voidt, args, 9, 0);
> + memcpy(exp->args[exp->num++], args, sizeof(args));
> }
> }
>
> +static void si_emit_ps_exports(struct si_shader_context *ctx,
> + struct si_ps_exports *exp)
> +{
> + for (unsigned i = 0; i < exp->num; i++)
> + lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
> + "llvm.SI.export", ctx->voidt,
> + exp->args[i], 9, 0);
> +}
> +
> static void si_export_null(struct lp_build_tgsi_context *bld_base)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> @@ -3069,6 +3080,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> int last_color_export = -1;
> int i;
> + struct si_ps_exports exp = {};
>
> /* Determine the last export. If MRTZ is present, it's always last.
> * Otherwise, find the last color export.
> @@ -3135,7 +3147,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
>
> si_export_mrt_color(bld_base, color, semantic_index,
> SI_PARAM_SAMPLE_COVERAGE,
> - last_color_export == i);
> + last_color_export == i, &exp);
> break;
> default:
> fprintf(stderr,
> @@ -3145,7 +3157,9 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
> }
>
> if (depth || stencil || samplemask)
> - si_export_mrt_z(bld_base, depth, stencil, samplemask);
> + si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
> +
> + si_emit_ps_exports(ctx, &exp);
> }
>
> /**
> @@ -7495,6 +7509,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> int last_sgpr, num_params, i;
> bool status = true;
> + struct si_ps_exports exp = {};
>
> si_init_shader_ctx(&ctx, sscreen, &shader, tm);
> ctx.type = PIPE_SHADER_FRAGMENT;
> @@ -7564,7 +7579,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
>
> si_export_mrt_color(bld_base, color, mrt,
> num_params - 1,
> - mrt == last_color_export);
> + mrt == last_color_export, &exp);
> }
>
> /* Process depth, stencil, samplemask. */
> @@ -7576,10 +7591,13 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
> samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
>
> if (depth || stencil || samplemask)
> - si_export_mrt_z(bld_base, depth, stencil, samplemask);
> + si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
> else if (last_color_export == -1)
> si_export_null(bld_base);
>
> + if (exp.num)
> + si_emit_ps_exports(&ctx, &exp);
> +
> /* Compile. */
> LLVMBuildRetVoid(gallivm->builder);
> radeon_llvm_finalize_module(&ctx.radeon_bld);
>
More information about the mesa-dev mailing list