[Mesa-dev] [PATCH] radv/ac: setup mrt exports then export them in one go.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Tue Apr 25 19:51:54 UTC 2017
On Sun, Apr 23, 2017 at 8:54 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> Noticed while looking at Sascha Willems deferred shaders.
>
> This is a bit of an llvm workaround, llvm was producing this:
> v_cvt_pkrtz_f16_f32_e64 v4, v7, v8 ; D2960004 00021107
> v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0 ; D2960006 0001E509
> s_waitcnt vmcnt(0) ; BF8C0F70
> exp mrt0 v4, v4, v6, v6 compr ; C400040F 00000604
> s_waitcnt expcnt(0) ; BF8C0F0F
> v_cvt_pkrtz_f16_f32_e64 v4, v12, v5 ; D2960004 00020B0C
> v_cvt_pkrtz_f16_f32_e64 v5, v14, 1.0 ; D2960005 0001E50E
> exp mrt1 v4, v4, v5, v5 compr ; C400041F 00000504
> s_waitcnt expcnt(0) ; BF8C0F0F
> v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300
> v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702
> exp mrt2 v0, v0, v1, v1 done compr vm ; C4001C2F 00000100
>
> After this change:
> v_cvt_pkrtz_f16_f32_e64 v4, v7, v8 ; D2960004 00021107
> s_waitcnt vmcnt(0) ; BF8C0F70
> v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300
> v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0 ; D2960006 0001E509
> v_cvt_pkrtz_f16_f32_e64 v5, v12, v5 ; D2960005 00020B0C
> v_cvt_pkrtz_f16_f32_e64 v7, v14, 1.0 ; D2960007 0001E50E
> exp mrt0 v4, v4, v6, v6 compr ; C400040F 00000604
> v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702
> exp mrt1 v5, v5, v7, v7 compr ; C400041F 00000705
> exp mrt2 v0, v0, v1, v1 done compr vm ; C4001C2F 00000100
>
> No waitcnt for exports are emitted.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/amd/common/ac_nir_to_llvm.c | 34 +++++++++++++++++++---------------
> 1 file changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 38d5359..4d2e469 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -5572,24 +5572,22 @@ handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
> write_tess_factors(ctx);
> }
>
> -static void
> +static bool
> si_export_mrt_color(struct nir_to_llvm_context *ctx,
> - LLVMValueRef *color, unsigned param, bool is_last)
> + LLVMValueRef *color, unsigned param, bool is_last,
> + struct ac_export_args *args)
> {
> -
> - struct ac_export_args args;
> -
> /* Export */
> si_llvm_init_export_args(ctx, color, param,
> - &args);
> + args);
>
> if (is_last) {
> - args.valid_mask = 1; /* whether the EXEC mask is valid */
> - args.done = 1; /* DONE bit */
> - } else if (!args.enabled_channels)
> - return; /* unnecessary NULL export */
> + args->valid_mask = 1; /* whether the EXEC mask is valid */
> + args->done = 1; /* DONE bit */
> + } else if (!args->enabled_channels)
> + return false; /* unnecessary NULL export */
>
> - ac_build_export(&ctx->ac, &args);
> + return true;
> }
>
> static void
> @@ -5639,6 +5637,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
> {
> unsigned index = 0;
> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> + struct ac_export_args color_args[8];
>
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> LLVMValueRef values[4];
> @@ -5667,15 +5666,20 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
> if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
> last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
>
> - si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
> - index++;
> + bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last, &color_args[index]);
> + if (ret)
> + index++;
If we skip the increment here, it gets out of sync we the MRT it was
supposed to be for? Might just be defensive programming, but would it
make sense to replace V_008DFC_SQ_EXP_MRT + index with
V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_COLOR0) ?
> }
> }
>
> + for (unsigned i = 0; i < index; i++)
> + ac_build_export(&ctx->ac, &color_args[i]);
> if (depth || stencil || samplemask)
> si_export_mrt_z(ctx, depth, stencil, samplemask);
> - else if (!index)
> - si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
> + else if (!index) {
> + si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
> + ac_build_export(&ctx->ac, &color_args[0]);
> + }
>
> ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
> }
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list