[Mesa-dev] [PATCH] radv/ac: setup mrt exports then export them in one go.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Apr 25 19:51:54 UTC 2017


On Sun, Apr 23, 2017 at 8:54 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> Noticed while looking at Sascha Willems deferred shaders.
>
> This is a bit of an llvm workaround, llvm was producing this:
>         v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
>         v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
>         s_waitcnt vmcnt(0)                                       ; BF8C0F70
>         exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
>         s_waitcnt expcnt(0)                                      ; BF8C0F0F
>         v_cvt_pkrtz_f16_f32_e64 v4, v12, v5                      ; D2960004 00020B0C
>         v_cvt_pkrtz_f16_f32_e64 v5, v14, 1.0                     ; D2960005 0001E50E
>         exp mrt1 v4, v4, v5, v5 compr                            ; C400041F 00000504
>         s_waitcnt expcnt(0)                                      ; BF8C0F0F
>         v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
>         v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
>         exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100
>
> After this change:
>         v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
>         s_waitcnt vmcnt(0)                                       ; BF8C0F70
>         v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
>         v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
>         v_cvt_pkrtz_f16_f32_e64 v5, v12, v5                      ; D2960005 00020B0C
>         v_cvt_pkrtz_f16_f32_e64 v7, v14, 1.0                     ; D2960007 0001E50E
>         exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
>         v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
>         exp mrt1 v5, v5, v7, v7 compr                            ; C400041F 00000705
>         exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100
>
> No waitcnt for exports are emitted.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c | 34 +++++++++++++++++++---------------
>  1 file changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 38d5359..4d2e469 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -5572,24 +5572,22 @@ handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
>         write_tess_factors(ctx);
>  }
>
> -static void
> +static bool
>  si_export_mrt_color(struct nir_to_llvm_context *ctx,
> -                   LLVMValueRef *color, unsigned param, bool is_last)
> +                   LLVMValueRef *color, unsigned param, bool is_last,
> +                   struct ac_export_args *args)
>  {
> -
> -       struct ac_export_args args;
> -
>         /* Export */
>         si_llvm_init_export_args(ctx, color, param,
> -                                &args);
> +                                args);
>
>         if (is_last) {
> -               args.valid_mask = 1; /* whether the EXEC mask is valid */
> -               args.done = 1; /* DONE bit */
> -       } else if (!args.enabled_channels)
> -               return; /* unnecessary NULL export */
> +               args->valid_mask = 1; /* whether the EXEC mask is valid */
> +               args->done = 1; /* DONE bit */
> +       } else if (!args->enabled_channels)
> +               return false; /* unnecessary NULL export */
>
> -       ac_build_export(&ctx->ac, &args);
> +       return true;
>  }
>
>  static void
> @@ -5639,6 +5637,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
>  {
>         unsigned index = 0;
>         LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> +       struct ac_export_args color_args[8];
>
>         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
>                 LLVMValueRef values[4];
> @@ -5667,15 +5666,20 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
>                         if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
>                                 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
>
> -                       si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
> -                       index++;
> +                       bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last, &color_args[index]);
> +                       if (ret)
> +                               index++;

If we skip the increment here, it gets out of sync we the MRT it was
supposed to be for? Might just be defensive programming, but would it
make sense to replace V_008DFC_SQ_EXP_MRT + index with
V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_COLOR0)  ?
>                 }
>         }
>
> +       for (unsigned i = 0; i < index; i++)
> +               ac_build_export(&ctx->ac, &color_args[i]);
>         if (depth || stencil || samplemask)
>                 si_export_mrt_z(ctx, depth, stencil, samplemask);
> -       else if (!index)
> -               si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
> +       else if (!index) {
> +               si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
> +               ac_build_export(&ctx->ac, &color_args[0]);
> +       }
>
>         ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
>  }
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list