[Mesa-dev] [PATCH v2 12/14] radeonsi: Use barrier instructions for TCS barriers.

Mon May 16 23:39:30 UTC 2016

On Mon, May 16, 2016 at 7:48 PM, Marek Olšák <maraeo at gmail.com> wrote:
> NAK. The barrier is never needed with tessellation, because every
> patch always fits into a single wave and there is no patch splitting
> between waves.
>
> Marek

OK.

I don't see any optimization barrier before loading the tess factors,
though. Is the part that added the barrier (now an optimization barrier)
before loading the tess factors also NAKed?

- Bas

>
> On Fri, May 13, 2016 at 3:37 AM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
>> With potentially more than 1 wave working on a patch we
>> need the barrier.
>>
>> Also adds a barrier before loading the tessellation factors to
>> write them to the TF ring.
>>
>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
>> ---
>>  src/gallium/drivers/radeonsi/si_shader.c | 48 ++++++++++----------------------
>>  1 file changed, 15 insertions(+), 33 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
>> index 087b5bb..235983a 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -2514,6 +2514,19 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
>>         }
>>  }
>>
>> +static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
>> +                                struct lp_build_tgsi_context *bld_base,
>> +                                struct lp_build_emit_data *emit_data)
>> +{
>> +       struct si_shader_context *ctx = si_shader_context(bld_base);
>> +       struct gallivm_state *gallivm = bld_base->base.gallivm;
>> +
>> +       lp_build_intrinsic(gallivm->builder,
>> +                          HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
>> +                                              : "llvm.AMDGPU.barrier.local",
>> +                          ctx->voidt, NULL, 0, LLVMNoUnwindAttribute);
>> +}
>> +
>>  static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
>>                                   LLVMValueRef rel_patch_id,
>>                                   LLVMValueRef invocation_id,
>> @@ -2528,6 +2541,8 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
>>         unsigned stride, outer_comps, inner_comps, i;
>>         struct lp_build_if_state if_ctx;
>>
>> +       si_llvm_emit_barrier(NULL, bld_base, NULL);
>> +
>>         /* Do this only for invocation 0, because the tess levels are per-patch,
>>          * not per-vertex.
>>          *
>> @@ -3213,18 +3228,6 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
>>                                 struct lp_build_tgsi_context *bld_base,
>>                                 struct lp_build_emit_data *emit_data);
>>
>> -/* Prevent optimizations (at least of memory accesses) across the current
>> - * point in the program by emitting empty inline assembly that is marked as
>> - * having side effects.
>> - */
>> -static void emit_optimization_barrier(struct si_shader_context *ctx)
>> -{
>> -       LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
>> -       LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
>> -       LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
>> -       LLVMBuildCall(builder, inlineasm, NULL, 0, "");
>> -}
>> -
>>  static void emit_waitcnt(struct si_shader_context *ctx)
>>  {
>>         struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
>> @@ -5154,27 +5157,6 @@ static void si_llvm_emit_primitive(
>>                            ctx->voidt, args, 2, LLVMNoUnwindAttribute);
>>  }
>>
>> -static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
>> -                                struct lp_build_tgsi_context *bld_base,
>> -                                struct lp_build_emit_data *emit_data)
>> -{
>> -       struct si_shader_context *ctx = si_shader_context(bld_base);
>> -       struct gallivm_state *gallivm = bld_base->base.gallivm;
>> -
>> -       /* The real barrier instruction isn't needed, because an entire patch
>> -        * always fits into a single wave.
>> -        */
>> -       if (ctx->type == PIPE_SHADER_TESS_CTRL) {
>> -               emit_optimization_barrier(ctx);
>> -               return;
>> -       }
>> -
>> -       lp_build_intrinsic(gallivm->builder,
>> -                          HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
>> -                                              : "llvm.AMDGPU.barrier.local",
>> -                          ctx->voidt, NULL, 0, LLVMNoUnwindAttribute);
>> -}
>> -
>>  static const struct lp_build_tgsi_action tex_action = {
>>         .fetch_args = tex_fetch_args,
>>         .emit = build_tex_intrinsic,
>> --
>> 2.8.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev