[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

Marek Olšák maraeo at gmail.com
Tue Aug 1 21:22:28 UTC 2017


Tested on SI.

Acked-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Tue, Aug 1, 2017 at 6:14 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> ds_swizzle looks like it has been supported since at least LLVM 3.9,
> so switch radeonsi and radv over to using it; -pro also
> uses this. We can now drop creating LDS for these operations,
> as the ds_swizzle operation doesn't actually write to LDS at all.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/common/ac_llvm_build.c           | 57 +++++++++++++++++++++++---------
>  src/amd/common/ac_llvm_build.h           |  1 -
>  src/amd/common/ac_nir_to_llvm.c          |  9 +----
>  src/gallium/drivers/radeonsi/si_shader.c | 16 +--------
>  4 files changed, 44 insertions(+), 39 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 9b939c1..a38aad6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>               bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
> -             LLVMValueRef lds,
>               LLVMValueRef val)
>  {
> -       LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
> +       LLVMValueRef tl, trbl, args[2];
>         LLVMValueRef result;
>
> -       thread_id = ac_get_thread_id(ctx);
> +       if (has_ds_bpermute) {
> +               LLVMValueRef thread_id, tl_tid, trbl_tid;
> +               thread_id = ac_get_thread_id(ctx);
>
> -       tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> -                             LLVMConstInt(ctx->i32, mask, false), "");
> +               tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> +                                     LLVMConstInt(ctx->i32, mask, false), "");
>
> -       trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> -                               LLVMConstInt(ctx->i32, idx, false), "");
> +               trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> +                                       LLVMConstInt(ctx->i32, idx, false), "");
>
> -       if (has_ds_bpermute) {
>                 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>                                        LLVMConstInt(ctx->i32, 4, false), "");
>                 args[1] = val;
> @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>                                           AC_FUNC_ATTR_READNONE |
>                                           AC_FUNC_ATTR_CONVERGENT);
>         } else {
> -               LLVMValueRef store_ptr, load_ptr0, load_ptr1;
> +               uint32_t masks[2];
> +
> +               switch (mask) {
> +               case AC_TID_MASK_TOP_LEFT:
> +                       masks[0] = 0x8000;
> +                       if (idx == 1)
> +                               masks[1] = 0x8055;
> +                       else
> +                               masks[1] = 0x80aa;
> +
> +                       break;
> +               case AC_TID_MASK_TOP:
> +                       masks[0] = 0x8044;
> +                       masks[1] = 0x80ee;
> +                       break;
> +               case AC_TID_MASK_LEFT:
> +                       masks[0] = 0x80a0;
> +                       masks[1] = 0x80f5;
> +                       break;
> +               }
>
> -               store_ptr = ac_build_gep0(ctx, lds, thread_id);
> -               load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
> -               load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
> +               args[0] = val;
> +               args[1] = LLVMConstInt(ctx->i32, masks[0], false);
>
> -               LLVMBuildStore(ctx->builder, val, store_ptr);
> -               tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
> -               trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
> +               tl = ac_build_intrinsic(ctx,
> +                                       "llvm.amdgcn.ds.swizzle", ctx->i32,
> +                                       args, 2,
> +                                       AC_FUNC_ATTR_READNONE |
> +                                       AC_FUNC_ATTR_CONVERGENT);
> +
> +               args[1] = LLVMConstInt(ctx->i32, masks[1], false);
> +               trbl = ac_build_intrinsic(ctx,
> +                                       "llvm.amdgcn.ds.swizzle", ctx->i32,
> +                                       args, 2,
> +                                       AC_FUNC_ATTR_READNONE |
> +                                       AC_FUNC_ATTR_CONVERGENT);
>         }
>
>         tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 09fd585..ee27d3c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>               bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
> -             LLVMValueRef lds,
>               LLVMValueRef val);
>
>  #define AC_SENDMSG_GS 2
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 530b581..dc765fe 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -68,8 +68,6 @@ struct ac_nir_context {
>         int num_locals;
>         LLVMValueRef *locals;
>
> -       LLVMValueRef ddxy_lds;
> -
>         struct nir_to_llvm_context *nctx; /* TODO get rid of this */
>  };
>
> @@ -1453,11 +1451,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>         LLVMValueRef result;
>         bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>
> -       if (!ctx->ddxy_lds && !has_ds_bpermute)
> -               ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
> -                                                      LLVMArrayType(ctx->ac.i32, 64),
> -                                                      "ddxy_lds", LOCAL_ADDR_SPACE);
> -
>         if (op == nir_op_fddx_fine || op == nir_op_fddx)
>                 mask = AC_TID_MASK_LEFT;
>         else if (op == nir_op_fddy_fine || op == nir_op_fddy)
> @@ -1474,7 +1467,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>                 idx = 2;
>
>         result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
> -                             mask, idx, ctx->ddxy_lds,
> +                             mask, idx,
>                               src0);
>         return result;
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 628e6f8..09053c3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
>
>         val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
>         val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
> -                           mask, idx, ctx->lds, val);
> +                           mask, idx, val);
>         emit_data->output[emit_data->chan] = val;
>  }
>
> @@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
>         assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
>         shader->info.num_input_vgprs -= num_prolog_vgprs;
>
> -       if (!ctx->screen->has_ds_bpermute &&
> -           bld_base->info &&
> -           (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
> -            bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
> -            bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
> -            bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
> -            bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
> -            bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
> -               ctx->lds =
> -                       LLVMAddGlobalInAddressSpace(gallivm->module,
> -                                                   LLVMArrayType(ctx->i32, 64),
> -                                                   "ddxy_lds",
> -                                                   LOCAL_ADDR_SPACE);
> -
>         if (shader->key.as_ls ||
>             ctx->type == PIPE_SHADER_TESS_CTRL ||
>             /* GFX9 has the ESGS ring buffer in LDS. */
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list