[Mesa-dev] [PATCH 2/2] radeonsi: fix BFE/BFI lowering for GLSL semantics

Sat Oct 29 10:03:26 UTC 2016

For the series:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Thu, Oct 27, 2016 at 11:39 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Fixes spec/arb_gpu_shader5/execution/built-in-functions/*-bitfield{Extract,Insert}
>
> Cc: 13.0 <mesa-stable at lists.freedesktop.org>
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 37 +++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 1ee9afb..0ffce50 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -452,44 +452,75 @@ build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
>                                    emit_data->arg_count, LLVMReadNoneAttribute);
>  }
>
>  static void emit_bfi(const struct lp_build_tgsi_action *action,
>                      struct lp_build_tgsi_context *bld_base,
>                      struct lp_build_emit_data *emit_data)
>  {
>         struct gallivm_state *gallivm = bld_base->base.gallivm;
>         LLVMBuilderRef builder = gallivm->builder;
>         LLVMValueRef bfi_args[3];
> +       LLVMValueRef bfi_sm5;
> +       LLVMValueRef cond;
>
>         // Calculate the bitmask: (((1 << src3) - 1) << src2
>         bfi_args[0] = LLVMBuildShl(builder,
>                                    LLVMBuildSub(builder,
>                                                 LLVMBuildShl(builder,
>                                                              bld_base->int_bld.one,
>                                                              emit_data->args[3], ""),
>                                                 bld_base->int_bld.one, ""),
>                                    emit_data->args[2], "");
>
>         bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
>                                    emit_data->args[2], "");
>
>         bfi_args[2] = emit_data->args[0];
>
>         /* Calculate:
>          *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
>          * Use the right-hand side, which the LLVM backend can convert to V_BFI.
>          */
> -       emit_data->output[emit_data->chan] =
> +       bfi_sm5 =
>                 LLVMBuildXor(builder, bfi_args[2],
>                         LLVMBuildAnd(builder, bfi_args[0],
>                                 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
>                                              ""), ""), "");
> +
> +       /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
> +        * uses the convenient V_BFI lowering for the above, which follows SM5
> +        * and disagrees with GLSL semantics when bits (src3) is 32.
> +        */
> +       cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
> +                            lp_build_const_int32(gallivm, 32), "");
> +       emit_data->output[emit_data->chan] =
> +               LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
> +}
> +
> +static void emit_bfe(const struct lp_build_tgsi_action *action,
> +                    struct lp_build_tgsi_context *bld_base,
> +                    struct lp_build_emit_data *emit_data)
> +{
> +       struct gallivm_state *gallivm = bld_base->base.gallivm;
> +       LLVMBuilderRef builder = gallivm->builder;
> +       LLVMValueRef bfe_sm5;
> +       LLVMValueRef cond;
> +
> +       bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
> +                                    emit_data->dst_type, emit_data->args,
> +                                    emit_data->arg_count, LLVMReadNoneAttribute);
> +
> +       /* Correct for GLSL semantics. */
> +       cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
> +                            lp_build_const_int32(gallivm, 32), "");
> +       emit_data->output[emit_data->chan] =
> +               LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
>  }
>
>  /* this is ffs in C */
>  static void emit_lsb(const struct lp_build_tgsi_action *action,
>                      struct lp_build_tgsi_context *bld_base,
>                      struct lp_build_emit_data *emit_data)
>  {
>         struct gallivm_state *gallivm = bld_base->base.gallivm;
>         LLVMValueRef args[2] = {
>                 emit_data->args[0],
> @@ -767,21 +798,21 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
>         bld_base->op_actions[TGSI_OPCODE_FMA].emit =
>                 bld_base->op_actions[TGSI_OPCODE_MAD].emit;
>         bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
>         bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
>         bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
>         bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
>         bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
>         bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
>         bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
>         bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
> -       bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
> +       bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
>         bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
>         bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
>         bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
>         bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
>         bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
>         bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
>         bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
>         bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
>         bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
>         bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
> @@ -819,21 +850,21 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
>         bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
>         bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
>         bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
>         bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
>         bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
>         bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
>         bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
>         bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
>         bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
>         bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
> -       bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
> +       bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
>         bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
>         bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
>         bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
>         bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
>         bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
>         bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
>         bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
>         bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
>         bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
>         bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev