[Mesa-dev] [PATCH 2/3] radeonsi: implement PK2H and UP2H opcodes

Tue Feb 2 20:31:06 UTC 2016

On Tue, Feb 2, 2016 at 9:23 PM, Roland Scheidegger <sroland at vmware.com> wrote:
> Am 02.02.2016 um 21:15 schrieb Marek Olšák:
>> On Sat, Jan 30, 2016 at 12:46 AM, Marek Olšák <maraeo at gmail.com> wrote:
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> Based on a gallivm patch by Ilia Mirkin.
>>>
>>> +8 piglit regressions due to precision issues
>>> ---
>>>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 72 ++++++++++++++++++++++
>>>  src/gallium/drivers/radeonsi/si_pipe.c             |  2 +-
>>>  2 files changed, 73 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>>> index 76be376..d8e05ce 100644
>>> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>>> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>>> @@ -1452,6 +1452,74 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
>>>                                 emit_data->args[1], "");
>>>  }
>>>
>>> +static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base,
>>> +                           struct lp_build_emit_data * emit_data)
>>> +{
>>> +       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
>>> +                                                0, TGSI_CHAN_X);
>>> +       emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
>>> +                                                0, TGSI_CHAN_Y);
>>> +}
>>> +
>>> +static void emit_pk2h(const struct lp_build_tgsi_action *action,
>>> +                     struct lp_build_tgsi_context *bld_base,
>>> +                     struct lp_build_emit_data *emit_data)
>>> +{
>>> +       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>>> +       LLVMContextRef context = bld_base->base.gallivm->context;
>>> +       struct lp_build_context *uint_bld = &bld_base->uint_bld;
>>> +       LLVMTypeRef fp16, i16;
>>> +       LLVMValueRef const16, comp[2];
>>> +       unsigned i;
>>> +
>>> +       fp16 = LLVMHalfTypeInContext(context);
>>> +       i16 = LLVMInt16TypeInContext(context);
>>> +       const16 = lp_build_const_int32(uint_bld->gallivm, 16);
>>> +
>>> +       for (i = 0; i < 2; i++) {
>>> +               comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
>>> +               comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
>>> +               comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
>>> +       }
>>> +
>>> +       comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
>>> +       comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
>>> +
>>> +       emit_data->output[emit_data->chan] = comp[0];
>>> +}
>>> +
>>> +static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base,
>>> +                           struct lp_build_emit_data * emit_data)
>>> +{
>>> +       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
>>> +                                                0, TGSI_CHAN_X);
>>> +}
>>> +
>>> +static void emit_up2h(const struct lp_build_tgsi_action *action,
>>> +                     struct lp_build_tgsi_context *bld_base,
>>> +                     struct lp_build_emit_data *emit_data)
>>> +{
>>> +       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>>> +       LLVMContextRef context = bld_base->base.gallivm->context;
>>> +       struct lp_build_context *uint_bld = &bld_base->uint_bld;
>>> +       LLVMTypeRef fp16, i16;
>>> +       LLVMValueRef const16, input, val;
>>> +       unsigned i;
>>> +
>>> +       fp16 = LLVMHalfTypeInContext(context);
>>> +       i16 = LLVMInt16TypeInContext(context);
>>> +       const16 = lp_build_const_int32(uint_bld->gallivm, 16);
>>> +       input = emit_data->args[0];
>>> +
>>> +       for (i = 0; i < 2; i++) {
>>> +               val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
>>> +               val = LLVMBuildTrunc(builder, val, i16, "");
>>> +               val = LLVMBuildBitCast(builder, val, fp16, "");
>>> +               emit_data->output[i] =
>>> +                       LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
>>> +       }
>>> +}
>>> +
>>>  void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>>>  {
>>>         struct lp_type type;
>>> @@ -1581,6 +1649,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>>>         bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
>>>         bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
>>>         bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
>>> +       bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
>>> +       bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
>>>         bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
>>>         bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
>>>         bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
>>> @@ -1618,6 +1688,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>>>         bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
>>>         bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
>>>         bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
>>> +       bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
>>> +       bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
>>>  }
>>>
>>>  void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index 05f30bd..4dacdd1 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -308,6 +308,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>>         case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
>>>         case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
>>>         case PIPE_CAP_INVALIDATE_BUFFER:
>>> +       case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
>>>                 return 1;
>>
>> FYI, I'm going to change it to this:
>> return HAVE_LLVM >= 0x0306
>>
>> because Roland said LLVM < 3.6 doesn't support fptrunc 32->16.
> I haven't actually verified that... It's also possible this was
> dependent on backend support.
>
>>
>> Alternatively, we can axe the LLVM 3.5 support code, which would clean
>> up our compute code paths, but not graphics code paths.
>
>
> Also, what rounding do you actually get? I'm curious what the hw there does.

The rounding is configurable. I think there is one state for floats
and another state shared by both doubles and halves.

There is also a dedicated PK2H opcode which rounds towards zero, and we
only use it for color buffer conversions, which we have to do in
shaders. In other cases, we use the standard fp32<->fp16 conversion
opcodes.

Marek