[Mesa-dev] [PATCH 1/2] WIP gallivm: add support for PK2H/UP2H

Sun Jan 3 17:37:50 PST 2016

Am 03.01.2016 um 22:29 schrieb Ilia Mirkin:
> This hits assertion failures on LLVM 3.5
> 
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> 
> It definitely worked at one point or another, but it might have been with
> a later LLVM version and/or on a different CPU. On my i7-920 with LLVM 3.5
> I definitely get assertion errors from inside LLVM. Any interested party
> can take this patch over and fix it as they see fit. Or ignore it.

Interesting. I wasn't even aware that using fptrunc could work at all
with the f16 type. From a quick look, this was indeed introduced later,
I think in llvm 3.6 (some backends might still not support it today).
There are also the llvm.convert.to.fp16 (and f32) operations (the same
backends probably won't support them either...). I'm not really sure
what rounding mode semantics they'll end up with. It seems like fptrunc
might actually do round-to-nearest-even (I suppose llvm.convert.to.fp16
does too), but depending on how llvm ends up implementing it, it might
well be subject to the same no-denorm issue as the util code.
(And unfortunately, it looks like we don't have any direct control over
the rounding mode for them either, so we can't ditch
lp_build_float_to_smallfloat and lp_build_smallfloat_to_float.)

Roland

> 
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.c        |  1 -
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 87 ++++++++++++++++++++++
>  src/gallium/drivers/llvmpipe/lp_screen.c           |  2 +-
>  3 files changed, 88 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index c88dfbf..1cbe47c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
>     /* Ignore deprecated instructions */
>     switch (inst->Instruction.Opcode) {
>  
> -   case TGSI_OPCODE_UP2H:
>     case TGSI_OPCODE_UP2US:
>     case TGSI_OPCODE_UP4B:
>     case TGSI_OPCODE_UP4UB:
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 3d5e2cb..ac3298d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1020,6 +1020,88 @@ static void dfrac_emit(
>                                                         emit_data->args[0], tmp, "");
>  }
>  
> +static void
> +pk2h_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
> +                                            0, TGSI_CHAN_X);
> +   /* src0.y */
> +   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
> +                                            0, TGSI_CHAN_Y);
> +}
> +
> +static void
> +emit_pk2h(const struct lp_build_tgsi_action *action,
> +          struct lp_build_tgsi_context *bld_base,
> +          struct lp_build_emit_data *emit_data)
> +{
> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +   LLVMContextRef context = bld_base->base.gallivm->context;
> +   struct lp_build_context *uint_bld = &bld_base->uint_bld;
> +   LLVMTypeRef fp16 = LLVMVectorType(LLVMHalfTypeInContext(context),
> +                                     bld_base->base.type.length);
> +   LLVMTypeRef i16 = LLVMVectorType(LLVMInt16TypeInContext(context),
> +                                    bld_base->base.type.length);
> +   LLVMValueRef const16 = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> +                                             16);
> +
> +   LLVMValueRef low = LLVMBuildFPTrunc(
> +      builder, emit_data->args[0], fp16, "");
> +   LLVMValueRef high = LLVMBuildFPTrunc(
> +      builder, emit_data->args[1], fp16, "");
> +
> +   low = LLVMBuildZExt(builder, LLVMBuildBitCast(builder, low, i16, ""),
> +                       uint_bld->vec_type, "");
> +   high = LLVMBuildZExt(builder, LLVMBuildBitCast(builder, high, i16, ""),
> +                        uint_bld->vec_type, "");
> +
> +   emit_data->output[emit_data->chan] =
> +      LLVMBuildOr(builder, low, LLVMBuildShl(builder, high, const16, ""), "");
> +}
> +
> +static void
> +up2h_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
> +                                            0, TGSI_CHAN_X);
> +}
> +
> +static void
> +emit_up2h(const struct lp_build_tgsi_action *action,
> +          struct lp_build_tgsi_context *bld_base,
> +          struct lp_build_emit_data *emit_data)
> +{
> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +   LLVMContextRef context = bld_base->base.gallivm->context;
> +   struct lp_build_context *uint_bld = &bld_base->uint_bld;
> +   LLVMTypeRef fp16 = LLVMVectorType(LLVMHalfTypeInContext(context),
> +                                     bld_base->base.type.length);
> +   LLVMTypeRef i16 = LLVMVectorType(LLVMInt16TypeInContext(context),
> +                                    bld_base->base.type.length);
> +   LLVMValueRef const16 = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> +                                             16);
> +
> +   LLVMValueRef input = LLVMBuildBitCast(
> +      builder, emit_data->args[0], bld_base->base.int_vec_type, "");
> +   int i;
> +
> +   for (i = 0; i < 2; i++) {
> +      LLVMValueRef val = input;
> +      if (i == 1)
> +         val = LLVMBuildLShr(builder, val, const16, "");
> +      val = LLVMBuildTrunc(builder, val, i16, "");
> +      val = LLVMBuildBitCast(builder, val, fp16, "");
> +      emit_data->output[i] =
> +         LLVMBuildFPExt(builder, val, bld_base->base.vec_type, "");
> +   }
> +}
> +
>  void
>  lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
>  {
> @@ -1093,6 +1175,11 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
>     bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
>     bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
>  
> +   bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
> +
> +   bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
>  }
>  
>  /* CPU Only default actions */
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
> index 0898cff..e2fa73f 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -290,6 +290,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>     case PIPE_CAP_POLYGON_OFFSET_CLAMP:
>     case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
>     case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
> +   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
>        return 1;
>     case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
>     case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
> @@ -302,7 +303,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>     case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
>     case PIPE_CAP_CLEAR_TEXTURE:
>     case PIPE_CAP_DRAW_PARAMETERS:
> -   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
>     case PIPE_CAP_MULTI_DRAW_INDIRECT:
>     case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
>        return 0;
>