[Mesa-dev] [PATCH 2/2] gallivm: add PK2H/UP2H support
Jose Fonseca
jfonseca at vmware.com
Tue Feb 2 12:39:04 UTC 2016
On 31/01/16 02:08, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> Add support for these opcodes, the conversion functions were already
> there albeit need some new packing stuff.
> Just like the tgsi version, piglit won't like it for all the same
> reasons, so it's disabled (UP2H passes piglit arb_shader_language_packing
> tests, albeit since PK2H won't due those rounding differences I don't
> know if that one works or not as the piglit test is rather difficult to
> deal with).
> ---
> src/gallium/auxiliary/gallivm/lp_bld_conv.c | 16 ++++-
> src/gallium/auxiliary/gallivm/lp_bld_pack.c | 26 ++++++++
> src/gallium/auxiliary/gallivm/lp_bld_pack.h | 5 ++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 1 -
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 73 ++++++++++++++++++++++
> 5 files changed, 119 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> index 7854142..7cf0dee 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
> @@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
> *
> * Convert float32 to half floats, preserving Infs and NaNs,
> * with rounding towards zero (trunc).
> + * XXX: For GL, would prefer rounding towards nearest(-even).
> */
> LLVMValueRef
> lp_build_float_to_half(struct gallivm_state *gallivm,
> @@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
> struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
> LLVMValueRef result;
>
> + /*
> + * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
> + * directly, without any (x86 or generic) intrinsics.
> + * Albeit the rounding mode cannot be specified (and is undefined,
> + * though in practice on x86 seems to do nearest-even but it may
> + * be dependent on instruction set support), so is essentially
> + * useless.
> + */
> +
> if (util_cpu_caps.has_f16c &&
> (length == 4 || length == 8)) {
> struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
> @@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
> LLVMValueRef index = LLVMConstInt(i32t, i, 0);
> LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
> #if 0
> - /* XXX: not really supported by backends */
> + /*
> + * XXX: not really supported by backends.
> + * Even if they would now, rounding mode cannot be specified and
> + * is undefined.
> + */
> LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
> #else
> LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> index 0b0f7f0..daa2043 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> @@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
>
>
> /**
> + * Un-interleave vector.
> + * This will return a vector consisting of every second element
> + * (depending on lo_hi, beginning at 0 or 1).
> + * The returned vector size (elems and width) will only be half
> + * that of the source vector.
> + */
> +LLVMValueRef
> +lp_build_uninterleave1(struct gallivm_state *gallivm,
> + unsigned num_elems,
> + LLVMValueRef a,
> + unsigned lo_hi)
> +{
> + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
> + unsigned i;
> + assert(num_elems <= LP_MAX_VECTOR_LENGTH);
> +
> + for(i = 0; i < num_elems / 2; ++i)
> + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
> +
> + shuffle = LLVMConstVector(elems, num_elems / 2);
> +
> + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
One don't need the second `a` here. One could use
`LLVMGetUndef(LLVMTypeOf(a))` to make it explicit.
> +}
> +
> +
> +/**
> * Interleave vector elements.
> *
> * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
> index 7cede35..367fba1 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
> @@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
> LLVMValueRef b,
> unsigned lo_hi);
>
> +LLVMValueRef
> +lp_build_uninterleave1(struct gallivm_state *gallivm,
> + unsigned num_elems,
> + LLVMValueRef a,
> + unsigned lo_hi);
>
> void
> lp_build_unpack2(struct gallivm_state *gallivm,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index c88dfbf..1cbe47c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
> /* Ignore deprecated instructions */
> switch (inst->Instruction.Opcode) {
>
> - case TGSI_OPCODE_UP2H:
> case TGSI_OPCODE_UP2US:
> case TGSI_OPCODE_UP4B:
> case TGSI_OPCODE_UP4UB:
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 6f75bec..f6b42ee 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -45,8 +45,10 @@
> #include "lp_bld_arit.h"
> #include "lp_bld_bitarit.h"
> #include "lp_bld_const.h"
> +#include "lp_bld_conv.h"
> #include "lp_bld_gather.h"
> #include "lp_bld_logic.h"
> +#include "lp_bld_pack.h"
>
> #include "tgsi/tgsi_exec.h"
>
> @@ -530,6 +532,75 @@ static struct lp_build_tgsi_action log_action = {
> log_emit /* emit */
> };
>
> +/* TGSI_OPCODE_PK2H */
> +
> +static void
> +pk2h_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.x */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
> + 0, TGSI_CHAN_X);
> + /* src0.y */
> + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
> + 0, TGSI_CHAN_Y);
> +}
> +
> +static void
> +pk2h_emit(const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct gallivm_state *gallivm = bld_base->base.gallivm;
> + struct lp_type f16i_t;
> + LLVMValueRef lo, hi, res;
> +
> + f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
> + lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
> + hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
> + /* maybe some interleave doubling vector width would be useful... */
> + lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
> + hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
> + res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
> +
> + emit_data->output[emit_data->chan] = res;
> +}
> +
> +static struct lp_build_tgsi_action pk2h_action = {
> + pk2h_fetch_args, /* fetch_args */
> + pk2h_emit /* emit */
> +};
> +
> +/* TGSI_OPCODE_UP2H */
> +
> +static void
> +up2h_emit(const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct gallivm_state *gallivm = bld_base->base.gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + LLVMContextRef context = gallivm->context;
> + LLVMValueRef lo, hi, res[2], arg;
> + unsigned nr = bld_base->base.type.length;
> + LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
> +
> + arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
> + lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
> + hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
> + res[0] = lp_build_half_to_float(gallivm, lo);
> + res[1] = lp_build_half_to_float(gallivm, hi);
> +
> + emit_data->output[0] = emit_data->output[2] = res[0];
> + emit_data->output[1] = emit_data->output[3] = res[1];
> +}
> +
> +static struct lp_build_tgsi_action up2h_action = {
> + scalar_unary_fetch_args, /* fetch_args */
> + up2h_emit /* emit */
> +};
> +
> /* TGSI_OPCODE_LRP */
>
> static void
> @@ -1032,10 +1103,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
> bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
> bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
> bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
> + bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
> bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
> bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
> bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
> bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
> + bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
> bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
>
> bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
>
Otherwise series looks good.
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
More information about the mesa-dev
mailing list