[Mesa-dev] [PATCH 2/2] gallivm: add PK2H/UP2H support
Roland Scheidegger
sroland at vmware.com
Tue Feb 2 02:48:52 UTC 2016
Am 01.02.2016 um 16:36 schrieb Brian Paul:
> Just a few more nitpicks below...
>
> Reviewed-by: Brian Paul <brianp at vmware.com>
>
> On 01/30/2016 07:08 PM, sroland at vmware.com wrote:
>> From: Roland Scheidegger <sroland at vmware.com>
>>
>> Add support for these opcodes, the conversion functions were already
>> there albeit need some new packing stuff.
>> Just like the tgsi version, piglit won't like it for all the same
>> reasons, so it's disabled (UP2H passes piglit arb_shader_language_packing
>> tests, albeit since PK2H won't due those rounding differences I don't
>> know if that one works or not as the piglit test is rather difficult to
>> deal with).
>> ---
>> src/gallium/auxiliary/gallivm/lp_bld_conv.c | 16 ++++-
>> src/gallium/auxiliary/gallivm/lp_bld_pack.c | 26 ++++++++
>> src/gallium/auxiliary/gallivm/lp_bld_pack.h | 5 ++
>> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 1 -
>> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 73
>> ++++++++++++++++++++++
>> 5 files changed, 119 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
>> index 7854142..7cf0dee 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
>> @@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
>> *
>> * Convert float32 to half floats, preserving Infs and NaNs,
>> * with rounding towards zero (trunc).
>> + * XXX: For GL, would prefer rounding towards nearest(-even).
>> */
>> LLVMValueRef
>> lp_build_float_to_half(struct gallivm_state *gallivm,
>> @@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state
>> *gallivm,
>> struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
>> LLVMValueRef result;
>>
>> + /*
>> + * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
>> + * directly, without any (x86 or generic) intrinsics.
>> + * Albeit the rounding mode cannot be specified (and is undefined,
>> + * though in practice on x86 seems to do nearest-even but it may
>> + * be dependent on instruction set support), so is essentially
>> + * useless.
>> + */
>> +
>> if (util_cpu_caps.has_f16c &&
>> (length == 4 || length == 8)) {
>> struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
>> @@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state
>> *gallivm,
>> LLVMValueRef index = LLVMConstInt(i32t, i, 0);
>> LLVMValueRef f32 = LLVMBuildExtractElement(builder, src,
>> index, "");
>> #if 0
>> - /* XXX: not really supported by backends */
>> + /*
>> + * XXX: not really supported by backends.
>> + * Even if they would now, rounding mode cannot be specified and
>> + * is undefined.
>> + */
>> LLVMValueRef f16 = lp_build_intrinsic_unary(builder,
>> "llvm.convert.to.fp16", i16t, f32);
>> #else
>> LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
>> index 0b0f7f0..daa2043 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
>> @@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
>>
>>
>> /**
>> + * Un-interleave vector.
>> + * This will return a vector consisting of every second element
>> + * (depending on lo_hi, beginning at 0 or 1).
>> + * The returned vector size (elems and width) will only be half
>> + * that of the source vector.
>> + */
>> +LLVMValueRef
>> +lp_build_uninterleave1(struct gallivm_state *gallivm,
>> + unsigned num_elems,
>> + LLVMValueRef a,
>> + unsigned lo_hi)
>> +{
>> + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
>> + unsigned i;
>> + assert(num_elems <= LP_MAX_VECTOR_LENGTH);
>> +
>> + for(i = 0; i < num_elems / 2; ++i)
>
> space after for.
Fixed.
>
>> + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
>> +
>> + shuffle = LLVMConstVector(elems, num_elems / 2);
>> +
>> + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
>> +}
>> +
>> +
>> +/**
>> * Interleave vector elements.
>> *
>> * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
>> b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
>> index 7cede35..367fba1 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
>> @@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
>> LLVMValueRef b,
>> unsigned lo_hi);
>>
>> +LLVMValueRef
>> +lp_build_uninterleave1(struct gallivm_state *gallivm,
>> + unsigned num_elems,
>> + LLVMValueRef a,
>> + unsigned lo_hi);
>>
>> void
>> lp_build_unpack2(struct gallivm_state *gallivm,
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
>> index c88dfbf..1cbe47c 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
>> @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
>> /* Ignore deprecated instructions */
>> switch (inst->Instruction.Opcode) {
>>
>> - case TGSI_OPCODE_UP2H:
>> case TGSI_OPCODE_UP2US:
>> case TGSI_OPCODE_UP4B:
>> case TGSI_OPCODE_UP4UB:
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
>> index 6f75bec..f6b42ee 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
>> @@ -45,8 +45,10 @@
>> #include "lp_bld_arit.h"
>> #include "lp_bld_bitarit.h"
>> #include "lp_bld_const.h"
>> +#include "lp_bld_conv.h"
>> #include "lp_bld_gather.h"
>> #include "lp_bld_logic.h"
>> +#include "lp_bld_pack.h"
>>
>> #include "tgsi/tgsi_exec.h"
>>
>> @@ -530,6 +532,75 @@ static struct lp_build_tgsi_action log_action = {
>> log_emit /* emit */
>> };
>>
>> +/* TGSI_OPCODE_PK2H */
>> +
>> +static void
>> +pk2h_fetch_args(
>> + struct lp_build_tgsi_context * bld_base,
>> + struct lp_build_emit_data * emit_data)
>
> pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
I didn't notice before, but there's over 100 functions in that file and
every single one does it like this.
The only functions not honoring that style are the new pk2h_emit and
up2h_emit... So I suppose it would be better to make them wrong too for
some consistency, not sure it's worth doing mass-whitespace fix...
Roland
>> +{
>> + /* src0.x */
>> + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
>> + 0, TGSI_CHAN_X);
>> + /* src0.y */
>> + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
>> + 0, TGSI_CHAN_Y);
>> +}
>> +
>> +static void
>> +pk2h_emit(const struct lp_build_tgsi_action *action,
>> + struct lp_build_tgsi_context *bld_base,
>> + struct lp_build_emit_data *emit_data)
>> +{
>> + struct gallivm_state *gallivm = bld_base->base.gallivm;
>> + struct lp_type f16i_t;
>> + LLVMValueRef lo, hi, res;
>> +
>> + f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
>> + lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
>> + hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
>> + /* maybe some interleave doubling vector width would be useful... */
>> + lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length *
>> 2);
>> + hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length *
>> 2);
>> + res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
>> +
>> + emit_data->output[emit_data->chan] = res;
>> +}
>> +
>> +static struct lp_build_tgsi_action pk2h_action = {
>> + pk2h_fetch_args, /* fetch_args */
>> + pk2h_emit /* emit */
>> +};
>> +
>> +/* TGSI_OPCODE_UP2H */
>> +
>> +static void
>> +up2h_emit(const struct lp_build_tgsi_action *action,
>> + struct lp_build_tgsi_context *bld_base,
>> + struct lp_build_emit_data *emit_data)
>> +{
>> + struct gallivm_state *gallivm = bld_base->base.gallivm;
>> + LLVMBuilderRef builder = gallivm->builder;
>> + LLVMContextRef context = gallivm->context;
>> + LLVMValueRef lo, hi, res[2], arg;
>> + unsigned nr = bld_base->base.type.length;
>> + LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context),
>> nr * 2);
>> +
>> + arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
>> + lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
>> + hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
>> + res[0] = lp_build_half_to_float(gallivm, lo);
>> + res[1] = lp_build_half_to_float(gallivm, hi);
>> +
>> + emit_data->output[0] = emit_data->output[2] = res[0];
>> + emit_data->output[1] = emit_data->output[3] = res[1];
>> +}
>> +
>> +static struct lp_build_tgsi_action up2h_action = {
>> + scalar_unary_fetch_args, /* fetch_args */
>> + up2h_emit /* emit */
>> +};
>> +
>> /* TGSI_OPCODE_LRP */
>>
>> static void
>> @@ -1032,10 +1103,12 @@ lp_set_default_actions(struct
>> lp_build_tgsi_context * bld_base)
>> bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
>> bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
>> bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
>> + bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
>> bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
>> bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
>> bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
>> bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
>> + bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
>> bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
>>
>> bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args =
>> scalar_unary_fetch_args;
>>
>
More information about the mesa-dev
mailing list