[Mesa-dev] [PATCH 2/2] radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Samuel Pitoiset
samuel.pitoiset at gmail.com
Mon Jan 8 10:21:01 UTC 2018
On 01/05/2018 08:32 PM, Marek Olšák wrote:
> Here: https://cgit.freedesktop.org/~mareko/mesa/log/?h=master
> LLVM: https://reviews.llvm.org/D41663
Thanks! Looks good to me: no performance changes and no regressions either.
This patch is:
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>
> Marek
>
> On Fri, Jan 5, 2018 at 7:19 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>>
>>
>> On 01/05/2018 02:45 AM, Marek Olšák wrote:
>>>
>>> On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset
>>> <samuel.pitoiset at gmail.com> wrote:
>>>>
>>>> How about performance?
>>>>
>>>> A few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb)
>>>> which affected F1 2017 and DOW3 on RADV, and it was also a nice
>>>> performance boost; this is why I'm asking.
>>>
>>>
>>> No idea. This just decreases the number of instructions in some PS
>>> epilogs.
>>
>>
>> Okay, the series no longer applies on master. Do you have a branch
>> somewhere? I would like to run at least F1 and DOW3.
>>
>>
>>>
>>> Marek
>>>
>>>>
>>>>
>>>> On 01/04/2018 01:55 AM, Marek Olšák wrote:
>>>>>
>>>>>
>>>>> From: Marek Olšák <marek.olsak at amd.com>
>>>>>
>>>>> ---
>>>>> src/amd/common/ac_llvm_build.c | 164
>>>>> +++++++++++++++++++++++++++++++
>>>>> src/amd/common/ac_llvm_build.h | 13 +++
>>>>> src/gallium/drivers/radeonsi/si_shader.c | 152
>>>>> ++++++++--------------------
>>>>> 3 files changed, 216 insertions(+), 113 deletions(-)
>>>>>
>>>>> diff --git a/src/amd/common/ac_llvm_build.c
>>>>> b/src/amd/common/ac_llvm_build.c
>>>>> index 7100e52..c48a186 100644
>>>>> --- a/src/amd/common/ac_llvm_build.c
>>>>> +++ b/src/amd/common/ac_llvm_build.c
>>>>> @@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>>>>> LLVMContextRef context,
>>>>> ctx->voidt = LLVMVoidTypeInContext(ctx->context);
>>>>> ctx->i1 = LLVMInt1TypeInContext(ctx->context);
>>>>> ctx->i8 = LLVMInt8TypeInContext(ctx->context);
>>>>> ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
>>>>> ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
>>>>> ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
>>>>> ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
>>>>> ctx->f16 = LLVMHalfTypeInContext(ctx->context);
>>>>> ctx->f32 = LLVMFloatTypeInContext(ctx->context);
>>>>> ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
>>>>> + ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
>>>>> ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
>>>>> ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
>>>>> ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
>>>>> ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
>>>>> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>>>>> ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
>>>>> ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
>>>>> ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
>>>>> ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
>>>>> @@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct
>>>>> ac_llvm_context
>>>>> *ctx, LLVMValueRef a,
>>>>> }
>>>>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef
>>>>> a,
>>>>> LLVMValueRef b)
>>>>> {
>>>>> LLVMValueRef args[2] = {a, b};
>>>>> return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32,
>>>>> args,
>>>>> 2,
>>>>> AC_FUNC_ATTR_READNONE);
>>>>> }
>>>>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> a,
>>>>> + LLVMValueRef b)
>>>>> +{
>>>>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b,
>>>>> "");
>>>>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>>>>> +}
>>>>> +
>>>>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
>>>>> + LLVMValueRef b)
>>>>> +{
>>>>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b,
>>>>> "");
>>>>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>>>>> +}
>>>>> +
>>>>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> a,
>>>>> LLVMValueRef b)
>>>>> {
>>>>> LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a,
>>>>> b,
>>>>> "");
>>>>> return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>>>>> }
>>>>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef
>>>>> value)
>>>>> {
>>>>> if (HAVE_LLVM >= 0x0500) {
>>>>> @@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct
>>>>> ac_llvm_context *ctx,
>>>>> v2f16, args, 2,
>>>>> AC_FUNC_ATTR_READNONE);
>>>>> return LLVMBuildBitCast(ctx->builder, res, ctx->i32,
>>>>> "");
>>>>> }
>>>>> return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32,
>>>>> args,
>>>>> 2,
>>>>> AC_FUNC_ATTR_READNONE |
>>>>> AC_FUNC_ATTR_LEGACY);
>>>>> }
>>>>> +/* Upper 16 bits must be zero. */
>>>>> +static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef val[2])
>>>>> +{
>>>>> + return LLVMBuildOr(ctx->builder, val[0],
>>>>> + LLVMBuildShl(ctx->builder, val[1],
>>>>> + LLVMConstInt(ctx->i32, 16, 0),
>>>>> + ""), "");
>>>>> +}
>>>>> +
>>>>> +/* Upper 16 bits are ignored and will be dropped. */
>>>>> +static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct
>>>>> ac_llvm_context *ctx,
>>>>> + LLVMValueRef val[2])
>>>>> +{
>>>>> + LLVMValueRef v[2] = {
>>>>> + LLVMBuildAnd(ctx->builder, val[0],
>>>>> + LLVMConstInt(ctx->i32, 0xffff, 0), ""),
>>>>> + val[1],
>>>>> + };
>>>>> + return ac_llvm_pack_two_int16(ctx, v);
>>>>> +}
>>>>> +
>>>>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2])
>>>>> +{
>>>>> + if (HAVE_LLVM >= 0x0600) {
>>>>> + LLVMValueRef res =
>>>>> + ac_build_intrinsic(ctx,
>>>>> "llvm.amdgcn.cvt.pknorm.i16",
>>>>> + ctx->v2i16, args, 2,
>>>>> + AC_FUNC_ATTR_READNONE);
>>>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32,
>>>>> "");
>>>>> + }
>>>>> +
>>>>> + LLVMValueRef val[2];
>>>>> +
>>>>> + for (int chan = 0; chan < 2; chan++) {
>>>>> + /* Clamp between [-1, 1]. */
>>>>> + val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
>>>>> + val[chan] = ac_build_fmax(ctx, val[chan],
>>>>> LLVMConstReal(ctx->f32, -1));
>>>>> + /* Convert to a signed integer in [-32767, 32767]. */
>>>>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
>>>>> + LLVMConstReal(ctx->f32,
>>>>> 32767),
>>>>> "");
>>>>> + /* If positive, add 0.5, else add -0.5. */
>>>>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
>>>>> + LLVMBuildSelect(ctx->builder,
>>>>> + LLVMBuildFCmp(ctx->builder,
>>>>> LLVMRealOGE,
>>>>> + val[chan],
>>>>> ctx->f32_0, ""),
>>>>> + LLVMConstReal(ctx->f32, 0.5),
>>>>> + LLVMConstReal(ctx->f32, -0.5),
>>>>> ""), "");
>>>>> + val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan],
>>>>> ctx->i32, "");
>>>>> + }
>>>>> + return ac_llvm_pack_two_int32_as_int16(ctx, val);
>>>>> +}
>>>>> +
>>>>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2])
>>>>> +{
>>>>> + if (HAVE_LLVM >= 0x0600) {
>>>>> + LLVMValueRef res =
>>>>> + ac_build_intrinsic(ctx,
>>>>> "llvm.amdgcn.cvt.pknorm.u16",
>>>>> + ctx->v2i16, args, 2,
>>>>> + AC_FUNC_ATTR_READNONE);
>>>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32,
>>>>> "");
>>>>> + }
>>>>> +
>>>>> + LLVMValueRef val[2];
>>>>> +
>>>>> + for (int chan = 0; chan < 2; chan++) {
>>>>> + val[chan] = ac_build_clamp(ctx, args[chan]);
>>>>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
>>>>> + LLVMConstReal(ctx->f32,
>>>>> 65535),
>>>>> "");
>>>>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
>>>>> + LLVMConstReal(ctx->f32, 0.5),
>>>>> "");
>>>>> + val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
>>>>> + ctx->i32, "");
>>>>> + }
>>>>> + return ac_llvm_pack_two_int32_as_int16(ctx, val);
>>>>> +}
>>>>> +
>>>>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */
>>>>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2], unsigned bits,
>>>>> bool
>>>>> hi)
>>>>> +{
>>>>> + assert(bits == 8 || bits == 10 || bits == 16);
>>>>> +
>>>>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>>>>> + bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
>>>>> + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
>>>>> + bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
>>>>> + LLVMValueRef max_alpha =
>>>>> + bits != 10 ? max_rgb : ctx->i32_1;
>>>>> + LLVMValueRef min_alpha =
>>>>> + bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
>>>>> + bool has_intrinsic = HAVE_LLVM >= 0x0600;
>>>>> +
>>>>> + /* Clamp. */
>>>>> + if (!has_intrinsic || bits != 16) {
>>>>> + for (int i = 0; i < 2; i++) {
>>>>> + bool alpha = hi && i == 1;
>>>>> + args[i] = ac_build_imin(ctx, args[i],
>>>>> + alpha ? max_alpha :
>>>>> max_rgb);
>>>>> + args[i] = ac_build_imax(ctx, args[i],
>>>>> + alpha ? min_alpha :
>>>>> min_rgb);
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + if (has_intrinsic) {
>>>>> + LLVMValueRef res =
>>>>> + ac_build_intrinsic(ctx,
>>>>> "llvm.amdgcn.cvt.pk.i16",
>>>>> + ctx->v2i16, args, 2,
>>>>> + AC_FUNC_ATTR_READNONE);
>>>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32,
>>>>> "");
>>>>> + }
>>>>> +
>>>>> + return ac_llvm_pack_two_int32_as_int16(ctx, args);
>>>>> +}
>>>>> +
>>>>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */
>>>>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2], unsigned bits,
>>>>> bool
>>>>> hi)
>>>>> +{
>>>>> + assert(bits == 8 || bits == 10 || bits == 16);
>>>>> +
>>>>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>>>>> + bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
>>>>> + LLVMValueRef max_alpha =
>>>>> + bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
>>>>> + bool has_intrinsic = HAVE_LLVM >= 0x0600;
>>>>> +
>>>>> + /* Clamp. */
>>>>> + if (!has_intrinsic || bits != 16) {
>>>>> + for (int i = 0; i < 2; i++) {
>>>>> + bool alpha = hi && i == 1;
>>>>> + args[i] = ac_build_umin(ctx, args[i],
>>>>> + alpha ? max_alpha :
>>>>> max_rgb);
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + if (has_intrinsic) {
>>>>> + LLVMValueRef res =
>>>>> + ac_build_intrinsic(ctx,
>>>>> "llvm.amdgcn.cvt.pk.u16",
>>>>> + ctx->v2i16, args, 2,
>>>>> + AC_FUNC_ATTR_READNONE);
>>>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32,
>>>>> "");
>>>>> + }
>>>>> +
>>>>> + return ac_llvm_pack_two_int16(ctx, args);
>>>>> +}
>>>>> +
>>>>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef
>>>>> i1)
>>>>> {
>>>>> assert(HAVE_LLVM >= 0x0600);
>>>>> return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
>>>>> &i1, 1, AC_FUNC_ATTR_READNONE);
>>>>> }
>>>>> void ac_build_kill_if_false(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef
>>>>> i1)
>>>>> {
>>>>> if (HAVE_LLVM >= 0x0600) {
>>>>> diff --git a/src/amd/common/ac_llvm_build.h
>>>>> b/src/amd/common/ac_llvm_build.h
>>>>> index 0deb5b5..3f0e9e2 100644
>>>>> --- a/src/amd/common/ac_llvm_build.h
>>>>> +++ b/src/amd/common/ac_llvm_build.h
>>>>> @@ -50,20 +50,21 @@ struct ac_llvm_context {
>>>>> LLVMTypeRef voidt;
>>>>> LLVMTypeRef i1;
>>>>> LLVMTypeRef i8;
>>>>> LLVMTypeRef i16;
>>>>> LLVMTypeRef i32;
>>>>> LLVMTypeRef i64;
>>>>> LLVMTypeRef intptr;
>>>>> LLVMTypeRef f16;
>>>>> LLVMTypeRef f32;
>>>>> LLVMTypeRef f64;
>>>>> + LLVMTypeRef v2i16;
>>>>> LLVMTypeRef v2i32;
>>>>> LLVMTypeRef v3i32;
>>>>> LLVMTypeRef v4i32;
>>>>> LLVMTypeRef v2f32;
>>>>> LLVMTypeRef v4f32;
>>>>> LLVMTypeRef v8i32;
>>>>> LLVMValueRef i32_0;
>>>>> LLVMValueRef i32_1;
>>>>> LLVMValueRef f32_0;
>>>>> @@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context
>>>>> *ctx,
>>>>> LLVMValueRef arg,
>>>>> LLVMTypeRef dst_type);
>>>>> LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef arg,
>>>>> LLVMTypeRef dst_type);
>>>>> LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> a,
>>>>> LLVMValueRef b);
>>>>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> a,
>>>>> LLVMValueRef b);
>>>>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
>>>>> + LLVMValueRef b);
>>>>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
>>>>> + LLVMValueRef b);
>>>>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> a,
>>>>> LLVMValueRef b);
>>>>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> value);
>>>>> struct ac_export_args {
>>>>> LLVMValueRef out[4];
>>>>> unsigned target;
>>>>> unsigned enabled_channels;
>>>>> bool compr;
>>>>> bool done;
>>>>> bool valid_mask;
>>>>> @@ -282,20 +287,28 @@ struct ac_image_args {
>>>>> LLVMValueRef addr;
>>>>> unsigned dmask;
>>>>> bool unorm;
>>>>> bool da;
>>>>> };
>>>>> LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
>>>>> struct ac_image_args *a);
>>>>> LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef args[2]);
>>>>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2]);
>>>>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2]);
>>>>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2], unsigned bits,
>>>>> bool
>>>>> hi);
>>>>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
>>>>> + LLVMValueRef args[2], unsigned bits,
>>>>> bool
>>>>> hi);
>>>>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx,
>>>>> LLVMValueRef
>>>>> i1);
>>>>> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> i1);
>>>>> LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> input,
>>>>> LLVMValueRef offset, LLVMValueRef width,
>>>>> bool is_signed);
>>>>> void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned
>>>>> simm16);
>>>>> void ac_get_image_intr_name(const char *base_name,
>>>>> LLVMTypeRef data_type,
>>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>>> index 453822c..a695aad 100644
>>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>>> @@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant(
>>>>>
>>>>> ctx->num_const_buffers);
>>>>> index = LLVMBuildAdd(ctx->ac.builder, index,
>>>>> LLVMConstInt(ctx->i32,
>>>>> SI_NUM_SHADER_BUFFERS, 0), "");
>>>>> bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
>>>>> } else
>>>>> bufp = load_const_buffer_desc(ctx, buf);
>>>>> return bitcast(bld_base, type, buffer_load_const(ctx, bufp,
>>>>> addr));
>>>>> }
>>>>> -/* Upper 16 bits must be zero. */
>>>>> -static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context
>>>>> *ctx,
>>>>> - LLVMValueRef val[2])
>>>>> -{
>>>>> - return LLVMBuildOr(ctx->ac.builder, val[0],
>>>>> - LLVMBuildShl(ctx->ac.builder, val[1],
>>>>> - LLVMConstInt(ctx->i32, 16, 0),
>>>>> - ""), "");
>>>>> -}
>>>>> -
>>>>> -/* Upper 16 bits are ignored and will be dropped. */
>>>>> -static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct
>>>>> si_shader_context *ctx,
>>>>> - LLVMValueRef val[2])
>>>>> -{
>>>>> - LLVMValueRef v[2] = {
>>>>> - LLVMBuildAnd(ctx->ac.builder, val[0],
>>>>> - LLVMConstInt(ctx->i32, 0xffff, 0), ""),
>>>>> - val[1],
>>>>> - };
>>>>> - return si_llvm_pack_two_int16(ctx, v);
>>>>> -}
>>>>> -
>>>>> /* Initialize arguments for the shader export intrinsic */
>>>>> static void si_llvm_init_export_args(struct si_shader_context *ctx,
>>>>> LLVMValueRef *values,
>>>>> unsigned target,
>>>>> struct ac_export_args *args)
>>>>> {
>>>>> LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
>>>>> - LLVMBuilderRef builder = ctx->ac.builder;
>>>>> - LLVMValueRef val[4];
>>>>> unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
>>>>> unsigned chan;
>>>>> bool is_int8, is_int10;
>>>>> /* Default is 0xf. Adjusted below depending on the format. */
>>>>> args->enabled_channels = 0xf; /* writemask */
>>>>> /* Specify whether the EXEC mask represents the valid mask */
>>>>> args->valid_mask = 0;
>>>>> @@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct
>>>>> si_shader_context *ctx,
>>>>> is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) &
>>>>> 0x1;
>>>>> is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf)
>>>>> &
>>>>> 0x1;
>>>>> }
>>>>> args->compr = false;
>>>>> args->out[0] = f32undef;
>>>>> args->out[1] = f32undef;
>>>>> args->out[2] = f32undef;
>>>>> args->out[3] = f32undef;
>>>>> + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> args[2]) = NULL;
>>>>> + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef
>>>>> args[2],
>>>>> + unsigned bits, bool hi) = NULL;
>>>>> +
>>>>> switch (spi_shader_col_format) {
>>>>> case V_028714_SPI_SHADER_ZERO:
>>>>> args->enabled_channels = 0; /* writemask */
>>>>> args->target = V_008DFC_SQ_EXP_NULL;
>>>>> break;
>>>>> case V_028714_SPI_SHADER_32_R:
>>>>> args->enabled_channels = 1; /* writemask */
>>>>> args->out[0] = values[0];
>>>>> break;
>>>>> @@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct
>>>>> si_shader_context *ctx,
>>>>> args->out[1] = values[1];
>>>>> break;
>>>>> case V_028714_SPI_SHADER_32_AR:
>>>>> args->enabled_channels = 0x9; /* writemask */
>>>>> args->out[0] = values[0];
>>>>> args->out[3] = values[3];
>>>>> break;
>>>>> case V_028714_SPI_SHADER_FP16_ABGR:
>>>>> - args->compr = 1; /* COMPR flag */
>>>>> -
>>>>> - for (chan = 0; chan < 2; chan++) {
>>>>> - LLVMValueRef pack_args[2] = {
>>>>> - values[2 * chan],
>>>>> - values[2 * chan + 1]
>>>>> - };
>>>>> - LLVMValueRef packed;
>>>>> -
>>>>> - packed = ac_build_cvt_pkrtz_f16(&ctx->ac,
>>>>> pack_args);
>>>>> - args->out[chan] = ac_to_float(&ctx->ac, packed);
>>>>> - }
>>>>> + packf = ac_build_cvt_pkrtz_f16;
>>>>> break;
>>>>> case V_028714_SPI_SHADER_UNORM16_ABGR:
>>>>> - for (chan = 0; chan < 4; chan++) {
>>>>> - val[chan] = ac_build_clamp(&ctx->ac,
>>>>> values[chan]);
>>>>> - val[chan] = LLVMBuildFMul(builder, val[chan],
>>>>> -
>>>>> LLVMConstReal(ctx->f32,
>>>>> 65535), "");
>>>>> - val[chan] = LLVMBuildFAdd(builder, val[chan],
>>>>> -
>>>>> LLVMConstReal(ctx->f32,
>>>>> 0.5), "");
>>>>> - val[chan] = LLVMBuildFPToUI(builder, val[chan],
>>>>> - ctx->i32, "");
>>>>> - }
>>>>> -
>>>>> - args->compr = 1; /* COMPR flag */
>>>>> - args->out[0] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int16(ctx, val));
>>>>> - args->out[1] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int16(ctx, val+2));
>>>>> + packf = ac_build_cvt_pknorm_u16;
>>>>> break;
>>>>> case V_028714_SPI_SHADER_SNORM16_ABGR:
>>>>> - for (chan = 0; chan < 4; chan++) {
>>>>> - /* Clamp between [-1, 1]. */
>>>>> - val[chan] =
>>>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
>>>>> -
>>>>> values[chan],
>>>>> -
>>>>> LLVMConstReal(ctx->f32, 1));
>>>>> - val[chan] =
>>>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
>>>>> - val[chan],
>>>>> -
>>>>> LLVMConstReal(ctx->f32, -1));
>>>>> - /* Convert to a signed integer in [-32767,
>>>>> 32767].
>>>>> */
>>>>> - val[chan] = LLVMBuildFMul(builder, val[chan],
>>>>> -
>>>>> LLVMConstReal(ctx->f32,
>>>>> 32767), "");
>>>>> - /* If positive, add 0.5, else add -0.5. */
>>>>> - val[chan] = LLVMBuildFAdd(builder, val[chan],
>>>>> - LLVMBuildSelect(builder,
>>>>> - LLVMBuildFCmp(builder,
>>>>> LLVMRealOGE,
>>>>> - val[chan],
>>>>> ctx->ac.f32_0, ""),
>>>>> - LLVMConstReal(ctx->f32,
>>>>> 0.5),
>>>>> - LLVMConstReal(ctx->f32,
>>>>> -0.5), ""), "");
>>>>> - val[chan] = LLVMBuildFPToSI(builder, val[chan],
>>>>> ctx->i32, "");
>>>>> - }
>>>>> -
>>>>> - args->compr = 1; /* COMPR flag */
>>>>> - args->out[0] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int32_as_int16(ctx, val));
>>>>> - args->out[1] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int32_as_int16(ctx, val+2));
>>>>> + packf = ac_build_cvt_pknorm_i16;
>>>>> break;
>>>>> - case V_028714_SPI_SHADER_UINT16_ABGR: {
>>>>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>>>>> - is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
>>>>> - LLVMValueRef max_alpha =
>>>>> - !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3,
>>>>> 0);
>>>>> + case V_028714_SPI_SHADER_UINT16_ABGR:
>>>>> + packi = ac_build_cvt_pk_u16;
>>>>> + break;
>>>>> - /* Clamp. */
>>>>> - for (chan = 0; chan < 4; chan++) {
>>>>> - val[chan] = ac_to_integer(&ctx->ac,
>>>>> values[chan]);
>>>>> - val[chan] =
>>>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
>>>>> - val[chan],
>>>>> - chan == 3 ? max_alpha :
>>>>> max_rgb);
>>>>> - }
>>>>> + case V_028714_SPI_SHADER_SINT16_ABGR:
>>>>> + packi = ac_build_cvt_pk_i16;
>>>>> + break;
>>>>> - args->compr = 1; /* COMPR flag */
>>>>> - args->out[0] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int16(ctx, val));
>>>>> - args->out[1] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int16(ctx, val+2));
>>>>> + case V_028714_SPI_SHADER_32_ABGR:
>>>>> + memcpy(&args->out[0], values, sizeof(values[0]) * 4);
>>>>> break;
>>>>> }
>>>>> - case V_028714_SPI_SHADER_SINT16_ABGR: {
>>>>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>>>>> - is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
>>>>> - LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
>>>>> - is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
>>>>> - LLVMValueRef max_alpha =
>>>>> - !is_int10 ? max_rgb : ctx->i32_1;
>>>>> - LLVMValueRef min_alpha =
>>>>> - !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2,
>>>>> 0);
>>>>> + /* Pack f16 or norm_i16/u16. */
>>>>> + if (packf) {
>>>>> + for (chan = 0; chan < 2; chan++) {
>>>>> + LLVMValueRef pack_args[2] = {
>>>>> + values[2 * chan],
>>>>> + values[2 * chan + 1]
>>>>> + };
>>>>> + LLVMValueRef packed;
>>>>> - /* Clamp. */
>>>>> - for (chan = 0; chan < 4; chan++) {
>>>>> - val[chan] = ac_to_integer(&ctx->ac,
>>>>> values[chan]);
>>>>> - val[chan] =
>>>>> lp_build_emit_llvm_binary(&ctx->bld_base,
>>>>> - TGSI_OPCODE_IMIN,
>>>>> - val[chan], chan == 3 ? max_alpha
>>>>> :
>>>>> max_rgb);
>>>>> - val[chan] =
>>>>> lp_build_emit_llvm_binary(&ctx->bld_base,
>>>>> - TGSI_OPCODE_IMAX,
>>>>> - val[chan], chan == 3 ? min_alpha
>>>>> :
>>>>> min_rgb);
>>>>> + packed = packf(&ctx->ac, pack_args);
>>>>> + args->out[chan] = ac_to_float(&ctx->ac, packed);
>>>>> }
>>>>> -
>>>>> args->compr = 1; /* COMPR flag */
>>>>> - args->out[0] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int32_as_int16(ctx, val));
>>>>> - args->out[1] = ac_to_float(&ctx->ac,
>>>>> si_llvm_pack_two_int32_as_int16(ctx, val+2));
>>>>> - break;
>>>>> }
>>>>> + /* Pack i16/u16. */
>>>>> + if (packi) {
>>>>> + for (chan = 0; chan < 2; chan++) {
>>>>> + LLVMValueRef pack_args[2] = {
>>>>> + ac_to_integer(&ctx->ac, values[2 *
>>>>> chan]),
>>>>> + ac_to_integer(&ctx->ac, values[2 * chan
>>>>> +
>>>>> 1])
>>>>> + };
>>>>> + LLVMValueRef packed;
>>>>> - case V_028714_SPI_SHADER_32_ABGR:
>>>>> - memcpy(&args->out[0], values, sizeof(values[0]) * 4);
>>>>> - break;
>>>>> + packed = packi(&ctx->ac, pack_args,
>>>>> + is_int8 ? 8 : is_int10 ? 10 : 16,
>>>>> + chan == 1);
>>>>> + args->out[chan] = ac_to_float(&ctx->ac, packed);
>>>>> + }
>>>>> + args->compr = 1; /* COMPR flag */
>>>>> }
>>>>> }
>>>>> static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
>>>>> LLVMValueRef alpha)
>>>>> {
>>>>> struct si_shader_context *ctx = si_shader_context(bld_base);
>>>>> if (ctx->shader->key.part.ps.epilog.alpha_func !=
>>>>> PIPE_FUNC_NEVER)
>>>>> {
>>>>> static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1]
>>>>> =
>>>>> {
>>>>>
>>>>
>>
More information about the mesa-dev
mailing list