[Mesa-dev] [PATCH 2/2] radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Marek Olšák
maraeo at gmail.com
Fri Jan 5 01:45:06 UTC 2018
On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> How about performance?
>
> A few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb)
> which affected F1 2017 and DOW3 on RADV, and it was also a nice performance
> boost; this is why I'm asking.
No idea. This just decreases the number of instructions in some pixel shader (PS) epilogs.
Marek
>
>
> On 01/04/2018 01:55 AM, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> ---
>> src/amd/common/ac_llvm_build.c | 164
>> +++++++++++++++++++++++++++++++
>> src/amd/common/ac_llvm_build.h | 13 +++
>> src/gallium/drivers/radeonsi/si_shader.c | 152
>> ++++++++--------------------
>> 3 files changed, 216 insertions(+), 113 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c
>> b/src/amd/common/ac_llvm_build.c
>> index 7100e52..c48a186 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>> LLVMContextRef context,
>> ctx->voidt = LLVMVoidTypeInContext(ctx->context);
>> ctx->i1 = LLVMInt1TypeInContext(ctx->context);
>> ctx->i8 = LLVMInt8TypeInContext(ctx->context);
>> ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
>> ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
>> ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
>> ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
>> ctx->f16 = LLVMHalfTypeInContext(ctx->context);
>> ctx->f32 = LLVMFloatTypeInContext(ctx->context);
>> ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
>> + ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
>> ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
>> ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
>> ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
>> ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
>> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>> ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
>> ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
>> ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
>> ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
>> @@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context
>> *ctx, LLVMValueRef a,
>> }
>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef
>> a,
>> LLVMValueRef b)
>> {
>> LLVMValueRef args[2] = {a, b};
>> return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args,
>> 2,
>> AC_FUNC_ATTR_READNONE);
>> }
>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
>> + LLVMValueRef b)
>> +{
>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b,
>> "");
>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>> +}
>> +
>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
>> + LLVMValueRef b)
>> +{
>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b,
>> "");
>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>> +}
>> +
>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
>> LLVMValueRef b)
>> {
>> LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b,
>> "");
>> return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
>> }
>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef
>> value)
>> {
>> if (HAVE_LLVM >= 0x0500) {
>> @@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct
>> ac_llvm_context *ctx,
>> v2f16, args, 2,
>> AC_FUNC_ATTR_READNONE);
>> return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
>> }
>> return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args,
>> 2,
>> AC_FUNC_ATTR_READNONE |
>> AC_FUNC_ATTR_LEGACY);
>> }
>> +/* Upper 16 bits must be zero. */
>> +static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
>> + LLVMValueRef val[2])
>> +{
>> + return LLVMBuildOr(ctx->builder, val[0],
>> + LLVMBuildShl(ctx->builder, val[1],
>> + LLVMConstInt(ctx->i32, 16, 0),
>> + ""), "");
>> +}
>> +
>> +/* Upper 16 bits are ignored and will be dropped. */
>> +static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct
>> ac_llvm_context *ctx,
>> + LLVMValueRef val[2])
>> +{
>> + LLVMValueRef v[2] = {
>> + LLVMBuildAnd(ctx->builder, val[0],
>> + LLVMConstInt(ctx->i32, 0xffff, 0), ""),
>> + val[1],
>> + };
>> + return ac_llvm_pack_two_int16(ctx, v);
>> +}
>> +
>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2])
>> +{
>> + if (HAVE_LLVM >= 0x0600) {
>> + LLVMValueRef res =
>> + ac_build_intrinsic(ctx,
>> "llvm.amdgcn.cvt.pknorm.i16",
>> + ctx->v2i16, args, 2,
>> + AC_FUNC_ATTR_READNONE);
>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
>> + }
>> +
>> + LLVMValueRef val[2];
>> +
>> + for (int chan = 0; chan < 2; chan++) {
>> + /* Clamp between [-1, 1]. */
>> + val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
>> + val[chan] = ac_build_fmax(ctx, val[chan],
>> LLVMConstReal(ctx->f32, -1));
>> + /* Convert to a signed integer in [-32767, 32767]. */
>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
>> + LLVMConstReal(ctx->f32, 32767),
>> "");
>> + /* If positive, add 0.5, else add -0.5. */
>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
>> + LLVMBuildSelect(ctx->builder,
>> + LLVMBuildFCmp(ctx->builder,
>> LLVMRealOGE,
>> + val[chan],
>> ctx->f32_0, ""),
>> + LLVMConstReal(ctx->f32, 0.5),
>> + LLVMConstReal(ctx->f32, -0.5),
>> ""), "");
>> + val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan],
>> ctx->i32, "");
>> + }
>> + return ac_llvm_pack_two_int32_as_int16(ctx, val);
>> +}
>> +
>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2])
>> +{
>> + if (HAVE_LLVM >= 0x0600) {
>> + LLVMValueRef res =
>> + ac_build_intrinsic(ctx,
>> "llvm.amdgcn.cvt.pknorm.u16",
>> + ctx->v2i16, args, 2,
>> + AC_FUNC_ATTR_READNONE);
>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
>> + }
>> +
>> + LLVMValueRef val[2];
>> +
>> + for (int chan = 0; chan < 2; chan++) {
>> + val[chan] = ac_build_clamp(ctx, args[chan]);
>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
>> + LLVMConstReal(ctx->f32, 65535),
>> "");
>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
>> + LLVMConstReal(ctx->f32, 0.5),
>> "");
>> + val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
>> + ctx->i32, "");
>> + }
>> + return ac_llvm_pack_two_int32_as_int16(ctx, val);
>> +}
>> +
>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */
>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2], unsigned bits, bool
>> hi)
>> +{
>> + assert(bits == 8 || bits == 10 || bits == 16);
>> +
>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>> + bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
>> + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
>> + bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
>> + LLVMValueRef max_alpha =
>> + bits != 10 ? max_rgb : ctx->i32_1;
>> + LLVMValueRef min_alpha =
>> + bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
>> + bool has_intrinsic = HAVE_LLVM >= 0x0600;
>> +
>> + /* Clamp. */
>> + if (!has_intrinsic || bits != 16) {
>> + for (int i = 0; i < 2; i++) {
>> + bool alpha = hi && i == 1;
>> + args[i] = ac_build_imin(ctx, args[i],
>> + alpha ? max_alpha :
>> max_rgb);
>> + args[i] = ac_build_imax(ctx, args[i],
>> + alpha ? min_alpha :
>> min_rgb);
>> + }
>> + }
>> +
>> + if (has_intrinsic) {
>> + LLVMValueRef res =
>> + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
>> + ctx->v2i16, args, 2,
>> + AC_FUNC_ATTR_READNONE);
>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
>> + }
>> +
>> + return ac_llvm_pack_two_int32_as_int16(ctx, args);
>> +}
>> +
>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */
>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2], unsigned bits, bool
>> hi)
>> +{
>> + assert(bits == 8 || bits == 10 || bits == 16);
>> +
>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>> + bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
>> + LLVMValueRef max_alpha =
>> + bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
>> + bool has_intrinsic = HAVE_LLVM >= 0x0600;
>> +
>> + /* Clamp. */
>> + if (!has_intrinsic || bits != 16) {
>> + for (int i = 0; i < 2; i++) {
>> + bool alpha = hi && i == 1;
>> + args[i] = ac_build_umin(ctx, args[i],
>> + alpha ? max_alpha :
>> max_rgb);
>> + }
>> + }
>> +
>> + if (has_intrinsic) {
>> + LLVMValueRef res =
>> + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
>> + ctx->v2i16, args, 2,
>> + AC_FUNC_ATTR_READNONE);
>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
>> + }
>> +
>> + return ac_llvm_pack_two_int16(ctx, args);
>> +}
>> +
>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef
>> i1)
>> {
>> assert(HAVE_LLVM >= 0x0600);
>> return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
>> &i1, 1, AC_FUNC_ATTR_READNONE);
>> }
>> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef
>> i1)
>> {
>> if (HAVE_LLVM >= 0x0600) {
>> diff --git a/src/amd/common/ac_llvm_build.h
>> b/src/amd/common/ac_llvm_build.h
>> index 0deb5b5..3f0e9e2 100644
>> --- a/src/amd/common/ac_llvm_build.h
>> +++ b/src/amd/common/ac_llvm_build.h
>> @@ -50,20 +50,21 @@ struct ac_llvm_context {
>> LLVMTypeRef voidt;
>> LLVMTypeRef i1;
>> LLVMTypeRef i8;
>> LLVMTypeRef i16;
>> LLVMTypeRef i32;
>> LLVMTypeRef i64;
>> LLVMTypeRef intptr;
>> LLVMTypeRef f16;
>> LLVMTypeRef f32;
>> LLVMTypeRef f64;
>> + LLVMTypeRef v2i16;
>> LLVMTypeRef v2i32;
>> LLVMTypeRef v3i32;
>> LLVMTypeRef v4i32;
>> LLVMTypeRef v2f32;
>> LLVMTypeRef v4f32;
>> LLVMTypeRef v8i32;
>> LLVMValueRef i32_0;
>> LLVMValueRef i32_1;
>> LLVMValueRef f32_0;
>> @@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context
>> *ctx,
>> LLVMValueRef arg,
>> LLVMTypeRef dst_type);
>> LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
>> LLVMValueRef arg,
>> LLVMTypeRef dst_type);
>> LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
>> LLVMValueRef b);
>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
>> LLVMValueRef b);
>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
>> + LLVMValueRef b);
>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
>> + LLVMValueRef b);
>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
>> LLVMValueRef b);
>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef
>> value);
>> struct ac_export_args {
>> LLVMValueRef out[4];
>> unsigned target;
>> unsigned enabled_channels;
>> bool compr;
>> bool done;
>> bool valid_mask;
>> @@ -282,20 +287,28 @@ struct ac_image_args {
>> LLVMValueRef addr;
>> unsigned dmask;
>> bool unorm;
>> bool da;
>> };
>> LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
>> struct ac_image_args *a);
>> LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
>> LLVMValueRef args[2]);
>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2]);
>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2]);
>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2], unsigned bits, bool
>> hi);
>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
>> + LLVMValueRef args[2], unsigned bits, bool
>> hi);
>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef
>> i1);
>> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef
>> i1);
>> LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef
>> input,
>> LLVMValueRef offset, LLVMValueRef width,
>> bool is_signed);
>> void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
>> void ac_get_image_intr_name(const char *base_name,
>> LLVMTypeRef data_type,
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index 453822c..a695aad 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant(
>>
>> ctx->num_const_buffers);
>> index = LLVMBuildAdd(ctx->ac.builder, index,
>> LLVMConstInt(ctx->i32,
>> SI_NUM_SHADER_BUFFERS, 0), "");
>> bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
>> } else
>> bufp = load_const_buffer_desc(ctx, buf);
>> return bitcast(bld_base, type, buffer_load_const(ctx, bufp,
>> addr));
>> }
>> -/* Upper 16 bits must be zero. */
>> -static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx,
>> - LLVMValueRef val[2])
>> -{
>> - return LLVMBuildOr(ctx->ac.builder, val[0],
>> - LLVMBuildShl(ctx->ac.builder, val[1],
>> - LLVMConstInt(ctx->i32, 16, 0),
>> - ""), "");
>> -}
>> -
>> -/* Upper 16 bits are ignored and will be dropped. */
>> -static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct
>> si_shader_context *ctx,
>> - LLVMValueRef val[2])
>> -{
>> - LLVMValueRef v[2] = {
>> - LLVMBuildAnd(ctx->ac.builder, val[0],
>> - LLVMConstInt(ctx->i32, 0xffff, 0), ""),
>> - val[1],
>> - };
>> - return si_llvm_pack_two_int16(ctx, v);
>> -}
>> -
>> /* Initialize arguments for the shader export intrinsic */
>> static void si_llvm_init_export_args(struct si_shader_context *ctx,
>> LLVMValueRef *values,
>> unsigned target,
>> struct ac_export_args *args)
>> {
>> LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
>> - LLVMBuilderRef builder = ctx->ac.builder;
>> - LLVMValueRef val[4];
>> unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
>> unsigned chan;
>> bool is_int8, is_int10;
>> /* Default is 0xf. Adjusted below depending on the format. */
>> args->enabled_channels = 0xf; /* writemask */
>> /* Specify whether the EXEC mask represents the valid mask */
>> args->valid_mask = 0;
>> @@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct
>> si_shader_context *ctx,
>> is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) &
>> 0x1;
>> is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) &
>> 0x1;
>> }
>> args->compr = false;
>> args->out[0] = f32undef;
>> args->out[1] = f32undef;
>> args->out[2] = f32undef;
>> args->out[3] = f32undef;
>> + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef
>> args[2]) = NULL;
>> + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef
>> args[2],
>> + unsigned bits, bool hi) = NULL;
>> +
>> switch (spi_shader_col_format) {
>> case V_028714_SPI_SHADER_ZERO:
>> args->enabled_channels = 0; /* writemask */
>> args->target = V_008DFC_SQ_EXP_NULL;
>> break;
>> case V_028714_SPI_SHADER_32_R:
>> args->enabled_channels = 1; /* writemask */
>> args->out[0] = values[0];
>> break;
>> @@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct
>> si_shader_context *ctx,
>> args->out[1] = values[1];
>> break;
>> case V_028714_SPI_SHADER_32_AR:
>> args->enabled_channels = 0x9; /* writemask */
>> args->out[0] = values[0];
>> args->out[3] = values[3];
>> break;
>> case V_028714_SPI_SHADER_FP16_ABGR:
>> - args->compr = 1; /* COMPR flag */
>> -
>> - for (chan = 0; chan < 2; chan++) {
>> - LLVMValueRef pack_args[2] = {
>> - values[2 * chan],
>> - values[2 * chan + 1]
>> - };
>> - LLVMValueRef packed;
>> -
>> - packed = ac_build_cvt_pkrtz_f16(&ctx->ac,
>> pack_args);
>> - args->out[chan] = ac_to_float(&ctx->ac, packed);
>> - }
>> + packf = ac_build_cvt_pkrtz_f16;
>> break;
>> case V_028714_SPI_SHADER_UNORM16_ABGR:
>> - for (chan = 0; chan < 4; chan++) {
>> - val[chan] = ac_build_clamp(&ctx->ac,
>> values[chan]);
>> - val[chan] = LLVMBuildFMul(builder, val[chan],
>> - LLVMConstReal(ctx->f32,
>> 65535), "");
>> - val[chan] = LLVMBuildFAdd(builder, val[chan],
>> - LLVMConstReal(ctx->f32,
>> 0.5), "");
>> - val[chan] = LLVMBuildFPToUI(builder, val[chan],
>> - ctx->i32, "");
>> - }
>> -
>> - args->compr = 1; /* COMPR flag */
>> - args->out[0] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int16(ctx, val));
>> - args->out[1] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int16(ctx, val+2));
>> + packf = ac_build_cvt_pknorm_u16;
>> break;
>> case V_028714_SPI_SHADER_SNORM16_ABGR:
>> - for (chan = 0; chan < 4; chan++) {
>> - /* Clamp between [-1, 1]. */
>> - val[chan] =
>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
>> -
>> values[chan],
>> -
>> LLVMConstReal(ctx->f32, 1));
>> - val[chan] =
>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
>> - val[chan],
>> -
>> LLVMConstReal(ctx->f32, -1));
>> - /* Convert to a signed integer in [-32767, 32767].
>> */
>> - val[chan] = LLVMBuildFMul(builder, val[chan],
>> - LLVMConstReal(ctx->f32,
>> 32767), "");
>> - /* If positive, add 0.5, else add -0.5. */
>> - val[chan] = LLVMBuildFAdd(builder, val[chan],
>> - LLVMBuildSelect(builder,
>> - LLVMBuildFCmp(builder,
>> LLVMRealOGE,
>> - val[chan],
>> ctx->ac.f32_0, ""),
>> - LLVMConstReal(ctx->f32,
>> 0.5),
>> - LLVMConstReal(ctx->f32,
>> -0.5), ""), "");
>> - val[chan] = LLVMBuildFPToSI(builder, val[chan],
>> ctx->i32, "");
>> - }
>> -
>> - args->compr = 1; /* COMPR flag */
>> - args->out[0] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int32_as_int16(ctx, val));
>> - args->out[1] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int32_as_int16(ctx, val+2));
>> + packf = ac_build_cvt_pknorm_i16;
>> break;
>> - case V_028714_SPI_SHADER_UINT16_ABGR: {
>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>> - is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
>> - LLVMValueRef max_alpha =
>> - !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3,
>> 0);
>> + case V_028714_SPI_SHADER_UINT16_ABGR:
>> + packi = ac_build_cvt_pk_u16;
>> + break;
>> - /* Clamp. */
>> - for (chan = 0; chan < 4; chan++) {
>> - val[chan] = ac_to_integer(&ctx->ac, values[chan]);
>> - val[chan] =
>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
>> - val[chan],
>> - chan == 3 ? max_alpha : max_rgb);
>> - }
>> + case V_028714_SPI_SHADER_SINT16_ABGR:
>> + packi = ac_build_cvt_pk_i16;
>> + break;
>> - args->compr = 1; /* COMPR flag */
>> - args->out[0] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int16(ctx, val));
>> - args->out[1] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int16(ctx, val+2));
>> + case V_028714_SPI_SHADER_32_ABGR:
>> + memcpy(&args->out[0], values, sizeof(values[0]) * 4);
>> break;
>> }
>> - case V_028714_SPI_SHADER_SINT16_ABGR: {
>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
>> - is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
>> - LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
>> - is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
>> - LLVMValueRef max_alpha =
>> - !is_int10 ? max_rgb : ctx->i32_1;
>> - LLVMValueRef min_alpha =
>> - !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2,
>> 0);
>> + /* Pack f16 or norm_i16/u16. */
>> + if (packf) {
>> + for (chan = 0; chan < 2; chan++) {
>> + LLVMValueRef pack_args[2] = {
>> + values[2 * chan],
>> + values[2 * chan + 1]
>> + };
>> + LLVMValueRef packed;
>> - /* Clamp. */
>> - for (chan = 0; chan < 4; chan++) {
>> - val[chan] = ac_to_integer(&ctx->ac, values[chan]);
>> - val[chan] =
>> lp_build_emit_llvm_binary(&ctx->bld_base,
>> - TGSI_OPCODE_IMIN,
>> - val[chan], chan == 3 ? max_alpha :
>> max_rgb);
>> - val[chan] =
>> lp_build_emit_llvm_binary(&ctx->bld_base,
>> - TGSI_OPCODE_IMAX,
>> - val[chan], chan == 3 ? min_alpha :
>> min_rgb);
>> + packed = packf(&ctx->ac, pack_args);
>> + args->out[chan] = ac_to_float(&ctx->ac, packed);
>> }
>> -
>> args->compr = 1; /* COMPR flag */
>> - args->out[0] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int32_as_int16(ctx, val));
>> - args->out[1] = ac_to_float(&ctx->ac,
>> si_llvm_pack_two_int32_as_int16(ctx, val+2));
>> - break;
>> }
>> + /* Pack i16/u16. */
>> + if (packi) {
>> + for (chan = 0; chan < 2; chan++) {
>> + LLVMValueRef pack_args[2] = {
>> + ac_to_integer(&ctx->ac, values[2 * chan]),
>> + ac_to_integer(&ctx->ac, values[2 * chan +
>> 1])
>> + };
>> + LLVMValueRef packed;
>> - case V_028714_SPI_SHADER_32_ABGR:
>> - memcpy(&args->out[0], values, sizeof(values[0]) * 4);
>> - break;
>> + packed = packi(&ctx->ac, pack_args,
>> + is_int8 ? 8 : is_int10 ? 10 : 16,
>> + chan == 1);
>> + args->out[chan] = ac_to_float(&ctx->ac, packed);
>> + }
>> + args->compr = 1; /* COMPR flag */
>> }
>> }
>> static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
>> LLVMValueRef alpha)
>> {
>> struct si_shader_context *ctx = si_shader_context(bld_base);
>> if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER)
>> {
>> static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] =
>> {
>>
>
More information about the mesa-dev
mailing list