[Mesa-dev] [PATCH 2/2] radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Marek Olšák
maraeo at gmail.com
Thu Jan 4 00:55:14 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/amd/common/ac_llvm_build.c | 164 +++++++++++++++++++++++++++++++
src/amd/common/ac_llvm_build.h | 13 +++
src/gallium/drivers/radeonsi/si_shader.c | 152 ++++++++--------------------
3 files changed, 216 insertions(+), 113 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 7100e52..c48a186 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
@@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
}
LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b)
{
LLVMValueRef args[2] = {a, b};
return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2,
AC_FUNC_ATTR_READNONE);
}
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b)
+{ /* Signed integer minimum of a and b. */
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); /* a <= b, signed compare */
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); /* a if cmp is true, otherwise b */
+}
+
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b)
+{ /* Signed integer maximum of a and b. */
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); /* a > b, signed compare */
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); /* a if cmp is true, otherwise b */
+}
+
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b)
{
LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
if (HAVE_LLVM >= 0x0500) {
@@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
v2f16, args, 2,
AC_FUNC_ATTR_READNONE);
return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
}
return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_LEGACY);
}
+/* Returns val[0] | (val[1] << 16). Upper 16 bits of val[0] must be zero; those of val[1] are shifted out. */
+static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
+ LLVMValueRef val[2])
+{
+ return LLVMBuildOr(ctx->builder, val[0],
+ LLVMBuildShl(ctx->builder, val[1],
+ LLVMConstInt(ctx->i32, 16, 0),
+ ""), "");
+}
+
+/* Upper 16 bits are ignored and will be dropped: val[0] is masked with 0xffff, val[1] loses them in the shift. */
+static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx,
+ LLVMValueRef val[2])
+{
+ LLVMValueRef v[2] = { /* local copy, so the caller's val[] is left untouched */
+ LLVMBuildAnd(ctx->builder, val[0],
+ LLVMConstInt(ctx->i32, 0xffff, 0), ""),
+ val[1],
+ };
+ return ac_llvm_pack_two_int16(ctx, v);
+}
+
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2])
+{ /* Converts two floats to signed normalized 16-bit integers packed into one i32. */
+ if (HAVE_LLVM >= 0x0600) { /* intrinsic path: the v2i16 result is returned bitcast to i32 */
+ LLVMValueRef res =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
+ ctx->v2i16, args, 2,
+ AC_FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+ }
+
+ LLVMValueRef val[2]; /* fallback path: emulate the conversion per channel */
+
+ for (int chan = 0; chan < 2; chan++) {
+ /* Clamp between [-1, 1]. */
+ val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
+ val[chan] = ac_build_fmax(ctx, val[chan], LLVMConstReal(ctx->f32, -1));
+ /* Convert to a signed integer in [-32767, 32767]. */
+ val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+ LLVMConstReal(ctx->f32, 32767), "");
+ /* Round to nearest: add 0.5 if >= 0, else add -0.5, before the FPToSI conversion below. */
+ val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+ LLVMBuildSelect(ctx->builder,
+ LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
+ val[chan], ctx->f32_0, ""),
+ LLVMConstReal(ctx->f32, 0.5),
+ LLVMConstReal(ctx->f32, -0.5), ""), "");
+ val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
+ }
+ return ac_llvm_pack_two_int32_as_int16(ctx, val); /* val[0] in the low 16 bits, val[1] in the high 16 bits */
+}
+
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2])
+{ /* Converts two floats to unsigned normalized 16-bit integers packed into one i32. */
+ if (HAVE_LLVM >= 0x0600) { /* intrinsic path: the v2i16 result is returned bitcast to i32 */
+ LLVMValueRef res =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
+ ctx->v2i16, args, 2,
+ AC_FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+ }
+
+ LLVMValueRef val[2]; /* fallback path: emulate the conversion per channel */
+
+ for (int chan = 0; chan < 2; chan++) {
+ val[chan] = ac_build_clamp(ctx, args[chan]); /* presumably clamps to [0, 1]; ac_build_clamp body is not visible here - confirm */
+ val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+ LLVMConstReal(ctx->f32, 65535), ""); /* scale by 65535 */
+ val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+ LLVMConstReal(ctx->f32, 0.5), ""); /* add 0.5 so the conversion below rounds to nearest */
+ val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
+ ctx->i32, "");
+ }
+ return ac_llvm_pack_two_int32_as_int16(ctx, val); /* val[0] in the low 16 bits, val[1] in the high 16 bits */
+}
+
+/* Packs two signed i32 values into one i32 as two 16-bit halves. The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi)
+{
+ assert(bits == 8 || bits == 10 || bits == 16);
+
+ LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+ bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+ LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+ bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
+ LLVMValueRef max_alpha =
+ bits != 10 ? max_rgb : ctx->i32_1; /* for bits == 10 the alpha range is [-2, 1] */
+ LLVMValueRef min_alpha =
+ bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
+ bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+ /* Clamp; skipped only when the intrinsic is available and bits == 16. */
+ if (!has_intrinsic || bits != 16) {
+ for (int i = 0; i < 2; i++) {
+ bool alpha = hi && i == 1; /* only args[1] of the "hi" pair uses the alpha limits */
+ args[i] = ac_build_imin(ctx, args[i], /* note: overwrites the caller's args[] in place */
+ alpha ? max_alpha : max_rgb);
+ args[i] = ac_build_imax(ctx, args[i],
+ alpha ? min_alpha : min_rgb);
+ }
+ }
+
+ if (has_intrinsic) { /* intrinsic path: the v2i16 result is returned bitcast to i32 */
+ LLVMValueRef res =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+ ctx->v2i16, args, 2,
+ AC_FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+ }
+
+ return ac_llvm_pack_two_int32_as_int16(ctx, args); /* masks args[0] so sign-extension bits do not leak into the high half */
+}
+
+/* Packs two unsigned i32 values into one i32 as two 16-bit halves. The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi)
+{
+ assert(bits == 8 || bits == 10 || bits == 16);
+
+ LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+ bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
+ LLVMValueRef max_alpha =
+ bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); /* for bits == 10 the alpha maximum is 3 */
+ bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+ /* Clamp; skipped only when the intrinsic is available and bits == 16. */
+ if (!has_intrinsic || bits != 16) {
+ for (int i = 0; i < 2; i++) {
+ bool alpha = hi && i == 1; /* only args[1] of the "hi" pair uses the alpha limit */
+ args[i] = ac_build_umin(ctx, args[i], /* note: overwrites the caller's args[] in place */
+ alpha ? max_alpha : max_rgb);
+ }
+ }
+
+ if (has_intrinsic) { /* intrinsic path: the v2i16 result is returned bitcast to i32 */
+ LLVMValueRef res =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+ ctx->v2i16, args, 2,
+ AC_FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+ }
+
+ return ac_llvm_pack_two_int16(ctx, args); /* no masking needed: this path always clamps both values to <= 65535 above */
+}
+
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
assert(HAVE_LLVM >= 0x0600);
return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
&i1, 1, AC_FUNC_ATTR_READNONE);
}
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
if (HAVE_LLVM >= 0x0600) {
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0deb5b5..3f0e9e2 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -50,20 +50,21 @@ struct ac_llvm_context {
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
LLVMTypeRef intptr;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
+ LLVMTypeRef v2i16;
LLVMTypeRef v2i32;
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
LLVMTypeRef v2f32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef f32_0;
@@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
LLVMValueRef arg,
LLVMTypeRef dst_type);
LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
LLVMValueRef arg,
LLVMTypeRef dst_type);
LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
struct ac_export_args {
LLVMValueRef out[4];
unsigned target;
unsigned enabled_channels;
bool compr;
bool done;
bool valid_mask;
@@ -282,20 +287,28 @@ struct ac_image_args {
LLVMValueRef addr;
unsigned dmask;
bool unorm;
bool da;
};
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed);
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
void ac_get_image_intr_name(const char *base_name,
LLVMTypeRef data_type,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 453822c..a695aad 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant(
ctx->num_const_buffers);
index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
} else
bufp = load_const_buffer_desc(ctx, buf);
return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
}
-/* Upper 16 bits must be zero. */
-static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx,
- LLVMValueRef val[2])
-{
- return LLVMBuildOr(ctx->ac.builder, val[0],
- LLVMBuildShl(ctx->ac.builder, val[1],
- LLVMConstInt(ctx->i32, 16, 0),
- ""), "");
-}
-
-/* Upper 16 bits are ignored and will be dropped. */
-static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct si_shader_context *ctx,
- LLVMValueRef val[2])
-{
- LLVMValueRef v[2] = {
- LLVMBuildAnd(ctx->ac.builder, val[0],
- LLVMConstInt(ctx->i32, 0xffff, 0), ""),
- val[1],
- };
- return si_llvm_pack_two_int16(ctx, v);
-}
-
/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct si_shader_context *ctx,
LLVMValueRef *values,
unsigned target,
struct ac_export_args *args)
{
LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef val[4];
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;
bool is_int8, is_int10;
/* Default is 0xf. Adjusted below depending on the format. */
args->enabled_channels = 0xf; /* writemask */
/* Specify whether the EXEC mask represents the valid mask */
args->valid_mask = 0;
@@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
}
args->compr = false;
args->out[0] = f32undef;
args->out[1] = f32undef;
args->out[2] = f32undef;
args->out[3] = f32undef;
+ LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
+ LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
+ unsigned bits, bool hi) = NULL;
+
switch (spi_shader_col_format) {
case V_028714_SPI_SHADER_ZERO:
args->enabled_channels = 0; /* writemask */
args->target = V_008DFC_SQ_EXP_NULL;
break;
case V_028714_SPI_SHADER_32_R:
args->enabled_channels = 1; /* writemask */
args->out[0] = values[0];
break;
@@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
args->out[1] = values[1];
break;
case V_028714_SPI_SHADER_32_AR:
args->enabled_channels = 0x9; /* writemask */
args->out[0] = values[0];
args->out[3] = values[3];
break;
case V_028714_SPI_SHADER_FP16_ABGR:
- args->compr = 1; /* COMPR flag */
-
- for (chan = 0; chan < 2; chan++) {
- LLVMValueRef pack_args[2] = {
- values[2 * chan],
- values[2 * chan + 1]
- };
- LLVMValueRef packed;
-
- packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
- args->out[chan] = ac_to_float(&ctx->ac, packed);
- }
+ packf = ac_build_cvt_pkrtz_f16;
break;
case V_028714_SPI_SHADER_UNORM16_ABGR:
- for (chan = 0; chan < 4; chan++) {
- val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
- val[chan] = LLVMBuildFMul(builder, val[chan],
- LLVMConstReal(ctx->f32, 65535), "");
- val[chan] = LLVMBuildFAdd(builder, val[chan],
- LLVMConstReal(ctx->f32, 0.5), "");
- val[chan] = LLVMBuildFPToUI(builder, val[chan],
- ctx->i32, "");
- }
-
- args->compr = 1; /* COMPR flag */
- args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
- args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
+ packf = ac_build_cvt_pknorm_u16;
break;
case V_028714_SPI_SHADER_SNORM16_ABGR:
- for (chan = 0; chan < 4; chan++) {
- /* Clamp between [-1, 1]. */
- val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
- values[chan],
- LLVMConstReal(ctx->f32, 1));
- val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
- val[chan],
- LLVMConstReal(ctx->f32, -1));
- /* Convert to a signed integer in [-32767, 32767]. */
- val[chan] = LLVMBuildFMul(builder, val[chan],
- LLVMConstReal(ctx->f32, 32767), "");
- /* If positive, add 0.5, else add -0.5. */
- val[chan] = LLVMBuildFAdd(builder, val[chan],
- LLVMBuildSelect(builder,
- LLVMBuildFCmp(builder, LLVMRealOGE,
- val[chan], ctx->ac.f32_0, ""),
- LLVMConstReal(ctx->f32, 0.5),
- LLVMConstReal(ctx->f32, -0.5), ""), "");
- val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, "");
- }
-
- args->compr = 1; /* COMPR flag */
- args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
- args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
+ packf = ac_build_cvt_pknorm_i16;
break;
- case V_028714_SPI_SHADER_UINT16_ABGR: {
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
- LLVMValueRef max_alpha =
- !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ packi = ac_build_cvt_pk_u16;
+ break;
- /* Clamp. */
- for (chan = 0; chan < 4; chan++) {
- val[chan] = ac_to_integer(&ctx->ac, values[chan]);
- val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
- val[chan],
- chan == 3 ? max_alpha : max_rgb);
- }
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ packi = ac_build_cvt_pk_i16;
+ break;
- args->compr = 1; /* COMPR flag */
- args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
- args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
+ case V_028714_SPI_SHADER_32_ABGR:
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
break;
}
- case V_028714_SPI_SHADER_SINT16_ABGR: {
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
- LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
- is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
- LLVMValueRef max_alpha =
- !is_int10 ? max_rgb : ctx->i32_1;
- LLVMValueRef min_alpha =
- !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
+ /* Pack f16 or norm_i16/u16. */
+ if (packf) {
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {
+ values[2 * chan],
+ values[2 * chan + 1]
+ };
+ LLVMValueRef packed;
- /* Clamp. */
- for (chan = 0; chan < 4; chan++) {
- val[chan] = ac_to_integer(&ctx->ac, values[chan]);
- val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
- TGSI_OPCODE_IMIN,
- val[chan], chan == 3 ? max_alpha : max_rgb);
- val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
- TGSI_OPCODE_IMAX,
- val[chan], chan == 3 ? min_alpha : min_rgb);
+ packed = packf(&ctx->ac, pack_args);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
}
-
args->compr = 1; /* COMPR flag */
- args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
- args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
- break;
}
+ /* Pack i16/u16. */
+ if (packi) {
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {
+ ac_to_integer(&ctx->ac, values[2 * chan]),
+ ac_to_integer(&ctx->ac, values[2 * chan + 1])
+ };
+ LLVMValueRef packed;
- case V_028714_SPI_SHADER_32_ABGR:
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
- break;
+ packed = packi(&ctx->ac, pack_args,
+ is_int8 ? 8 : is_int10 ? 10 : 16,
+ chan == 1);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
}
}
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
LLVMValueRef alpha)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
--
2.7.4
More information about the mesa-dev
mailing list