Mesa (master): freedreno/a3xx/compiler: add LIT

Rob Clark robclark at kemper.freedesktop.org
Sat Sep 14 17:32:50 UTC 2013


Module: Mesa
Branch: master
Commit: 64c134cedb8845f274b017ff9ca3c3ca4d34372b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=64c134cedb8845f274b017ff9ca3c3ca4d34372b

Author: Rob Clark <robclark at freedesktop.org>
Date:   Sun Sep  1 11:35:56 2013 -0400

freedreno/a3xx/compiler: add LIT

Needed by glxgears and etuxracer ;-)

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c |  145 ++++++++++++++++++++-
 1 files changed, 142 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 994f71c..be6ecdc 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -65,10 +65,11 @@ static unsigned regmask_idx(struct ir3_register *reg)
 static void regmask_set(regmask_t regmask, struct ir3_register *reg,
 		unsigned wrmask)
 {
+	unsigned ridx = regmask_idx(reg) & ~0x3;
 	unsigned i;
 	for (i = 0; i < 4; i++) {
 		if (wrmask & (1 << i)) {
-			unsigned idx = regmask_idx(reg) + i;
+			unsigned idx = ridx + i;
 			regmask[idx / 8] |= 1 << (idx % 8);
 		}
 	}
@@ -179,8 +180,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 			ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
 
 	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
-	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
-			ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
 
 	ret = tgsi_parse_init(&ctx->parser, tokens);
 	if (ret != TGSI_PARSE_OK)
@@ -602,6 +602,144 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
  * native instructions:
  */
 
+/* LIT - Light Coefficients:
+ *   dst.x = 1.0
+ *   dst.y = max(src.x, 0.0)
+ *   dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0.0
+ *   dst.w = 1.0
+ *
+ *   max.f tmp.y, src.y, {0.0}
+ *   nop
+ *   max.f tmp.z, src.w, {-128.0}
+ *   cmps.f.ge tmp.w, (neg)src.x, {0.0}
+ *   max.f dst.y, src.x, {0.0}
+ *   nop
+ *   min.f tmp.x, tmp.z, {128.0}
+ *   add.s tmp.z, tmp.w, -1
+ *   log2 tmp.y, tmp.y
+ *   (rpt1)nop
+ *   (ss)mul.f tmp.x, tmp.x, tmp.y
+ *   mov.f16f16 dst.x, {1.0}
+ *   mov.f16f16 dst.w, {1.0}
+ *   (rpt3)nop
+ *   exp2 tmp.x, tmp.x
+ *   (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x
+ */
+static void
+trans_lit(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct ir3_instruction *instr;
+	struct ir3_register *r;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register *tmp_src;
+	struct tgsi_src_register constval0, constval1, constval128;
+	struct tgsi_src_register *src = &inst->Src[0].Register;
+	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
+
+	if (is_const(src))
+		src = get_unconst(ctx, src);
+
+	get_immediate(ctx, &constval0, fui(0.0));
+	get_immediate(ctx, &constval1, fui(1.0));
+	get_immediate(ctx, &constval128, fui(128.0));
+
+	/* max.f tmp.y, src.y, {0.0} */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
+	add_src_reg(ctx, instr, src, TGSI_SWIZZLE_Y);
+	add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+
+	/* nop */
+	ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+	/* max.f tmp.z, src.w, {-128.0} */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
+	add_src_reg(ctx, instr, src, TGSI_SWIZZLE_W);
+	add_src_reg(ctx, instr, &constval128,
+			constval128.SwizzleX)->flags |= IR3_REG_NEGATE;
+
+	/* cmps.f.ge tmp.w, (neg)src.x, {0.0} */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_W);
+	add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X)->flags |= IR3_REG_NEGATE;
+	add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+	instr->cat2.condition = IR3_COND_GE;
+
+	/* max.f dst.y, src.x, {0.0} */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Y);
+	add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+
+	/* nop */
+	ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+	/* min.f tmp.x, tmp.z, {128.0} */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
+	add_src_reg(ctx, instr, &constval128, constval128.SwizzleX);
+
+	/* add.s tmp.z, tmp.w, -1 */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_W);
+	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+	/* log2 tmp.y, tmp.y */
+	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+	r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
+	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_Y);
+
+	/* (rpt1)nop */
+	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
+
+	/* (ss)mul.f tmp.x, tmp.x, tmp.y */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
+
+	/* mov.f16f16 dst.x, {1.0} */
+	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr->cat1.src_type = get_ftype(ctx);
+	instr->cat1.dst_type = get_ftype(ctx);
+	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
+
+	/* mov.f16f16 dst.w, {1.0} */
+	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr->cat1.src_type = get_ftype(ctx);
+	instr->cat1.dst_type = get_ftype(ctx);
+	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_W);
+	add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
+
+	/* (rpt3)nop */
+	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 3;
+
+	/* exp2 tmp.x, tmp.x */
+	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+	r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
+
+	/* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x */
+	instr = ir3_instr_create(ctx->ir, 3,
+			ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Z);
+	add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
+	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+
+	put_dst(ctx, inst, dst);
+}
+
 static inline void
 get_swiz(unsigned *swiz, struct tgsi_src_register *src)
 {
@@ -1266,6 +1404,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
 	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
 
 	INSTR(MOV,          instr_cat1),
+	INSTR(LIT,          trans_lit),
 	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
 	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
 	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),




More information about the mesa-commit mailing list