Mesa (master): freedreno/a3xx/compiler: relative addressing

Rob Clark robclark at kemper.freedesktop.org
Fri Oct 25 00:22:27 UTC 2013


Module: Mesa
Branch: master
Commit: a453242fda0a57012a790e67053545211c0486ed
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a453242fda0a57012a790e67053545211c0486ed

Author: Rob Clark <robclark at freedesktop.org>
Date:   Thu Oct 24 17:53:31 2013 -0400

freedreno/a3xx/compiler: relative addressing

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c |  124 ++++++++++++++++++++-
 1 files changed, 123 insertions(+), 1 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index b05b272..352afcf 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -96,6 +96,9 @@ struct fd3_compile_context {
 	/* last input dst (for setting (ei) flag): */
 	struct ir3_register *last_input;
 
+	/* last instruction with relative addressing: */
+	struct ir3_instruction *last_rel;
+
 	unsigned next_inloc;
 	unsigned num_internal_temps;
 	struct tgsi_src_register internal_temps[6];
@@ -156,6 +159,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 	ctx->ir = so->ir;
 	ctx->so = so;
 	ctx->last_input = NULL;
+	ctx->last_rel = NULL;
 	ctx->next_inloc = 8;
 	ctx->num_internal_temps = 0;
 	ctx->branch_count = 0;
@@ -222,11 +226,23 @@ struct instr_translater {
 	unsigned arg;
 };
 
+static void
+handle_last_rel(struct fd3_compile_context *ctx)
+{
+	if (ctx->last_rel) {	/* non-NULL only after an instruction with an Indirect operand was recorded */
+		ctx->last_rel->flags |= IR3_INSTR_UL;	/* tag that last relative-addressing instruction with the UL flag */
+		ctx->last_rel = NULL;	/* forget it so the same instruction is not tagged again */
+	}
+}
+
 static unsigned
 src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
 {
 	unsigned flags = 0;
 
+	if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
+		return flags;
+
 	if (regmask_get(ctx->needs_ss, reg)) {
 		flags |= IR3_INSTR_SS;
 		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
@@ -251,6 +267,9 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 	case TGSI_FILE_TEMPORARY:
 		num = dst->Index + ctx->base_reg[dst->File];
 		break;
+	case TGSI_FILE_ADDRESS:
+		num = REG_A0;
+		break;
 	default:
 		compile_error(ctx, "unsupported dst register file: %s\n",
 			tgsi_file_name(dst->File));
@@ -270,6 +289,11 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 	unsigned flags = 0, num = 0;
 	struct ir3_register *reg;
 
+	/* TODO we need to use a mov to temp for const >= 64.. or maybe
+	 * we could use relative addressing..
+	 */
+	compile_assert(ctx, src->Index < 64);
+
 	switch (src->File) {
 	case TGSI_FILE_IMMEDIATE:
 		/* TODO if possible, use actual immediate instead of const.. but
@@ -298,11 +322,16 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 		flags |= IR3_REG_ABS;
 	if (src->Negate)
 		flags |= IR3_REG_NEGATE;
+	if (src->Indirect)
+		flags |= IR3_REG_RELATIV;
 	if (ctx->so->half_precision)
 		flags |= IR3_REG_HALF;
 
 	reg = ir3_reg_create(instr, regid(num, chan), flags);
 
+	if (src->Indirect)
+		ctx->last_rel = instr;
+
 	instr->flags |= src_flags(ctx, reg);
 
 	return reg;
@@ -350,6 +379,39 @@ get_internal_temp(struct fd3_compile_context *ctx,
 	return tmp_src;
 }
 
+/* Fill *tmp_dst with an internal temp meant for half-precision use within a
+ * single TGSI op and return the matching src (one ctx->internal_temps slot).
+ */
+static struct tgsi_src_register *
+get_internal_temp_hr(struct fd3_compile_context *ctx,
+		struct tgsi_dst_register *tmp_dst)
+{
+	struct tgsi_src_register *tmp_src;
+	int n;
+
+	if (ctx->so->half_precision)	/* shader-wide half precision: defer to the regular temp helper */
+		return get_internal_temp(ctx, tmp_dst);
+
+	tmp_dst->File      = TGSI_FILE_TEMPORARY;
+	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
+	tmp_dst->Indirect  = 0;
+	tmp_dst->Dimension = 0;
+
+	/* claim the next slot in ctx->internal_temps for the returned src: */
+	n = ctx->num_internal_temps++;
+	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));	/* bounds check runs before the slot is indexed below */
+	tmp_src = &ctx->internal_temps[n];
+
+	/* just use hr0 because no one else should be using half-
+	 * precision regs:
+	 */
+	tmp_dst->Index = 0;	/* always index 0, so every temp handed out on this path names the same register */
+
+	src_from_dst(tmp_src, tmp_dst);	/* derive the src description from the dst just filled in */
+
+	return tmp_src;
+}
+
 /* same as get_internal_temp, but w/ src.xxxx (for instructions that
  * replicate their results)
  */
@@ -520,6 +582,11 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 		struct tgsi_src_register *src = &inst->Src[i].Register;
 		if ((src->File == dst->File) && (src->Index == dst->Index)) {
+			if ((src->SwizzleX == TGSI_SWIZZLE_X) &&
+					(src->SwizzleY == TGSI_SWIZZLE_Y) &&
+					(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
+					(src->SwizzleW == TGSI_SWIZZLE_W))
+				continue;
 			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
 			ctx->tmp_dst.WriteMask = dst->WriteMask;
 			dst = &ctx->tmp_dst;
@@ -548,6 +615,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 {
 	va_list ap;
 	int i, j, n = 0;
+	bool indirect = dst->Indirect;
 
 	add_dst_reg(ctx, instr, dst, 0);
 
@@ -560,6 +628,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 		reg->flags |= flags & ~IR3_REG_NEGATE;
 		if (flags & IR3_REG_NEGATE)
 			reg->flags ^= IR3_REG_NEGATE;
+		indirect |= src->Indirect;
 	}
 	va_end(ap);
 
@@ -589,6 +658,9 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 				cur->flags |= src_flags(ctx, cur->regs[j+1]);
 			}
 			va_end(ap);
+
+			if (indirect)
+				ctx->last_rel = cur;
 		}
 	}
 
@@ -890,6 +962,53 @@ trans_frac(const struct instr_translater *t,
 			tmp_src, IR3_REG_NEGATE);
 }
 
+/* ARL: convert the source channel to s16, shift it left by 2, then mova the result (via hrN) into a0 */
+static void
+trans_arl(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register *tmp_src;
+	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+	struct tgsi_src_register *src = &inst->Src[0].Register;
+	unsigned chan = src->SwizzleX;	/* only the component selected by the X swizzle is used */
+	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
+
+	handle_last_rel(ctx);	/* tag the last instruction that used relative addressing before a0 is rewritten */
+
+	tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
+
+
+	/* cov.{f32,f16}s16 Rtmp, Rsrc  (src type follows get_ftype(ctx)) */
+	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr->cat1.src_type = get_ftype(ctx);
+	instr->cat1.dst_type = TYPE_S16;
+	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
+	add_src_reg(ctx, instr, src, chan);
+
+	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;	/* NOTE(review): presumably padding until Rtmp can be read - confirm */
+
+	/* shl.b Rtmp, Rtmp, 2 */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
+	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
+	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
+	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;	/* immediate shift count; NOTE(review): presumably a0 counts scalar components (x4 per vec4) - confirm */
+
+	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;	/* NOTE(review): presumably padding until the shifted Rtmp can be read - confirm */
+
+	/* mova a0, Rtmp  (cat1 s16->s16 move whose dst is the address register) */
+	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr->cat1.src_type = TYPE_S16;
+	instr->cat1.dst_type = TYPE_S16;
+	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
+	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
+
+	/* need to ensure 5 instr slots before a0 is used: */
+	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+}
+
 /* POW(a,b) = EXP2(b * LOG2(a)) */
 static void
 trans_pow(const struct instr_translater *t,
@@ -1425,10 +1544,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
 	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
 	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
 	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
+	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
 	INSTR(LRP,          trans_lrp),
 	INSTR(FRC,          trans_frac),
 	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
-	INSTR(ARL,          instr_cat2, .opc = OPC_FLOOR_F),
+	INSTR(ARL,          trans_arl),
 	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
 	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
 	INSTR(POW,          trans_pow),
@@ -1626,6 +1746,8 @@ compile_instructions(struct fd3_compile_context *ctx)
 
 	if (ctx->last_input)
 		ctx->last_input->flags |= IR3_REG_EI;
+
+	handle_last_rel(ctx);
 }
 
 int




More information about the mesa-commit mailing list