Mesa (master): freedreno/a3xx/compiler: prepare for new compiler

Rob Clark robclark at kemper.freedesktop.org
Mon Feb 3 23:28:17 UTC 2014


Module: Mesa
Branch: master
Commit: a418573c4d7fc7f896e7077378d2b4daf98d5217
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a418573c4d7fc7f896e7077378d2b4daf98d5217

Author: Rob Clark <robclark at freedesktop.org>
Date:   Wed Jan 29 16:25:52 2014 -0500

freedreno/a3xx/compiler: prepare for new compiler

Shuffle things around to prepare for new compiler.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/Makefile.sources     |    2 +-
 src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c   |   34 +----
 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c  |   76 ++++++----
 src/gallium/drivers/freedreno/a3xx/fd3_compiler.h  |   55 --------
 src/gallium/drivers/freedreno/a3xx/fd3_program.c   |   37 ++---
 src/gallium/drivers/freedreno/a3xx/fd3_program.h   |   12 +-
 src/gallium/drivers/freedreno/a3xx/instr-a3xx.h    |   31 ++++
 .../drivers/freedreno/a3xx/{ir-a3xx.c => ir3.c}    |   59 ++++++--
 .../drivers/freedreno/a3xx/{ir-a3xx.h => ir3.h}    |  148 +++++++++++++++++++-
 9 files changed, 308 insertions(+), 146 deletions(-)

diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 3dcec9d..7d67bf2 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -42,4 +42,4 @@ a3xx_SOURCES := \
 	a3xx/fd3_util.c \
 	a3xx/fd3_zsa.c \
 	a3xx/disasm-a3xx.c \
-	a3xx/ir-a3xx.c
+	a3xx/ir3.c
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
index 2d5ae62..0e45ec5 100644
--- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
@@ -285,21 +285,7 @@ static void print_instr_cat2(instr_t *instr)
 static void print_instr_cat3(instr_t *instr)
 {
 	instr_cat3_t *cat3 = &instr->cat3;
-	bool full = true;
-
-	// XXX is this based on opc or some other bit?
-	switch (cat3->opc) {
-	case OPC_MAD_F16:
-	case OPC_MAD_U16:
-	case OPC_MAD_S16:
-	case OPC_SEL_B16:
-	case OPC_SEL_S16:
-	case OPC_SEL_F16:
-	case OPC_SAD_S16:
-	case OPC_SAD_S32:  // really??
-		full = false;
-		break;
-	}
+	bool full = instr_cat3_full(cat3);
 
 	printf(" ");
 	print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
@@ -747,26 +733,12 @@ struct opc_info {
 #undef OPC
 };
 
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
-
-static uint32_t getopc(instr_t *instr)
-{
-	switch (instr->opc_cat) {
-	case 0:  return instr->cat0.opc;
-	case 1:  return 0;
-	case 2:  return instr->cat2.opc;
-	case 3:  return instr->cat3.opc;
-	case 4:  return instr->cat4.opc;
-	case 5:  return instr->cat5.opc;
-	case 6:  return instr->cat6.opc;
-	default: return 0;
-	}
-}
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
 
 static void print_instr(uint32_t *dwords, int level, int n)
 {
 	instr_t *instr = (instr_t *)dwords;
-	uint32_t opc = getopc(instr);
+	uint32_t opc = instr_opc(instr);
 	const char *name;
 
 	printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 2c32c0f..5ab34e5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -44,12 +44,13 @@
 #include "fd3_util.h"
 
 #include "instr-a3xx.h"
-#include "ir-a3xx.h"
+#include "ir3.h"
 
 
 struct fd3_compile_context {
 	const struct tgsi_token *tokens;
 	struct ir3_shader *ir;
+	struct ir3_block *block;
 	struct fd3_shader_stateobj *so;
 
 	struct tgsi_parse_context parser;
@@ -124,6 +125,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 
 	ctx->tokens = tokens;
 	ctx->ir = so->ir;
+	ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
 	ctx->so = so;
 	ctx->last_input = NULL;
 	ctx->last_rel = NULL;
@@ -176,7 +178,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...)
 	_debug_vprintf(format, ap);
 	va_end(ap);
 	tgsi_dump(ctx->tokens, 0);
-	assert(0);
+	debug_assert(0);
 }
 
 #define compile_assert(ctx, cond) do { \
@@ -208,11 +210,17 @@ handle_last_rel(struct fd3_compile_context *ctx)
 	}
 }
 
+static struct ir3_instruction *
+instr_create(struct fd3_compile_context *ctx, int category, opc_t opc)
+{
+	return ir3_instr_create(ctx->block, category, opc);  /* wrapper: creates the instruction in ctx->block */
+}
+
 static void
 add_nop(struct fd3_compile_context *ctx, unsigned count)
 {
 	while (count-- > 0)
-		ir3_instr_create(ctx->ir, 0, OPC_NOP);
+		instr_create(ctx, 0, OPC_NOP);
 }
 
 static unsigned
@@ -241,6 +249,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 		const struct tgsi_dst_register *dst, unsigned chan)
 {
 	unsigned flags = 0, num = 0;
+	struct ir3_register *reg;
 
 	switch (dst->File) {
 	case TGSI_FILE_OUTPUT:
@@ -256,10 +265,17 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 		break;
 	}
 
+	if (dst->Indirect)
+		flags |= IR3_REG_RELATIV;
 	if (ctx->so->half_precision)
 		flags |= IR3_REG_HALF;
 
-	return ir3_reg_create(instr, regid(num, chan), flags);
+	reg = ir3_reg_create(instr, regid(num, chan), flags);
+
+	if (dst->Indirect)
+		ctx->last_rel = instr;
+
+	return reg;
 }
 
 static struct ir3_register *
@@ -517,9 +533,9 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
 				/* can't have abs or neg on a mov instr, so use
 				 * absneg.f instead to handle these cases:
 				 */
-				instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F);
+				instr = instr_create(ctx, 2, OPC_ABSNEG_F);
 			} else {
-				instr = ir3_instr_create(ctx->ir, 1, 0);
+				instr = instr_create(ctx, 1, 0);
 				instr->cat1.src_type = type_mov;
 				instr->cat1.dst_type = type_mov;
 			}
@@ -539,10 +555,10 @@ create_clamp(struct fd3_compile_context *ctx,
 {
 	struct ir3_instruction *instr;
 
-	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+	instr = instr_create(ctx, 2, OPC_MAX_F);
 	vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
 
-	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+	instr = instr_create(ctx, 2, OPC_MIN_F);
 	vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
 }
 
@@ -707,7 +723,7 @@ trans_arl(const struct instr_translater *t,
 	tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
 
 	/* cov.{f32,f16}s16 Rtmp, Rsrc */
-	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr = instr_create(ctx, 1, 0);
 	instr->cat1.src_type = get_ftype(ctx);
 	instr->cat1.dst_type = TYPE_S16;
 	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
@@ -716,7 +732,7 @@ trans_arl(const struct instr_translater *t,
 	add_nop(ctx, 3);
 
 	/* shl.b Rtmp, Rtmp, 2 */
-	instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
+	instr = instr_create(ctx, 2, OPC_SHL_B);
 	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
 	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
 	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
@@ -724,7 +740,7 @@ trans_arl(const struct instr_translater *t,
 	add_nop(ctx, 3);
 
 	/* mova a0, Rtmp */
-	instr = ir3_instr_create(ctx->ir, 1, 0);
+	instr = instr_create(ctx, 1, 0);
 	instr->cat1.src_type = TYPE_S16;
 	instr->cat1.dst_type = TYPE_S16;
 	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
@@ -804,7 +820,7 @@ trans_samp(const struct instr_translater *t,
 		tmp_src = get_internal_temp(ctx, &tmp_dst);
 
 		for (j = 0; (j < 4) && (order[j] >= 0); j++) {
-			instr = ir3_instr_create(ctx->ir, 1, 0);
+			instr = instr_create(ctx, 1, 0);
 			instr->cat1.src_type = type_mov;
 			instr->cat1.dst_type = type_mov;
 			add_dst_reg(ctx, instr, &tmp_dst, j);
@@ -817,7 +833,7 @@ trans_samp(const struct instr_translater *t,
 		add_nop(ctx, 4 - j);
 	}
 
-	instr = ir3_instr_create(ctx->ir, 5, t->opc);
+	instr = instr_create(ctx, 5, t->opc);
 	instr->cat5.type = get_ftype(ctx);
 	instr->cat5.samp = samp->Index;
 	instr->cat5.tex  = samp->Index;
@@ -915,7 +931,7 @@ trans_cmp(const struct instr_translater *t,
 		a0 = get_unconst(ctx, a0);
 
 	/* cmps.f.ge tmp, a0, a1 */
-	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+	instr = instr_create(ctx, 2, OPC_CMPS_F);
 	instr->cat2.condition = condition;
 	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
 
@@ -924,7 +940,7 @@ trans_cmp(const struct instr_translater *t,
 	case TGSI_OPCODE_SGE:
 	case TGSI_OPCODE_SLE:
 		/* cov.u16f16 dst, tmp0 */
-		instr = ir3_instr_create(ctx->ir, 1, 0);
+		instr = instr_create(ctx, 1, 0);
 		instr->cat1.src_type = get_utype(ctx);
 		instr->cat1.dst_type = get_ftype(ctx);
 		vectorize(ctx, instr, dst, 1, tmp_src, 0);
@@ -934,12 +950,12 @@ trans_cmp(const struct instr_translater *t,
 	case TGSI_OPCODE_SLT:
 	case TGSI_OPCODE_CMP:
 		/* add.s tmp, tmp, -1 */
-		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+		instr = instr_create(ctx, 2, OPC_ADD_S);
 		vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);
 
 		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
 			/* sel.{f32,f16} dst, src2, tmp, src1 */
-			instr = ir3_instr_create(ctx->ir, 3,
+			instr = instr_create(ctx, 3,
 					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
 			vectorize(ctx, instr, dst, 3,
 					&inst->Src[2].Register, 0,
@@ -949,7 +965,7 @@ trans_cmp(const struct instr_translater *t,
 			get_immediate(ctx, &constval0, fui(0.0));
 			get_immediate(ctx, &constval1, fui(1.0));
 			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
-			instr = ir3_instr_create(ctx->ir, 3,
+			instr = instr_create(ctx, 3,
 					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
 			vectorize(ctx, instr, dst, 3,
 					&constval0, 0, tmp_src, 0, &constval1, 0);
@@ -990,7 +1006,7 @@ pop_branch(struct fd3_compile_context *ctx)
 	 * and set (jp) flag on whatever the next instruction was, rather
 	 * than inserting an extra nop..
 	 */
-	instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+	instr = instr_create(ctx, 0, OPC_NOP);
 	instr->flags |= IR3_INSTR_JP;
 
 	/* pop the branch instruction from the stack and fix up branch target: */
@@ -1018,13 +1034,13 @@ trans_if(const struct instr_translater *t,
 	if (is_const(src))
 		src = get_unconst(ctx, src);
 
-	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+	instr = instr_create(ctx, 2, OPC_CMPS_F);
 	ir3_reg_create(instr, regid(REG_P0, 0), 0);
 	add_src_reg(ctx, instr, src, src->SwizzleX);
 	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
 	instr->cat2.condition = IR3_COND_EQ;
 
-	instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+	instr = instr_create(ctx, 0, OPC_BR);
 	push_branch(ctx, instr);
 }
 
@@ -1036,7 +1052,7 @@ trans_else(const struct instr_translater *t,
 	struct ir3_instruction *instr;
 
 	/* for first half of if/else/endif, generate a jump past the else: */
-	instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+	instr = instr_create(ctx, 0, OPC_JUMP);
 
 	pop_branch(ctx);
 	push_branch(ctx, instr);
@@ -1060,7 +1076,7 @@ instr_cat0(const struct instr_translater *t,
 		struct fd3_compile_context *ctx,
 		struct tgsi_full_instruction *inst)
 {
-	ir3_instr_create(ctx->ir, 0, t->opc);
+	instr_create(ctx, 0, t->opc);
 }
 
 static void
@@ -1083,7 +1099,7 @@ instr_cat1(const struct instr_translater *t,
 		 * in the future if we start supporting widening/narrowing or
 		 * conversion to/from integer..
 		 */
-		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+		instr = instr_create(ctx, 2, OPC_ADD_F);
 		get_immediate(ctx, &constval, fui(0.0));
 		vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
 	} else {
@@ -1129,14 +1145,14 @@ instr_cat2(const struct instr_translater *t,
 	case OPC_SETRM:
 	case OPC_CBITS_B:
 		/* these only have one src reg */
-		instr = ir3_instr_create(ctx->ir, 2, t->opc);
+		instr = instr_create(ctx, 2, t->opc);
 		vectorize(ctx, instr, dst, 1, src0, src0_flags);
 		break;
 	default:
 		if (is_const(src0) && is_const(src1))
 			src0 = get_unconst(ctx, src0);
 
-		instr = ir3_instr_create(ctx->ir, 2, t->opc);
+		instr = instr_create(ctx, 2, t->opc);
 		vectorize(ctx, instr, dst, 2, src0, src0_flags,
 				src1, src1_flags);
 		break;
@@ -1186,7 +1202,7 @@ instr_cat3(const struct instr_translater *t,
 		}
 	}
 
-	instr = ir3_instr_create(ctx->ir, 3,
+	instr = instr_create(ctx, 3,
 			ctx->so->half_precision ? t->hopc : t->opc);
 	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
 			&inst->Src[2].Register, 0);
@@ -1214,8 +1230,8 @@ instr_cat4(const struct instr_translater *t,
 	for (i = 0, n = 0; i < 4; i++) {
 		if (dst->WriteMask & (1 << i)) {
 			if (n++)
-				ir3_instr_create(ctx->ir, 0, OPC_NOP);
-			instr = ir3_instr_create(ctx->ir, 4, t->opc);
+				add_nop(ctx, 1);
+			instr = instr_create(ctx, 4, t->opc);
 			add_dst_reg(ctx, instr, dst, i);
 			add_src_reg(ctx, instr, src, src->SwizzleX);
 		}
@@ -1315,7 +1331,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 				struct ir3_instruction *instr;
 				struct ir3_register *dst;
 
-				instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+				instr = instr_create(ctx, 2, OPC_BARY_F);
 
 				/* dst register: */
 				dst = ir3_reg_create(instr, r + j, flags);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
index da25cdc..5cdb245 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
@@ -33,61 +33,6 @@
 #include "fd3_util.h"
 
 
-/* ************************************************************************* */
-/* split this out or find some helper to use.. like main/bitset.h.. */
-
-#define MAX_REG 256
-
-typedef uint8_t regmask_t[2 * MAX_REG / 8];
-
-static inline unsigned regmask_idx(struct ir3_register *reg)
-{
-	unsigned num = reg->num;
-	assert(num < MAX_REG);
-	if (reg->flags & IR3_REG_HALF)
-		num += MAX_REG;
-	return num;
-}
-
-static inline void regmask_init(regmask_t *regmask)
-{
-	memset(regmask, 0, sizeof(*regmask));
-}
-
-static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
-{
-	unsigned idx = regmask_idx(reg);
-	unsigned i;
-	for (i = 0; i < 4; i++, idx++)
-		if (reg->wrmask & (1 << i))
-			(*regmask)[idx / 8] |= 1 << (idx % 8);
-}
-
-static inline unsigned regmask_get(regmask_t *regmask,
-		struct ir3_register *reg)
-{
-	unsigned idx = regmask_idx(reg);
-	unsigned i;
-	for (i = 0; i < 4; i++, idx++)
-		if (reg->wrmask & (1 << i))
-			if ((*regmask)[idx / 8] & (1 << (idx % 8)))
-				return true;
-	return false;
-}
-
-/* comp:
- *   0 - x
- *   1 - y
- *   2 - z
- *   3 - w
- */
-static inline uint32_t regid(int num, int comp)
-{
-	return (num << 2) | (comp & 0x3);
-}
-
-/* ************************************************************************* */
-
 int fd3_compile_shader(struct fd3_shader_stateobj *so,
 		const struct tgsi_token *tokens);
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 3df29ec..ddb33ca 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -79,9 +79,10 @@ static void
 fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
 {
 	unsigned i;
-	for (i = 0; i < so->inputs_count; i++) {
+	for (i = 0; i < so->inputs_count; i++)
 		so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
-	}
+	for (i = 0; i < so->outputs_count; i++)
+		so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2);
 }
 
 static struct fd3_shader_stateobj *
@@ -230,7 +231,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
 }
 
 static uint32_t
-find_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
 {
 	int j;
 	for (j = 0; j < so->outputs_count; j++)
@@ -257,13 +258,13 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 		fsi = &fp->info;
 	}
 
-	pos_regid = find_regid(vp,
+	pos_regid = find_output_regid(vp,
 		fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	posz_regid = find_regid(fp,
+	posz_regid = find_output_regid(fp,
 		fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	psize_regid = find_regid(vp,
+	psize_regid = find_output_regid(vp,
 		fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
-	color_regid = find_regid(fp,
+	color_regid = find_output_regid(fp,
 		fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
 
 	/* we could probably divide this up into things that need to be
@@ -501,10 +502,11 @@ create_blit_fp(struct pipe_context *pctx)
 {
 	struct fd3_shader_stateobj *so;
 	struct ir3_shader *ir = ir3_shader_create();
+	struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
 	struct ir3_instruction *instr;
 
 	/* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
-	instr = ir3_instr_create(ir, 2, OPC_BARY_F);
+	instr = ir3_instr_create(block, 2, OPC_BARY_F);
 	instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 	instr->repeat = 1;
 
@@ -514,11 +516,11 @@ create_blit_fp(struct pipe_context *pctx)
 	ir3_reg_create(instr, regid(0,0), 0);             /* r0.x */
 
 	/* (rpt5)nop */
-	instr = ir3_instr_create(ir, 0, OPC_NOP);
+	instr = ir3_instr_create(block, 0, OPC_NOP);
 	instr->repeat = 5;
 
 	/* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
-	instr = ir3_instr_create(ir, 5, OPC_SAM);
+	instr = ir3_instr_create(block, 5, OPC_SAM);
 	instr->cat5.samp = 0;
 	instr->cat5.tex  = 0;
 	instr->cat5.type = TYPE_F32;
@@ -528,7 +530,7 @@ create_blit_fp(struct pipe_context *pctx)
 	ir3_reg_create(instr, regid(0,2), 0);             /* r0.z */
 
 	/* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */
-	instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
+	instr = ir3_instr_create(block, 1, 0);  /* mov/cov instructions have no opc */
 	instr->flags = IR3_INSTR_SY;
 	instr->repeat = 3;
 	instr->cat1.src_type = TYPE_F32;
@@ -538,7 +540,7 @@ create_blit_fp(struct pipe_context *pctx)
 	ir3_reg_create(instr, regid(0,0), IR3_REG_R);     /* (r)r0.x */
 
 	/* end */
-	instr = ir3_instr_create(ir, 0, OPC_END);
+	instr = ir3_instr_create(block, 0, OPC_END);
 
 	so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
 	if (!so)
@@ -573,10 +575,11 @@ create_blit_vp(struct pipe_context *pctx)
 {
 	struct fd3_shader_stateobj *so;
 	struct ir3_shader *ir = ir3_shader_create();
+	struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
 	struct ir3_instruction *instr;
 
 	/* (sy)(ss)end */
-	instr = ir3_instr_create(ir, 0, OPC_END);
+	instr = ir3_instr_create(block, 0, OPC_END);
 	instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 
 	so = create_internal_shader(pctx, SHADER_VERTEX, ir);
@@ -611,10 +614,11 @@ create_solid_fp(struct pipe_context *pctx)
 {
 	struct fd3_shader_stateobj *so;
 	struct ir3_shader *ir = ir3_shader_create();
+	struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
 	struct ir3_instruction *instr;
 
 	/* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */
-	instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
+	instr = ir3_instr_create(block, 1, 0);  /* mov/cov instructions have no opc */
 	instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 	instr->repeat = 3;
 	instr->cat1.src_type = TYPE_F16;
@@ -625,7 +629,7 @@ create_solid_fp(struct pipe_context *pctx)
 			IR3_REG_CONST | IR3_REG_R);
 
 	/* end */
-	instr = ir3_instr_create(ir, 0, OPC_END);
+	instr = ir3_instr_create(block, 0, OPC_END);
 
 	so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
 	if (!so)
@@ -650,10 +654,11 @@ create_solid_vp(struct pipe_context *pctx)
 {
 	struct fd3_shader_stateobj *so;
 	struct ir3_shader *ir = ir3_shader_create();
+	struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
 	struct ir3_instruction *instr;
 
 	/* (sy)(ss)end */
-	instr = ir3_instr_create(ir, 0, OPC_END);
+	instr = ir3_instr_create(block, 0, OPC_END);
 	instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 4aeeb2e..c781dfe 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -33,7 +33,7 @@
 
 #include "freedreno_context.h"
 
-#include "ir-a3xx.h"
+#include "ir3.h"
 #include "disasm.h"
 
 typedef uint16_t fd3_semantic;  /* semantic name + index */
@@ -43,6 +43,16 @@ fd3_semantic_name(uint8_t name, uint16_t index)
 	return (name << 8) | (index & 0xff);
 }
 
+static inline uint8_t sem2name(fd3_semantic sem)
+{
+	return sem >> 8;  /* fd3_semantic_name() packs the name above the low 8 bits */
+}
+
+static inline uint16_t sem2idx(fd3_semantic sem)
+{
+	return sem & 0xff;  /* fd3_semantic_name() packs the index into the low 8 bits */
+}
+
 struct fd3_shader_stateobj {
 	enum shader_t type;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
index 1085ddf..b0f7834 100644
--- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
@@ -438,6 +438,23 @@ typedef struct PACKED {
 	uint32_t opc_cat  : 3;
 } instr_cat3_t;
 
+static inline bool instr_cat3_full(instr_cat3_t *cat3)
+{
+	switch (cat3->opc) {  /* the decision depends on the opcode only */
+	case OPC_MAD_F16:
+	case OPC_MAD_U16:
+	case OPC_MAD_S16:
+	case OPC_SEL_B16:
+	case OPC_SEL_S16:
+	case OPC_SEL_F16:
+	case OPC_SAD_S16:
+	case OPC_SAD_S32:  /* NOTE(review): an S32 opcode in this list looks odd; the original author was unsure too -- confirm */
+		return false;  /* not "full"; the disassembler xors the result with dst_half when printing the dst */
+	default:
+		return true;  /* every other cat3 opcode */
+	}
+}
+
 typedef struct PACKED {
 	/* dword0: */
 	union PACKED {
@@ -612,4 +629,18 @@ typedef union PACKED {
 	};
 } instr_t;
 
+static inline uint32_t instr_opc(instr_t *instr)
+{
+	switch (instr->opc_cat) {  /* opc_cat selects which per-category encoding holds the opc field */
+	case 0:  return instr->cat0.opc;
+	case 1:  return 0;  /* no opc is read for category 1; 0 is reported */
+	case 2:  return instr->cat2.opc;
+	case 3:  return instr->cat3.opc;
+	case 4:  return instr->cat4.opc;
+	case 5:  return instr->cat5.opc;
+	case 6:  return instr->cat6.opc;
+	default: return 0;  /* any other category value also reports 0 */
+	}
+}
+
 #endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir3.c
similarity index 92%
rename from src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
rename to src/gallium/drivers/freedreno/a3xx/ir3.c
index a39214e..2a06d42 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.c
@@ -21,7 +21,7 @@
  * SOFTWARE.
  */
 
-#include "ir-a3xx.h"
+#include "ir3.h"
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -72,7 +72,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
 	if (reg->flags & IR3_REG_IMMED) {
 		val.iim_val = reg->iim_val;
 	} else {
-		int8_t max = (reg->num + repeat) >> 2;
+		int8_t components = util_last_bit(reg->wrmask);
+		int8_t max = (reg->num + repeat + components - 1) >> 2;
 
 		val.comp = reg->num & 0x3;
 		val.num  = reg->num >> 2;
@@ -514,6 +515,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in
 	info->max_reg       = -1;
 	info->max_half_reg  = -1;
 	info->max_const     = -1;
+	info->instrs_count  = 0;
 
 	/* need a integer number of instruction "groups" (sets of four
 	 * instructions), so pad out w/ NOPs if needed:
@@ -528,6 +530,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in
 		int ret = emit[instr->category](instr, dwords, info);
 		if (ret)
 			goto fail;
+		info->instrs_count += 1 + instr->repeat;
 		dwords += 2;
 	}
 
@@ -552,30 +555,68 @@ static struct ir3_register * reg_create(struct ir3_shader *shader,
 static void insert_instr(struct ir3_shader *shader,
 		struct ir3_instruction *instr)
 {
+#ifdef DEBUG
+	static uint32_t serialno = 0;
+	instr->serialno = ++serialno;
+#endif
 	assert(shader->instrs_count < ARRAY_SIZE(shader->instrs));
 	shader->instrs[shader->instrs_count++] = instr;
 }
 
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+		unsigned ntmp, unsigned nin, unsigned nout)
+{
+	struct ir3_block *block;
+	unsigned size;
+	char *ptr;
+
+	size = sizeof(*block);  /* single allocation: block header plus three pointer arrays */
+	size += sizeof(block->temporaries[0]) * ntmp;
+	size += sizeof(block->inputs[0]) * nin;
+	size += sizeof(block->outputs[0]) * nout;
+
+	ptr = ir3_alloc(shader, size);  /* NOTE(review): the result is used without a NULL check */
+
+	block = (void *)ptr;
+	ptr += sizeof(*block);  /* the arrays are carved out directly after the header */
+
+	block->temporaries = (void *)ptr;  /* ntmp entries */
+	block->ntemporaries = ntmp;
+	ptr += sizeof(block->temporaries[0]) * ntmp;
+
+	block->inputs = (void *)ptr;  /* nin entries */
+	block->ninputs = nin;
+	ptr += sizeof(block->inputs[0]) * nin;
+
+	block->outputs = (void *)ptr;  /* nout entries */
+	block->noutputs = nout;
+	ptr += sizeof(block->outputs[0]) * nout;
+
+	block->shader = shader;  /* parent/head are not assigned here; presumably ir3_alloc() zeroes memory -- TODO confirm */
+
+	return block;
+}
+
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
 		int category, opc_t opc)
 {
 	struct ir3_instruction *instr =
-			ir3_alloc(shader, sizeof(struct ir3_instruction));
-	instr->shader = shader;
+			ir3_alloc(block->shader, sizeof(struct ir3_instruction));
+	instr->block = block;
 	instr->category = category;
 	instr->opc = opc;
-	insert_instr(shader, instr);
+	insert_instr(block->shader, instr);
 	return instr;
 }
 
 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
 {
 	struct ir3_instruction *new_instr =
-			ir3_alloc(instr->shader, sizeof(struct ir3_instruction));
+			ir3_alloc(instr->block->shader, sizeof(struct ir3_instruction));
 	unsigned i;
 
 	*new_instr = *instr;
-	insert_instr(instr->shader, new_instr);
+	insert_instr(instr->block->shader, new_instr);
 
 	/* clone registers: */
 	new_instr->regs_count = 0;
@@ -592,7 +633,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
 		int num, int flags)
 {
-	struct ir3_register *reg = reg_create(instr->shader, num, flags);
+	struct ir3_register *reg = reg_create(instr->block->shader, num, flags);
 	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
 	instr->regs[instr->regs_count++] = reg;
 	return reg;
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
similarity index 60%
rename from src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
rename to src/gallium/drivers/freedreno/a3xx/ir3.h
index b0afe18..896bec1 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -28,15 +28,19 @@
 #include <stdbool.h>
 
 #include "instr-a3xx.h"
+#include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
 
 /* low level intermediate representation of an adreno shader program */
 
 struct ir3_shader;
+struct ir3_instruction;
+struct ir3_block;
 
 struct ir3_shader * fd_asm_parse(const char *src);
 
 struct ir3_shader_info {
 	uint16_t sizedwords;
+	uint16_t instrs_count;   /* expanded to account for rpt's */
 	/* NOTE: max_reg, etc, does not include registers not touched
 	 * by the shader (ie. vertex fetched via VFD_DECODE but not
 	 * touched by shader)
@@ -84,7 +88,7 @@ struct ir3_register {
 };
 
 struct ir3_instruction {
-	struct ir3_shader *shader;
+	struct ir3_block *block;
 	int category;
 	opc_t opc;
 	enum {
@@ -138,7 +142,7 @@ struct ir3_instruction {
 	} flags;
 	int repeat;
 	unsigned regs_count;
-	struct ir3_register *regs[4];
+	struct ir3_register *regs[5];
 	union {
 		struct {
 			char inv;
@@ -168,6 +172,9 @@ struct ir3_instruction {
 			int iim_val;
 		} cat6;
 	};
+#ifdef DEBUG
+	uint32_t serialno;
+#endif
 };
 
 #define MAX_INSTRS 1024
@@ -179,16 +186,151 @@ struct ir3_shader {
 	unsigned heap_idx;
 };
 
+struct ir3_block {
+	struct ir3_shader *shader;  /* set by ir3_block_create() to the shader it was given */
+	unsigned ntemporaries, ninputs, noutputs;  /* entry counts of the three arrays below */
+	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
+	struct ir3_instruction **temporaries;
+	struct ir3_instruction **inputs;  /* presumably the same idea per shader input -- TODO confirm */
+	struct ir3_instruction **outputs;  /* presumably the same idea per shader output -- TODO confirm */
+	struct ir3_block *parent;  /* not assigned by ir3_block_create() */
+	struct ir3_instruction *head;  /* not assigned by ir3_block_create() */
+};
+
 struct ir3_shader * ir3_shader_create(void);
 void ir3_shader_destroy(struct ir3_shader *shader);
 void * ir3_shader_assemble(struct ir3_shader *shader,
 		struct ir3_shader_info *info);
 
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+		unsigned ntmp, unsigned nin, unsigned nout);
+
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
 		int category, opc_t opc);
 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
 
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
 		int num, int flags);
 
+
+/* Pack a register number and a component into one id: (num << 2) | comp.
+ * comp selects the channel:
+ *   0 - x, 1 - y
+ *   2 - z, 3 - w
+ * Only the low two bits of comp are used.
+ */
+static inline uint32_t regid(int num, int comp)
+{
+	return (num << 2) | (comp & 0x3);
+}
+
+static inline uint32_t reg_num(struct ir3_register *reg)
+{
+	return reg->num >> 2;  /* drop the two component bits, matching the regid() packing */
+}
+
+static inline uint32_t reg_comp(struct ir3_register *reg)
+{
+	return reg->num & 0x3;  /* keep only the two component bits, matching the regid() packing */
+}
+
+static inline bool is_alu(struct ir3_instruction *instr)
+{
+	return (1 <= instr->category) && (instr->category <= 3);  /* categories 1, 2 and 3 */
+}
+
+static inline bool is_sfu(struct ir3_instruction *instr)
+{
+	return (instr->category == 4);  /* category 4 only */
+}
+
+static inline bool is_tex(struct ir3_instruction *instr)
+{
+	return (instr->category == 5);  /* category 5 only */
+}
+
+static inline bool is_input(struct ir3_instruction *instr)
+{
+	return (instr->category == 2) && (instr->opc == OPC_BARY_F);  /* only a category-2 bary.f counts as an input */
+}
+
+static inline bool is_gpr(struct ir3_register *reg)
+{
+	return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED));  /* neither a const nor an immediate */
+}
+
+/* Stricter variant of is_gpr(). TODO combine is_gpr()/reg_gpr().. */
+static inline bool reg_gpr(struct ir3_register *r)
+{
+	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))  /* unlike is_gpr(), RELATIV is rejected too */
+		return false;
+	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))  /* the a0 and p0 register numbers are rejected as well */
+		return false;
+	return true;
+}
+
+#ifndef ARRAY_SIZE  /* only defined here when nothing earlier provided it */
+#  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
+
+#include <string.h>
+
+#define MAX_REG 256  /* regmask_idx() asserts that reg->num is below this */
+
+typedef uint8_t regmask_t[2 * MAX_REG / 8];  /* bitmask: MAX_REG bits for full regs, then MAX_REG bits for half regs */
+
+static inline unsigned regmask_idx(struct ir3_register *reg)
+{
+	unsigned num = reg->num;  /* base bit index; the set/get helpers add 0..3 per wrmask bit */
+	assert(num < MAX_REG);
+	if (reg->flags & IR3_REG_HALF)
+		num += MAX_REG;  /* half registers live in the second MAX_REG bits of the mask */
+	return num;
+}
+
+static inline void regmask_init(regmask_t *regmask)
+{
+	memset(regmask, 0, sizeof(*regmask));  /* clear every bit of the mask */
+}
+
+static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++)  /* wrmask bit i corresponds to mask bit (base index + i) */
+		if (reg->wrmask & (1 << i))
+			(*regmask)[idx / 8] |= 1 << (idx % 8);  /* byte idx/8, bit idx%8 */
+}
+
+/* For each register bit selected by reg->wrmask, set it in a unless it is
+ * already set in b, conceptually:  a |= (reg & ~b)
+ */
+static inline void regmask_set_if_not(regmask_t *a,
+		struct ir3_register *reg, regmask_t *b)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++)  /* same bit walk as regmask_set() */
+		if (reg->wrmask & (1 << i))
+			if (!((*b)[idx / 8] & (1 << (idx % 8))))  /* skip bits present in b */
+				(*a)[idx / 8] |= 1 << (idx % 8);
+}
+
+static inline unsigned regmask_get(regmask_t *regmask,
+		struct ir3_register *reg)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++)  /* same bit walk as regmask_set() */
+		if (reg->wrmask & (1 << i))
+			if ((*regmask)[idx / 8] & (1 << (idx % 8)))
+				return true;  /* at least one selected bit is set */
+	return false;  /* none of the selected bits is set */
+}
+
+/* ************************************************************************* */
+
 #endif /* IR3_H_ */




More information about the mesa-commit mailing list