Mesa (master): freedreno/ir3: updated cat6 encoding

Rob Clark robclark at kemper.freedesktop.org
Mon Jul 27 17:51:14 UTC 2015


Module: Mesa
Branch: master
Commit: bc5e2bec303acd7fd962996bf369be5ce0e15cd2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc5e2bec303acd7fd962996bf369be5ce0e15cd2

Author: Rob Clark <robclark at freedesktop.org>
Date:   Thu Jul 23 15:31:13 2015 -0400

freedreno/ir3: updated cat6 encoding

Sync updated cat6 encoding from freedreno.git, needed to properly encode
store instructions.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/ir3/disasm-a3xx.c    |  223 ++++++++++++++------
 src/gallium/drivers/freedreno/ir3/instr-a3xx.h     |   87 +++++---
 src/gallium/drivers/freedreno/ir3/ir3.c            |   27 ++-
 src/gallium/drivers/freedreno/ir3/ir3.h            |    4 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       |    2 +-
 5 files changed, 230 insertions(+), 113 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
index 48ae7c7..83ed5ff 100644
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
 	} else if ((reg.num == REG_P0) && !c) {
 		printf("p0.%c", component[reg.comp]);
 	} else {
-		printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+		printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
 	}
 }
 
@@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
 	print_reg(reg, full, r, c, im, neg, abs, addr_rel);
 }
 
+/* TODO: switch to using struct reginfo everywhere, since it is more
+ * readable than passing a bunch of bools to print_reg_src()
+ */
+
+struct reginfo {
+	reg_t reg;
+	bool full;
+	bool r;
+	bool c;
+	bool im;
+	bool neg;
+	bool abs;
+	bool addr_rel;
+};
+
+static void print_src(struct reginfo *info)
+{
+	print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+			info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+//	print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
 static void print_instr_cat0(instr_t *instr)
 {
 	instr_cat0_t *cat0 = &instr->cat0;
@@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr)
 {
 	instr_cat6_t *cat6 = &instr->cat6;
 	char sd = 0, ss = 0;  /* dst/src address space */
-	bool full = type_size(cat6->type) == 32;
 	bool nodst = false;
+	struct reginfo dst, src1, src2;
+	int src1off = 0, dstoff = 0;
 
-	printf(".%s ", type[cat6->type]);
+	memset(&dst, 0, sizeof(dst));
+	memset(&src1, 0, sizeof(src1));
+	memset(&src2, 0, sizeof(src2));
+
+	switch (cat6->opc) {
+	case OPC_RESINFO:
+	case OPC_RESFMT:
+		dst.full  = type_size(cat6->type) == 32;
+		src1.full = type_size(cat6->type) == 32;
+		src2.full = type_size(cat6->type) == 32;
+		break;
+	case OPC_L2G:
+	case OPC_G2L:
+		dst.full = true;
+		src1.full = true;
+		src2.full = true;
+		break;
+	case OPC_STG:
+	case OPC_STL:
+	case OPC_STP:
+	case OPC_STI:
+	case OPC_STLW:
+	case OPC_STGB_4D_4:
+	case OPC_STIB:
+		dst.full  = true;
+		src1.full = type_size(cat6->type) == 32;
+		src2.full = type_size(cat6->type) == 32;
+		break;
+	default:
+		dst.full  = type_size(cat6->type) == 32;
+		src1.full = true;
+		src2.full = true;
+		break;
+	}
+
+	switch (cat6->opc) {
+	case OPC_PREFETCH:
+	case OPC_RESINFO:
+		break;
+	case OPC_ATOMIC_ADD:
+	case OPC_ATOMIC_SUB:
+	case OPC_ATOMIC_XCHG:
+	case OPC_ATOMIC_INC:
+	case OPC_ATOMIC_DEC:
+	case OPC_ATOMIC_CMPXCHG:
+	case OPC_ATOMIC_MIN:
+	case OPC_ATOMIC_MAX:
+	case OPC_ATOMIC_AND:
+	case OPC_ATOMIC_OR:
+	case OPC_ATOMIC_XOR:
+		ss = cat6->g ? 'g' : 'l';
+		printf(".%c", ss);
+		printf(".%s", type[cat6->type]);
+		break;
+	default:
+		dst.im = cat6->g && !cat6->dst_off;
+		printf(".%s", type[cat6->type]);
+		break;
+	}
+	printf(" ");
 
 	switch (cat6->opc) {
 	case OPC_STG:
@@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr)
 		break;
 
 	case OPC_STI:
-		full = false;  // XXX or inverts??
+		dst.full = false;  // XXX or inverts??
 		break;
 	}
 
-	if (cat6->has_off) {
-		if (!nodst) {
-			if (sd)
-				printf("%c[", sd);
-			print_reg_dst((reg_t)(cat6->a.dst), full, false);
-			if (sd)
-				printf("]");
-			printf(", ");
-		}
-		if (ss)
-			printf("%c[", ss);
-		print_reg_src((reg_t)(cat6->a.src1), true,
-				false, false, cat6->a.src1_im, false, false, false);
-		if (cat6->a.off)
-			printf("%+d", cat6->a.off);
-		if (ss)
-			printf("]");
-		printf(", ");
-		print_reg_src((reg_t)(cat6->a.src2), full,
-				false, false, cat6->a.src2_im, false, false, false);
+	if (cat6->dst_off) {
+		dst.reg = (reg_t)(cat6->c.dst);
+		dstoff  = cat6->c.off;
 	} else {
-		if (!nodst) {
-			if (sd)
-				printf("%c[", sd);
-			print_reg_dst((reg_t)(cat6->b.dst), full, false);
-			if (sd)
-				printf("]");
-			printf(", ");
-		}
-		if (ss)
-			printf("%c[", ss);
-		print_reg_src((reg_t)(cat6->b.src1), true,
-				false, false, cat6->b.src1_im, false, false, false);
-		if (ss)
+		dst.reg = (reg_t)(cat6->d.dst);
+	}
+
+	if (cat6->src_off) {
+		src1.reg = (reg_t)(cat6->a.src1);
+		src1.im  = cat6->a.src1_im;
+		src2.reg = (reg_t)(cat6->a.src2);
+		src2.im  = cat6->a.src2_im;
+		src1off  = cat6->a.off;
+	} else {
+		src1.reg = (reg_t)(cat6->b.src1);
+		src1.im  = cat6->b.src1_im;
+		src2.reg = (reg_t)(cat6->b.src2);
+		src2.im  = cat6->b.src2_im;
+	}
+
+	if (!nodst) {
+		if (sd)
+			printf("%c[", sd);
+		/* note: dst might actually be a src (i.e. the address to store to) */
+		print_src(&dst);
+		if (dstoff)
+			printf("%+d", dstoff);
+		if (sd)
 			printf("]");
 		printf(", ");
-		print_reg_src((reg_t)(cat6->b.src2), full,
-				false, false, cat6->b.src2_im, false, false, false);
 	}
 
-	if (debug & PRINT_VERBOSE) {
-		switch (cat6->opc) {
-		case OPC_LDG:
-		case OPC_LDP:
-			/* load instructions: */
-			if (cat6->a.dummy2|cat6->a.dummy3)
-				printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
-			break;
-		case OPC_STG:
-		case OPC_STP:
-		case OPC_STI:
-			/* store instructions: */
-			if (cat6->b.dummy2|cat6->b.dummy2)
-				printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
-			if (cat6->b.ignore0)
-				printf("\t{?? %x}", cat6->b.ignore0);
-			break;
-		}
+	if (ss)
+		printf("%c[", ss);
+
+	/* can have a larger than normal immed, so hack: */
+	if (src1.im) {
+		printf("%u", src1.reg.dummy13);
+	} else {
+		print_src(&src1);
+	}
+
+	if (src1off)
+		printf("%+d", src1off);
+	if (ss)
+		printf("]");
+
+	switch (cat6->opc) {
+	case OPC_RESINFO:
+	case OPC_RESFMT:
+		break;
+	default:
+		printf(", ");
+		print_src(&src2);
+		break;
 	}
 }
 
@@ -711,19 +794,19 @@ struct opc_info {
 	OPC(6, OPC_LDLW,         ldlw),
 	OPC(6, OPC_STLW,         stlw),
 	OPC(6, OPC_RESFMT,       resfmt),
-	OPC(6, OPC_RESINFO,      resinf),
-	OPC(6, OPC_ATOMIC_ADD_L,     atomic.add.l),
-	OPC(6, OPC_ATOMIC_SUB_L,     atomic.sub.l),
-	OPC(6, OPC_ATOMIC_XCHG_L,    atomic.xchg.l),
-	OPC(6, OPC_ATOMIC_INC_L,     atomic.inc.l),
-	OPC(6, OPC_ATOMIC_DEC_L,     atomic.dec.l),
-	OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
-	OPC(6, OPC_ATOMIC_MIN_L,     atomic.min.l),
-	OPC(6, OPC_ATOMIC_MAX_L,     atomic.max.l),
-	OPC(6, OPC_ATOMIC_AND_L,     atomic.and.l),
-	OPC(6, OPC_ATOMIC_OR_L,      atomic.or.l),
-	OPC(6, OPC_ATOMIC_XOR_L,     atomic.xor.l),
-	OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.4d),
+	OPC(6, OPC_RESINFO,      resinfo),
+	OPC(6, OPC_ATOMIC_ADD,     atomic.add),
+	OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
+	OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
+	OPC(6, OPC_ATOMIC_INC,     atomic.inc),
+	OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
+	OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+	OPC(6, OPC_ATOMIC_MIN,     atomic.min),
+	OPC(6, OPC_ATOMIC_MAX,     atomic.max),
+	OPC(6, OPC_ATOMIC_AND,     atomic.and),
+	OPC(6, OPC_ATOMIC_OR,      atomic.or),
+	OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
+	OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.3d),
 	OPC(6, OPC_STGB_4D_4,    stgb.4d.4),
 	OPC(6, OPC_STIB,         stib),
 	OPC(6, OPC_LDC_4,        ldc.4),
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index efb07ea..c3fb68d 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -173,17 +173,17 @@ typedef enum {
 	OPC_STLW = 11,
 	OPC_RESFMT = 14,
 	OPC_RESINFO = 15,
-	OPC_ATOMIC_ADD_L = 16,
-	OPC_ATOMIC_SUB_L = 17,
-	OPC_ATOMIC_XCHG_L = 18,
-	OPC_ATOMIC_INC_L = 19,
-	OPC_ATOMIC_DEC_L = 20,
-	OPC_ATOMIC_CMPXCHG_L = 21,
-	OPC_ATOMIC_MIN_L = 22,
-	OPC_ATOMIC_MAX_L = 23,
-	OPC_ATOMIC_AND_L = 24,
-	OPC_ATOMIC_OR_L = 25,
-	OPC_ATOMIC_XOR_L = 26,
+	OPC_ATOMIC_ADD = 16,
+	OPC_ATOMIC_SUB = 17,
+	OPC_ATOMIC_XCHG = 18,
+	OPC_ATOMIC_INC = 19,
+	OPC_ATOMIC_DEC = 20,
+	OPC_ATOMIC_CMPXCHG = 21,
+	OPC_ATOMIC_MIN = 22,
+	OPC_ATOMIC_MAX = 23,
+	OPC_ATOMIC_AND = 24,
+	OPC_ATOMIC_OR = 25,
+	OPC_ATOMIC_XOR = 26,
 	OPC_LDGB_TYPED_4D = 27,
 	OPC_STGB_4D_4 = 28,
 	OPC_STIB = 29,
@@ -575,7 +575,7 @@ typedef struct PACKED {
 	uint32_t opc_cat  : 3;
 } instr_cat5_t;
 
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
 typedef struct PACKED {
 	/* dword0: */
 	uint32_t mustbe1  : 1;
@@ -586,37 +586,50 @@ typedef struct PACKED {
 	uint32_t src2     : 8;
 
 	/* dword1: */
-	uint32_t dst      : 8;
-	uint32_t dummy2   : 9;
-	uint32_t type     : 3;
-	uint32_t dummy3   : 2;
-	uint32_t opc      : 5;
-	uint32_t jmp_tgt  : 1;
-	uint32_t sync     : 1;
-	uint32_t opc_cat  : 3;
+	uint32_t dword1;
 } instr_cat6a_t;
 
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
 typedef struct PACKED {
 	/* dword0: */
 	uint32_t mustbe0  : 1;
-	uint32_t src1     : 8;
-	uint32_t ignore0  : 13;
+	uint32_t src1     : 13;
+	uint32_t ignore0  : 8;
 	uint32_t src1_im  : 1;
 	uint32_t src2_im  : 1;
 	uint32_t src2     : 8;
 
 	/* dword1: */
-	uint32_t dst      : 8;
-	uint32_t dummy2   : 9;
-	uint32_t type     : 3;
-	uint32_t dummy3   : 2;
-	uint32_t opc      : 5;
-	uint32_t jmp_tgt  : 1;
-	uint32_t sync     : 1;
-	uint32_t opc_cat  : 3;
+	uint32_t dword1;
 } instr_cat6b_t;
 
+/* dword1 encoding for dst_off: */
+typedef struct PACKED {
+	/* dword0: */
+	uint32_t dword0;
+
+	/* note: there is some weird stuff going on where sometimes
+	 * cat6->a.off is involved, but that seems like a bug in
+	 * the blob, since it is used even if !cat6->src_off.
+	 * It would make sense for there to be some more bits to
+	 * bring us to 11 bits worth of offset, but not sure.
+	 */
+	int32_t off       : 8;
+	uint32_t mustbe1  : 1;
+	uint32_t dst      : 8;
+	uint32_t pad1     : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: */
+typedef struct PACKED {
+	/* dword0: */
+	uint32_t dword0;
+
+	uint32_t dst      : 8;
+	uint32_t mustbe0  : 1;
+	uint32_t pad0     : 23;
+} instr_cat6d_t;
+
 /* I think some of the other cat6 instructions use additional
  * sub-encodings..
  */
@@ -624,16 +637,20 @@ typedef struct PACKED {
 typedef union PACKED {
 	instr_cat6a_t a;
 	instr_cat6b_t b;
+	instr_cat6c_t c;
+	instr_cat6d_t d;
 	struct PACKED {
 		/* dword0: */
-		uint32_t has_off  : 1;
+		uint32_t src_off  : 1;
 		uint32_t pad1     : 31;
 
 		/* dword1: */
-		uint32_t dst      : 8;
-		uint32_t dummy2   : 9;
+		uint32_t pad2     : 8;
+		uint32_t dst_off  : 1;
+		uint32_t pad3     : 8;
 		uint32_t type     : 3;
-		uint32_t dummy3   : 2;
+		uint32_t g        : 1;  /* or in some cases it means dst immed */
+		uint32_t pad4     : 1;
 		uint32_t opc      : 5;
 		uint32_t jmp_tgt  : 1;
 		uint32_t sync     : 1;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a0cb744..6d19a29 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -506,25 +506,28 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 
 	iassert(instr->regs_count >= 2);
 
-	if (instr->cat6.offset || instr->opc == OPC_LDG) {
+	/* TODO we need a more comprehensive list about which instructions
+	 * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
+	 * indicate to use the src_off encoding even if offset is zero
+	 * (but then what to do about dst_off?)
+	 */
+	if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
 		instr_cat6a_t *cat6a = ptr;
 
-		cat6->has_off = true;
+		cat6->src_off = true;
 
-		cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
 		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
 		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
 		if (src2) {
 			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
 			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
 		}
-		cat6a->off = instr->cat6.offset;
+		cat6a->off = instr->cat6.src_offset;
 	} else {
 		instr_cat6b_t *cat6b = ptr;
 
-		cat6->has_off = false;
+		cat6->src_off = false;
 
-		cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
 		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
 		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
 		if (src2) {
@@ -533,10 +536,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 		}
 	}
 
+	if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+		instr_cat6c_t *cat6c = ptr;
+		cat6->dst_off = true;
+		cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+		cat6c->off = instr->cat6.dst_offset;
+	} else {
+		instr_cat6d_t *cat6d = ptr;
+		cat6->dst_off = false;
+		cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+	}
+
 	cat6->type     = instr->cat6.type;
 	cat6->opc      = instr->opc;
 	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
 	cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+	cat6->g        = !!(instr->flags & IR3_INSTR_G);
 	cat6->opc_cat  = 6;
 
 	return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index f11d8ed..c3b61a0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -172,6 +172,7 @@ struct ir3_instruction {
 		IR3_INSTR_P     = 0x080,
 		IR3_INSTR_S     = 0x100,
 		IR3_INSTR_S2EN  = 0x200,
+		IR3_INSTR_G     = 0x400,
 		/* meta-flags, for intermediate stages of IR, ie.
 		 * before register assignment is done:
 		 */
@@ -209,7 +210,8 @@ struct ir3_instruction {
 		} cat5;
 		struct {
 			type_t type;
-			int offset;
+			int src_offset;
+			int dst_offset;
 			int iim_val;
 		} cat6;
 		/* for meta-instructions, just used to hold extra data
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 22885ff..bdba3aa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1215,7 +1215,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 		struct ir3_instruction *load =
 				ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
 		load->cat6.type = TYPE_U32;
-		load->cat6.offset = off + i * 4;    /* byte offset */
+		load->cat6.src_offset = off + i * 4;     /* byte offset */
 		dst[i] = load;
 	}
 }




More information about the mesa-commit mailing list