Mesa (master): r600g: rework bank swizzle code

Christian König deathsimple at kemper.freedesktop.org
Thu Jan 13 20:31:26 UTC 2011


Module: Mesa
Branch: master
Commit: a25b91c2c2118741389f418a66de7ca145b8600b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a25b91c2c2118741389f418a66de7ca145b8600b

Author: Christian König <deathsimple at vodafone.de>
Date:   Sat Dec 18 17:56:36 2010 +0100

r600g: rework bank swizzle code

---

 src/gallium/drivers/r600/r600_asm.c |  353 +++++++++++++++++------------------
 src/gallium/drivers/r600/r600_asm.h |    4 -
 2 files changed, 174 insertions(+), 183 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index b0567dd..4ceb1d7 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -32,6 +32,9 @@
 #include "r600_formats.h"
 #include "r600d.h"
 
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 {
 	if(alu->is_op3)
@@ -340,228 +343,220 @@ static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *a
 	return 0;
 }
 
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210,  //000
-				      SQ_ALU_VEC_120,  //001
-				      SQ_ALU_VEC_102,  //010
-
-				      SQ_ALU_VEC_201,  //011
-				      SQ_ALU_VEC_012,  //100
-				      SQ_ALU_VEC_021,  //101
-
-				      SQ_ALU_VEC_012,  //110
-				      SQ_ALU_VEC_012}; //111
-
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210,  //000
-				      SQ_ALU_SCL_122,  //001
-				      SQ_ALU_SCL_122,  //010
-
-				      SQ_ALU_SCL_221,  //011
-				      SQ_ALU_SCL_212,  //100
-				      SQ_ALU_SCL_122,  //101
-
-				      SQ_ALU_SCL_122,  //110
-				      SQ_ALU_SCL_122}; //111
-
-static int init_gpr(struct r600_bc_alu *alu)
+struct alu_bank_swizzle {
+	int	hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+	int	hw_cfile_addr[4];
+	int	hw_cfile_elem[4];
+};
+
+const unsigned cycle_for_bank_swizzle_vec[][3] = {
+	[SQ_ALU_VEC_012] = { 0, 1, 2 },
+	[SQ_ALU_VEC_021] = { 0, 2, 1 },
+	[SQ_ALU_VEC_120] = { 1, 2, 0 },
+	[SQ_ALU_VEC_102] = { 1, 0, 2 },
+	[SQ_ALU_VEC_201] = { 2, 0, 1 },
+	[SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+const unsigned cycle_for_bank_swizzle_scl[][3] = {
+	[SQ_ALU_SCL_210] = { 2, 1, 0 },
+	[SQ_ALU_SCL_122] = { 1, 2, 2 },
+	[SQ_ALU_SCL_212] = { 2, 1, 2 },
+	[SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
 {
-	int cycle, component;
+	int i, cycle, component;
 	/* set up gpr use */
 	for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
 		for (component = 0; component < NUM_OF_COMPONENTS; component++)
-			 alu->hw_gpr[cycle][component] = -1;
-	return 0;
+			 bs->hw_gpr[cycle][component] = -1;
+	for (i = 0; i < 4; i++)
+		bs->hw_cfile_addr[i] = -1;
+	for (i = 0; i < 4; i++)
+		bs->hw_cfile_elem[i] = -1;
 }
-
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+ 
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
 {
-	if (alu->hw_gpr[cycle][chan] < 0)
-		alu->hw_gpr[cycle][chan] = sel;
-	else if (alu->hw_gpr[cycle][chan] != (int)sel) {
-		R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+	if (bs->hw_gpr[cycle][chan] == -1)
+		bs->hw_gpr[cycle][chan] = sel;
+	else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+		// Another scalar operation has already used GPR read port for channel
 		return -1;
 	}
 	return 0;
 }
-
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+ 
+static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
 {
-	int table[3];
-	int ret = 0;
-	switch (swiz) {
-	case SQ_ALU_SCL_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_122:
-		table[0] = 1; table[1] = 2; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_212:
-		table[0] = 2; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_221:
-		table[0] = 2; table[1] = 2; table[2] = 1;
-		*p_cycle = table[sel];
-                break;
-		break;
-	default:
-		R600_ERR("bad scalar bank swizzle value\n");
-		ret = -1;
-		break;
+	int res, resmatch = -1, resempty = -1;
+	for (res = 3; res >= 0; --res) {
+		if (bs->hw_cfile_addr[res] == -1)
+			resempty = res;
+		else if (bs->hw_cfile_addr[res] == sel &&
+			bs->hw_cfile_elem[res] == chan)
+			resmatch = res;
 	}
-	return ret;
-}
-
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
-	int table[3];
-	int ret;
-
-	switch (swiz) {
-	case SQ_ALU_VEC_012:
-		table[0] = 0; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_021:
-		table[0] = 0; table[1] = 2; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_120:
-		table[0] = 1; table[1] = 2; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_102:
-		table[0] = 1; table[1] = 0; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_201:
-		table[0] = 2; table[1] = 0; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	default:
-		R600_ERR("bad vector bank swizzle value\n");
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-
-
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
-{
-	int num_src;
-	int i;
-	int channel_swizzle;
-
-	num_src = r600_bc_get_num_operands(alu);
-
-	for (i = 0; i < num_src; i++) {
-		channel_swizzle = alu->src[i].chan;
-		if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
-			chan_counter[channel_swizzle]++;
+	if (resmatch != -1)
+		return 0; // Read for this scalar element already reserved, nothing to do here.
+	else if (resempty != -1) {
+		bs->hw_cfile_addr[resempty] = sel;
+		bs->hw_cfile_elem[resempty] = chan;
+	} else {
+		// All cfile read ports are used, cannot reference vector element
+		return -1;
 	}
+	return 0;	
 }
-
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+ 
+static int is_gpr(unsigned sel)
 {
-	struct r600_bc_alu *alu;
-	int chan_counter[4]  = { 0 };
-
-	update_chan_counter(alu_first, chan_counter);
-
-	LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-		update_chan_counter(alu, chan_counter);
-	}
-
-	if (chan_counter[0] > 3 ||
-	    chan_counter[1] > 3 ||
-	    chan_counter[2] > 3 ||
-	    chan_counter[3] > 3) {
-		R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
-			 alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
-		return -1;
-	}
-	return 0;
+	return (sel >= 0 && sel <= 127);
 }
-#endif
 
 /* CB constants start at 512, and get translated to a kcache index when ALU
  * clauses are constructed. Note that we handle kcache constants the same way
  * as (the now gone) cfile constants, is that really required? */
+static int is_cfile(unsigned sel)
+{
+	return (sel > 255 && sel < 512) ||
+		(sel > 511 && sel < 4607) || // Kcache before translate
+		(sel > 127 && sel < 192); // Kcache after translate
+}
+ 
 static int is_const(int sel)
 {
-	if (sel > 511 && sel < 4607)
-		return 1;
-	return 0;
+	return is_cfile(sel) ||
+		(sel >= V_SQ_ALU_SRC_0 && 
+		sel <= V_SQ_ALU_SRC_LITERAL);
 }
-
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+ 
+static int check_vector(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-	unsigned swizzle_key;
-
-	if (alu->bank_swizzle_force) {
-		alu->bank_swizzle = alu->bank_swizzle_force;
-		return 0;
+	int r, src, num_src, sel, elem, cycle;
+ 
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; src++) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+				// Nothing to do; special-case optimization, 
+				// second source uses first source’s reservation
+				continue;
+			else {
+				r = reserve_gpr(bs, sel, elem, cycle);
+				if (r)
+					return r;
+			}
+		} else if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
+		// No restrictions on PV, PS, literal or special constants
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
-
-	alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
 	return 0;
 }
-
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+ 
+static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-	unsigned swizzle_key;
-
-	if (alu->bank_swizzle_force) {
-		alu->bank_swizzle = alu->bank_swizzle_force;
-		return 0;
+	int r, src, num_src, const_count, sel, elem, cycle;
+ 
+	num_src = r600_bc_get_num_operands(alu);
+	for (const_count = 0, src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_const(sel)) { // Any constant, including literal and inline constants
+			if (const_count >= 2)
+				// More than two references to a constant in
+				// transcendental operation.
+				return -1; 
+			else
+				const_count++;
+		}
+		if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
+	}
+	for (src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+			if (cycle < const_count)
+				// Cycle for GPR load conflicts with
+				// constant load in transcendental operation.
+				return -1;
+			r = reserve_gpr(bs, sel, elem, cycle);
+			if (r)
+				return r;
+		}
+		// Constants already processed
+		// No restrictions on PV, PS
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
-
-	alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
 	return 0;
 }
-
+ 
 static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
 {
 	struct r600_bc_alu *assignment[5];
+	struct alu_bank_swizzle bs;
+	int bank_swizzle[5];
 	int i, r;
-
-	r = init_gpr(alu_first);
-	if (r)
-		return r;
-
+ 
 	r = assign_alu_units(alu_first, assignment);
 	if (r)
 		return r;
+ 
+	if(alu_first->bank_swizzle_force) {
+		for (i = 0; i < 5; i++)
+			if (assignment[i])
+				assignment[i]->bank_swizzle = assignment[i]->bank_swizzle_force;
+		return 0;
+	}
 
+	// just check every possible combination of bank swizzle
+	// not very efficient, but works on the first try in most of the cases
 	for (i = 0; i < 4; i++)
-		if (assignment[i]) {
-			r = check_vector(bc, assignment[i]);
-			if (r)
-				return r;
+		bank_swizzle[i] = SQ_ALU_VEC_012;
+	bank_swizzle[4] = SQ_ALU_SCL_210;
+	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+		init_bank_swizzle(&bs);
+		for (i = 0; i < 4; i++) {
+			if (assignment[i]) {
+				r = check_vector(assignment[i], &bs, bank_swizzle[i]);
+				if (r)
+					break;
+			}
+		}
+		if (!r && assignment[4]) {
+			r = check_scalar(assignment[4], &bs, bank_swizzle[4]);
+		}
+		if (!r) {
+			for (i = 0; i < 5; i++) {
+				if (assignment[i])
+					assignment[i]->bank_swizzle = bank_swizzle[i];
+			}
+			return 0;
 		}
 
-	if (assignment[4]) {
-		r = check_scalar(bc, assignment[4]);
-		if (r)
-			return r;
+		for (i = 0; i < 5; i++) {
+			bank_swizzle[i]++;
+			if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+				break;
+			else
+				bank_swizzle[i] = SQ_ALU_VEC_012;
+		}
 	}
-	
-	return 0;
+
+	// couldn't find a working swizzle
+	return -1;
 }
 
 /* This code handles kcache lines as single blocks of 32 constants. We could
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 4763ce0..a912329 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,9 +25,6 @@
 
 #include "util/u_double_list.h"
 
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
-
 struct r600_vertex_element;
 struct r600_pipe_context;
 
@@ -61,7 +58,6 @@ struct r600_bc_alu {
 	unsigned			bank_swizzle;
 	unsigned			bank_swizzle_force;
 	u32				value[4];
-	int				hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
 	unsigned			omod;
 };
 




More information about the mesa-commit mailing list