Mesa (master): r600g/sb: improve optimization of conditional instructions

Vadim Girlin vadimg at kemper.freedesktop.org
Mon May 27 11:31:55 UTC 2013


Module: Mesa
Branch: master
Commit: 725671a83a67cc8cf16c0913f6e1835fb272c2fb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=725671a83a67cc8cf16c0913f6e1835fb272c2fb

Author: Vadim Girlin <vadimgirlin at gmail.com>
Date:   Mon May 27 04:00:03 2013 +0400

r600g/sb: improve optimization of conditional instructions

Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>

---

 src/gallium/drivers/r600/sb/sb_core.cpp          |    7 +++-
 src/gallium/drivers/r600/sb/sb_expr.cpp          |   48 ++++++++++++++++++-
 src/gallium/drivers/r600/sb/sb_expr.h            |    5 +-
 src/gallium/drivers/r600/sb/sb_if_conversion.cpp |    2 +-
 src/gallium/drivers/r600/sb/sb_pass.h            |    2 +-
 src/gallium/drivers/r600/sb/sb_peephole.cpp      |   53 ++++++++++++++++-----
 6 files changed, 96 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index 0345057..5b917ac 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -188,9 +188,14 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
 	sh->set_undef(sh->root->live_before);
 
-	SB_RUN_PASS(peephole,			1);
 	SB_RUN_PASS(if_conversion,		1);
 
+	// if_conversion breaks info about uses, but next pass (peephole)
+	// doesn't need it, so we can skip def/use update here
+	// until it's really required
+	//SB_RUN_PASS(def_use,			0);
+
+	SB_RUN_PASS(peephole,			1);
 	SB_RUN_PASS(def_use,			0);
 
 	SB_RUN_PASS(gvn,				1);
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 8582c8e..65a7641 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -580,7 +580,7 @@ unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
 	return ncc;
 }
 
-unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
+unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
 
 	if (int_dst && cmp_type == AF_FLOAT_CMP) {
 		switch (cc) {
@@ -612,6 +612,8 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
 		}
 		case AF_UINT_CMP: {
 			switch (cc) {
+			case AF_CC_E: return ALU_OP2_SETE_INT;
+			case AF_CC_NE: return ALU_OP2_SETNE_INT;
 			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
 			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
 			}
@@ -624,7 +626,7 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
 	return ~0u;
 }
 
-unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
+unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
 
 	switch(cmp_type) {
 	case AF_FLOAT_CMP: {
@@ -647,6 +649,8 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
 	}
 	case AF_UINT_CMP: {
 		switch (cc) {
+		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
+		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
 		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
 		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
 		}
@@ -658,6 +662,44 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
 	return ~0u;
 }
 
+unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
+
+	switch(cmp_type) {
+	case AF_FLOAT_CMP: {
+		switch (cc) {
+		case AF_CC_E: return ALU_OP2_KILLE;
+		case AF_CC_NE: return ALU_OP2_KILLNE;
+		case AF_CC_GT: return ALU_OP2_KILLGT;
+		case AF_CC_GE: return ALU_OP2_KILLGE;
+		}
+		break;
+	}
+	case AF_INT_CMP: {
+		switch (cc) {
+		case AF_CC_E: return ALU_OP2_KILLE_INT;
+		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
+		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
+		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
+		}
+		break;
+	}
+	case AF_UINT_CMP: {
+		switch (cc) {
+		case AF_CC_E: return ALU_OP2_KILLE_INT;
+		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
+		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
+		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
+		}
+		break;
+	}
+	}
+
+	assert(!"unexpected cc&cmp_type combination");
+	return ~0u;
+}
+
+
+
 void convert_predset_to_set(shader& sh, alu_node* a) {
 
 	unsigned flags = a->bc.op_ptr->flags;
@@ -668,7 +710,7 @@ void convert_predset_to_set(shader& sh, alu_node* a) {
 
 	cc = invert_setcc_condition(cc, swap_args);
 
-	unsigned newop = get_setcc_opcode(cc, cmp_type, true);
+	unsigned newop = get_setcc_op(cc, cmp_type, true);
 
 	a->dst.resize(1);
 	a->bc.set_op(newop);
diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h
index 032867b..8917714 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.h
+++ b/src/gallium/drivers/r600/sb/sb_expr.h
@@ -37,8 +37,9 @@ value* get_select_value_for_em(shader &sh, value *em);
 
 void convert_predset_to_set(shader &sh, alu_node *a);
 unsigned invert_setcc_condition(unsigned cc, bool &swap_args);
-unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst);
-unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type);
+unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst);
+unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type);
+unsigned get_killcc_op(unsigned cc, unsigned cmp_type);
 
 class expr_handler {
 
diff --git a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
index 96dd1ee..93edace 100644
--- a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
+++ b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
@@ -258,7 +258,7 @@ bool if_conversion::run_on(region_node* r) {
 			std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
 		}
 
-		unsigned newopcode = get_predsetcc_opcode(cc, cmptype);
+		unsigned newopcode = get_predsetcc_op(cc, cmptype);
 		newpredset->bc.set_op(newopcode);
 
 		// move the code from the 'false' branch ('else') to the 'true' branch
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index 7e606da..c7272ba 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -413,7 +413,7 @@ public:
 
 	void optimize_cc_op(alu_node *a);
 
-	void optimize_SETcc_op(alu_node *a);
+	void optimize_cc_op2(alu_node *a);
 	void optimize_CNDcc_op(alu_node *a);
 
 	bool get_bool_op_info(value *b, bool_op_info& bop);
diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index 6373b5c..cb423c9 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -46,7 +46,7 @@ int peephole::run() {
 
 void peephole::run_on(container_node* c) {
 
-	for (node_riterator I = c->rbegin(), E = c->rend(); I != E; ++I) {
+	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
 		node *n = *I;
 
 		if (n->is_container())
@@ -56,7 +56,8 @@ void peephole::run_on(container_node* c) {
 			if (n->is_alu_inst()) {
 				alu_node *a = static_cast<alu_node*>(n);
 
-				if (a->bc.op_ptr->flags & (AF_PRED | AF_SET | AF_CMOV)) {
+				if (a->bc.op_ptr->flags &
+						(AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
 					optimize_cc_op(a);
 				} else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
 
@@ -73,8 +74,8 @@ void peephole::run_on(container_node* c) {
 void peephole::optimize_cc_op(alu_node* a) {
 	unsigned aflags = a->bc.op_ptr->flags;
 
-	if (aflags & (AF_PRED | AF_SET)) {
-		optimize_SETcc_op(a);
+	if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
+		optimize_cc_op2(a);
 	} else if (aflags & AF_CMOV) {
 		optimize_CNDcc_op(a);
 	}
@@ -90,26 +91,37 @@ void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
 	f2i->remove();
 }
 
-void peephole::optimize_SETcc_op(alu_node* a) {
+void peephole::optimize_cc_op2(alu_node* a) {
 
 	unsigned flags = a->bc.op_ptr->flags;
 	unsigned cc = flags & AF_CC_MASK;
+
+	if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
+		return;
+
 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
 	unsigned dst_type = flags & AF_DST_TYPE_MASK;
-	bool is_pred = flags & AF_PRED;
 
-	// TODO handle other cases
+	int op_kind = (flags & AF_PRED) ? 1 :
+			(flags & AF_SET) ? 2 :
+			(flags & AF_KILL) ? 3 : 0;
+
+	bool swapped = false;
 
-	if (a->src[1]->is_const() && (cc == AF_CC_E || cc == AF_CC_NE) &&
-			a->src[1]->literal_value == literal(0) &&
-			a->bc.src[0].neg == 0 && a->bc.src[0].abs == 0) {
+	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
+		std::swap(a->src[0],a->src[1]);
+		swapped = true;
+	}
+
+	if (swapped || (a->src[1]->is_const() &&
+			a->src[1]->literal_value == literal(0))) {
 
 		value *s = a->src[0];
 
 		bool_op_info bop = {};
 
 		PPH_DUMP(
-			sblog << "optSETcc ";
+			sblog << "cc_op2: ";
 			dump::dump_op(a);
 			sblog << "\n";
 		);
@@ -139,8 +151,23 @@ void peephole::optimize_SETcc_op(alu_node* a) {
 			sblog <<"\n";
 		);
 
-		unsigned newop = is_pred ? get_predsetcc_opcode(cc, cmp_type) :
-				get_setcc_opcode(cc, cmp_type, dst_type != AF_FLOAT_DST);
+		unsigned newop;
+
+		switch(op_kind) {
+		case 1:
+			newop = get_predsetcc_op(cc, cmp_type);
+			break;
+		case 2:
+			newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
+			break;
+		case 3:
+			newop = get_killcc_op(cc, cmp_type);
+			break;
+		default:
+			newop = ALU_OP0_NOP;
+			assert(!"invalid op kind");
+			break;
+		}
 
 		a->bc.set_op(newop);
 




More information about the mesa-commit mailing list