[Mesa-dev] [PATCH v2] r600/sb: bail out if prepare_alu_group() doesn't find a proper scheduling

Mon Oct 16 19:06:26 UTC 2017

It is possible that the optimizer ends up in an infinite loop in
post_scheduler::schedule_alu(), because post_scheduler::prepare_alu_group()
does not find a proper scheduling. This can be deducted from
pending.count() being larger than zero and not getting smaller.

This patch works around this problem by signalling this failure so that the
optimizers bails out and the un-optimized shader is used.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103142
Signed-off-by: Gert Wollny <gw.fossdev at gmail.com>
---
Change w.r.t. v1:
- In schedule_alu() if pending.count() == 0 then don't expect that 
  this value is reduced by a call to  prepare_alu_group(), instead 
  continue the loop until it is exited by "break".

I've added you Vadim as to original author to the CC, maybe you can shed a bit more light
on what might be going wrong here, and whether there is an easy real fix instead of just
a workaround.

best regards,
Gert

Note: Submitter has no mesa-git write access.

 src/gallium/drivers/r600/sb/sb_sched.cpp | 43 ++++++++++++++++++++------------
 src/gallium/drivers/r600/sb/sb_sched.h   |  8 +++---
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 5113b75684..2fbec2f77e 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -711,22 +711,24 @@ void alu_group_tracker::update_flags(alu_node* n) {
 }
 
 int post_scheduler::run() {
-	run_on(sh.root);
-	return 0;
+	return run_on(sh.root) ? 0 : 1;
 }
 
-void post_scheduler::run_on(container_node* n) {
-
+bool post_scheduler::run_on(container_node* n) {
+	int r = true;
 	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
 		if (I->is_container()) {
 			if (I->subtype == NST_BB) {
 				bb_node* bb = static_cast<bb_node*>(*I);
-				schedule_bb(bb);
+				r = schedule_bb(bb);
 			} else {
-				run_on(static_cast<container_node*>(*I));
+				r = run_on(static_cast<container_node*>(*I));
 			}
+			if (!r)
+				break;
 		}
 	}
+	return r;
 }
 
 void post_scheduler::init_uc_val(container_node *c, value *v) {
@@ -758,7 +760,7 @@ unsigned post_scheduler::init_ucm(container_node *c, node *n) {
 	return F == ucm.end() ? 0 : F->second;
 }
 
-void post_scheduler::schedule_bb(bb_node* bb) {
+bool post_scheduler::schedule_bb(bb_node* bb) {
 	PSC_DUMP(
 		sblog << "scheduling BB " << bb->id << "\n";
 		if (!pending.empty())
@@ -791,8 +793,10 @@ void post_scheduler::schedule_bb(bb_node* bb) {
 
 		if (n->is_alu_clause()) {
 			n->remove();
-			process_alu(static_cast<container_node*>(n));
-			continue;
+			bool r = process_alu(static_cast<container_node*>(n));
+			if (r)
+				continue;
+			return false;
 		}
 
 		n->remove();
@@ -800,6 +804,7 @@ void post_scheduler::schedule_bb(bb_node* bb) {
 	}
 
 	this->cur_bb = NULL;
+	return true;
 }
 
 void post_scheduler::init_regmap() {
@@ -933,10 +938,10 @@ void post_scheduler::process_fetch(container_node *c) {
 	cur_bb->push_front(c);
 }
 
-void post_scheduler::process_alu(container_node *c) {
+bool post_scheduler::process_alu(container_node *c) {
 
 	if (c->empty())
-		return;
+		return true;
 
 	ucm.clear();
 	alu.reset();
@@ -973,7 +978,7 @@ void post_scheduler::process_alu(container_node *c) {
 		}
 	}
 
-	schedule_alu(c);
+	return schedule_alu(c);
 }
 
 void post_scheduler::update_local_interferences() {
@@ -1135,15 +1140,20 @@ void post_scheduler::emit_clause() {
 	emit_index_registers();
 }
 
-void post_scheduler::schedule_alu(container_node *c) {
+bool post_scheduler::schedule_alu(container_node *c) {
 
 	assert(!ready.empty() || !ready_copies.empty());
 
-	while (1) {
-
+	bool improving = true;
+	int last_pending = pending.count();
+	while (improving) {
 		prev_regmap = regmap;
-
 		if (!prepare_alu_group()) {
+
+			int new_pending = pending.count();
+			improving = (new_pending < last_pending) || (last_pending == 0);
+			last_pending = new_pending;
+
 			if (alu.current_idx[0] || alu.current_idx[1]) {
 				regmap = prev_regmap;
 				emit_clause();
@@ -1186,6 +1196,7 @@ void post_scheduler::schedule_alu(container_node *c) {
 		dump::dump_op_list(&pending);
 		assert(!"unscheduled pending instructions");
 	}
+	return improving;
 }
 
 void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h
index 05b428ca88..5a2663442b 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.h
+++ b/src/gallium/drivers/r600/sb/sb_sched.h
@@ -267,14 +267,14 @@ public:
 		live(), ucm(), alu(sh),	regmap(), cleared_interf() {}
 
 	virtual int run();
-	void run_on(container_node *n);
-	void schedule_bb(bb_node *bb);
+	bool run_on(container_node *n);
+	bool schedule_bb(bb_node *bb);
 
 	void load_index_register(value *v, unsigned idx);
 	void process_fetch(container_node *c);
 
-	void process_alu(container_node *c);
-	void schedule_alu(container_node *c);
+	bool process_alu(container_node *c);
+	bool schedule_alu(container_node *c);
 	bool prepare_alu_group();
 
 	void release_op(node *n);
-- 
2.14.2