[Mesa-dev] [PATCH v2] r600/sb: bail out if prepare_alu_group() doesn't find a proper scheduling

Wed Oct 18 08:15:50 UTC 2017

On 10/16/2017 10:06 PM, Gert Wollny wrote:
> It is possible that the optimizer ends up in an infinite loop in
> post_scheduler::schedule_alu(), because post_scheduler::prepare_alu_group()
> does not find a proper scheduling. This can be deducted from
> pending.count() being larger than zero and not getting smaller.
> 
> This patch works around this problem by signalling this failure so that the
> optimizers bails out and the un-optimized shader is used.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103142
> Signed-off-by: Gert Wollny <gw.fossdev at gmail.com>
> ---
> Change w.r.t. v1:
> - In schedule_alu() if pending.count() == 0 then don't expect that
>    this value is reduced by a call to  prepare_alu_group(), instead
>    continue the loop until it is exited by "break".
> 
> I've added you Vadim as to original author to the CC, maybe you can shed a bit more light
> on what might be going wrong here, and whether there is an easy real fix instead of just
> a workaround.

I'm honestly barely remember all related details, sorry, I guess you 
know that code a lot better than me now. That VLIW scheduling/packing 
stuff was pretty complicated even when I worked on it. :)
Now after 4 years I'm too scared to touch it.

So I can't reasonably review it now, but if this patch fixes the bug and 
doesn't result in any regressions, and if Glenn and Dave have no 
objections, I guess it's ok to push it, you can add my "acked-by".

I'd push it but I'm not ready to take responsibility for any possible 
fallout. I hope Dave or whoever maintains r600g will help with that.

Thanks for fixing it.

> 
> best regards,
> Gert
> 
> Note: Submitter has no mesa-git write access.
> 
>   src/gallium/drivers/r600/sb/sb_sched.cpp | 43 ++++++++++++++++++++------------
>   src/gallium/drivers/r600/sb/sb_sched.h   |  8 +++---
>   2 files changed, 31 insertions(+), 20 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
> index 5113b75684..2fbec2f77e 100644
> --- a/src/gallium/drivers/r600/sb/sb_sched.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
> @@ -711,22 +711,24 @@ void alu_group_tracker::update_flags(alu_node* n) {
>   }
>   
>   int post_scheduler::run() {
> -	run_on(sh.root);
> -	return 0;
> +	return run_on(sh.root) ? 0 : 1;
>   }
>   
> -void post_scheduler::run_on(container_node* n) {
> -
> +bool post_scheduler::run_on(container_node* n) {
> +	int r = true;
>   	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
>   		if (I->is_container()) {
>   			if (I->subtype == NST_BB) {
>   				bb_node* bb = static_cast<bb_node*>(*I);
> -				schedule_bb(bb);
> +				r = schedule_bb(bb);
>   			} else {
> -				run_on(static_cast<container_node*>(*I));
> +				r = run_on(static_cast<container_node*>(*I));
>   			}
> +			if (!r)
> +				break;
>   		}
>   	}
> +	return r;
>   }
>   
>   void post_scheduler::init_uc_val(container_node *c, value *v) {
> @@ -758,7 +760,7 @@ unsigned post_scheduler::init_ucm(container_node *c, node *n) {
>   	return F == ucm.end() ? 0 : F->second;
>   }
>   
> -void post_scheduler::schedule_bb(bb_node* bb) {
> +bool post_scheduler::schedule_bb(bb_node* bb) {
>   	PSC_DUMP(
>   		sblog << "scheduling BB " << bb->id << "\n";
>   		if (!pending.empty())
> @@ -791,8 +793,10 @@ void post_scheduler::schedule_bb(bb_node* bb) {
>   
>   		if (n->is_alu_clause()) {
>   			n->remove();
> -			process_alu(static_cast<container_node*>(n));
> -			continue;
> +			bool r = process_alu(static_cast<container_node*>(n));
> +			if (r)
> +				continue;
> +			return false;
>   		}
>   
>   		n->remove();
> @@ -800,6 +804,7 @@ void post_scheduler::schedule_bb(bb_node* bb) {
>   	}
>   
>   	this->cur_bb = NULL;
> +	return true;
>   }
>   
>   void post_scheduler::init_regmap() {
> @@ -933,10 +938,10 @@ void post_scheduler::process_fetch(container_node *c) {
>   	cur_bb->push_front(c);
>   }
>   
> -void post_scheduler::process_alu(container_node *c) {
> +bool post_scheduler::process_alu(container_node *c) {
>   
>   	if (c->empty())
> -		return;
> +		return true;
>   
>   	ucm.clear();
>   	alu.reset();
> @@ -973,7 +978,7 @@ void post_scheduler::process_alu(container_node *c) {
>   		}
>   	}
>   
> -	schedule_alu(c);
> +	return schedule_alu(c);
>   }
>   
>   void post_scheduler::update_local_interferences() {
> @@ -1135,15 +1140,20 @@ void post_scheduler::emit_clause() {
>   	emit_index_registers();
>   }
>   
> -void post_scheduler::schedule_alu(container_node *c) {
> +bool post_scheduler::schedule_alu(container_node *c) {
>   
>   	assert(!ready.empty() || !ready_copies.empty());
>   
> -	while (1) {
> -
> +	bool improving = true;
> +	int last_pending = pending.count();
> +	while (improving) {
>   		prev_regmap = regmap;
> -
>   		if (!prepare_alu_group()) {
> +
> +			int new_pending = pending.count();
> +			improving = (new_pending < last_pending) || (last_pending == 0);
> +			last_pending = new_pending;
> +
>   			if (alu.current_idx[0] || alu.current_idx[1]) {
>   				regmap = prev_regmap;
>   				emit_clause();
> @@ -1186,6 +1196,7 @@ void post_scheduler::schedule_alu(container_node *c) {
>   		dump::dump_op_list(&pending);
>   		assert(!"unscheduled pending instructions");
>   	}
> +	return improving;
>   }
>   
>   void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
> diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h
> index 05b428ca88..5a2663442b 100644
> --- a/src/gallium/drivers/r600/sb/sb_sched.h
> +++ b/src/gallium/drivers/r600/sb/sb_sched.h
> @@ -267,14 +267,14 @@ public:
>   		live(), ucm(), alu(sh),	regmap(), cleared_interf() {}
>   
>   	virtual int run();
> -	void run_on(container_node *n);
> -	void schedule_bb(bb_node *bb);
> +	bool run_on(container_node *n);
> +	bool schedule_bb(bb_node *bb);
>   
>   	void load_index_register(value *v, unsigned idx);
>   	void process_fetch(container_node *c);
>   
> -	void process_alu(container_node *c);
> -	void schedule_alu(container_node *c);
> +	bool process_alu(container_node *c);
> +	bool schedule_alu(container_node *c);
>   	bool prepare_alu_group();
>   
>   	void release_op(node *n);
>