[Mesa-stable] [PATCH 1/4] r600g/sb: work around hw issues with stack on eg/cm

Grigori Goronzy greg at chown.ath.cx
Fri Nov 15 09:24:53 PST 2013


From: Vadim Girlin <vadimgirlin at gmail.com>

v2: make the workaround actually work and improve its condition

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68503
Cc: "10.0" <mesa-stable at lists.freedesktop.org>
Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
---
 src/gallium/drivers/r600/sb/sb_bc.h            |  21 ++++
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 129 +++++++++++++++++--------
 src/gallium/drivers/r600/sb/sb_context.cpp     |   9 +-
 src/gallium/drivers/r600/sb/sb_ir.h            |   5 +-
 src/gallium/drivers/r600/sb/sb_pass.h          |   3 +
 5 files changed, 123 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index ad1b862..73b8b08 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -614,6 +614,10 @@ public:
 	unsigned num_slots;
 	bool uses_mova_gpr;
 
+	bool stack_workaround_8xx;
+	bool stack_workaround_9xx;
+
+	unsigned wavefront_size;
 	unsigned stack_entry_size;
 
 	static unsigned dump_pass;
@@ -638,6 +642,23 @@ public:
 	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
 	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
 
+	bool needs_8xx_stack_workaround() {
+		if (!is_evergreen())
+			return false;
+
+		switch (hw_chip) {
+		case HW_CHIP_CYPRESS:
+		case HW_CHIP_JUNIPER:
+			return false;
+		default:
+			return true;
+		}
+	}
+
+	bool needs_9xx_stack_workaround() {
+		return is_cayman();
+	}
+
 	sb_hw_class_bits hw_class_bit() {
 		switch (hw_class) {
 		case HW_CLASS_R600:return HB_R6;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index c56c866..bc71cf8 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -40,8 +40,9 @@ namespace r600_sb {
 
 int bc_finalizer::run() {
 
-	regions_vec &rv = sh.get_regions();
+	run_on(sh.root);
 
+	regions_vec &rv = sh.get_regions();
 	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
 			++I) {
 		region_node *r = *I;
@@ -58,8 +59,6 @@ int bc_finalizer::run() {
 		r->expand();
 	}
 
-	run_on(sh.root);
-
 	cf_peephole();
 
 	// workaround for some problems on r6xx/7xx
@@ -213,18 +212,36 @@ void bc_finalizer::run_on(container_node* c) {
 		if (n->is_alu_group()) {
 			finalize_alu_group(static_cast<alu_group_node*>(n));
 		} else {
-			if (n->is_fetch_inst()) {
+			if (n->is_alu_clause()) {
+				cf_node *c = static_cast<cf_node*>(n);
+
+				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
+					if (ctx.stack_workaround_8xx) {
+						region_node *r = c->get_parent_region();
+						if (r) {
+							unsigned ifs, loops;
+							unsigned elems = get_stack_depth(r, loops, ifs);
+							unsigned dmod1 = elems % ctx.stack_entry_size;
+							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
+
+							if (elems && (!dmod1 || !dmod2))
+								c->flags |= NF_ALU_STACK_WORKAROUND;
+						}
+					} else if (ctx.stack_workaround_9xx) {
+						region_node *r = c->get_parent_region();
+						if (r) {
+							unsigned ifs, loops;
+							get_stack_depth(r, loops, ifs);
+							if (loops >= 2)
+								c->flags |= NF_ALU_STACK_WORKAROUND;
+						}
+					}
+				}
+			} else if (n->is_fetch_inst()) {
 				finalize_fetch(static_cast<fetch_node*>(n));
 			} else if (n->is_cf_inst()) {
 				finalize_cf(static_cast<cf_node*>(n));
-			} else if (n->is_alu_clause()) {
-
-			} else if (n->is_fetch_clause()) {
-
-			} else {
-				assert(!"unexpected node");
 			}
-
 			if (n->is_container())
 				run_on(static_cast<container_node*>(n));
 		}
@@ -578,10 +595,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 
 	unsigned flags = c->bc.op_ptr->flags;
 
-	if (flags & CF_CALL) {
-		update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
-	}
-
 	c->bc.end_of_program = 0;
 	last_cf = c;
 
@@ -715,17 +728,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 
 			c->bc.index_gpr = reg >= 0 ? reg : 0;
 		}
-
-
-
-	} else {
-
-#if 0
-		if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
-			c->bc.valid_pixel_mode = 1;
-		}
-#endif
-
+	} else if (flags & CF_CALL) {
+		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
 	}
 }
 
@@ -763,37 +767,78 @@ void bc_finalizer::update_ngpr(unsigned gpr) {
 		ngpr = gpr + 1;
 }
 
-void bc_finalizer::update_nstack(region_node* r, unsigned add) {
-	unsigned loops = 0;
-	unsigned ifs = 0;
+unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
+                                           unsigned &ifs, unsigned add) {
+	unsigned stack_elements = add;
+	bool has_non_wqm_push_with_loops_on_stack = false;
+	bool has_non_wqm_push = (add != 0);
+	region_node *r = n->is_region() ?
+			static_cast<region_node*>(n) : n->get_parent_region();
+
+	loops = 0;
+	ifs = 0;
 
 	while (r) {
-		if (r->is_loop())
+		if (r->is_loop()) {
 			++loops;
-		else
+			if (has_non_wqm_push)
+				has_non_wqm_push_with_loops_on_stack = true;
+		} else {
 			++ifs;
-
+			has_non_wqm_push = true;
+		}
 		r = r->get_parent_region();
 	}
-
-	unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
-
-	// FIXME calculate more precisely
-	if (ctx.is_evergreen()) {
-		++stack_elements;
-	} else {
-		stack_elements += 2;
-		if (ctx.is_cayman())
+	stack_elements += (loops * ctx.stack_entry_size) + ifs;
+
+	// reserve additional elements in some cases
+	switch (ctx.hw_class) {
+	case HW_CLASS_R600:
+	case HW_CLASS_R700:
+		if (has_non_wqm_push)
+			stack_elements += 2;
+		break;
+	case HW_CLASS_CAYMAN:
+		if (stack_elements)
+			stack_elements += 2;
+		break;
+	case HW_CLASS_EVERGREEN:
+		if (has_non_wqm_push_with_loops_on_stack)
 			++stack_elements;
+		break;
 	}
+	return stack_elements;
+}
 
-	unsigned stack_entries = (stack_elements + 3) >> 2;
+void bc_finalizer::update_nstack(region_node* r, unsigned add) {
+	unsigned loops = 0;
+	unsigned ifs = 0;
+	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
+
+	// XXX all chips expect this value to be computed using 4 as entry size,
+	// not the real entry size
+	unsigned stack_entries = (elems + 3) >> 2;
 
 	if (nstack < stack_entries)
 		nstack = stack_entries;
 }
 
 void bc_finalizer::cf_peephole() {
+	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
+		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
+				I = N) {
+			N = I; ++N;
+			cf_node *c = static_cast<cf_node*>(*I);
+
+			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
+					(c->flags & NF_ALU_STACK_WORKAROUND)) {
+				cf_node *push = sh.create_cf(CF_OP_PUSH);
+				c->insert_before(push);
+				push->jump(c);
+				c->bc.set_op(CF_OP_ALU);
+			}
+		}
+	}
 
 	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
 			I = N) {
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp
index 9474f74..8e11428 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -66,20 +66,27 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
 	case HW_CHIP_RS780:
 	case HW_CHIP_RV620:
 	case HW_CHIP_RS880:
-
+		wavefront_size = 16;
+		stack_entry_size = 8;
+		break;
 	case HW_CHIP_RV630:
 	case HW_CHIP_RV635:
 	case HW_CHIP_RV730:
 	case HW_CHIP_RV710:
 	case HW_CHIP_PALM:
 	case HW_CHIP_CEDAR:
+		wavefront_size = 32;
 		stack_entry_size = 8;
 		break;
 	default:
+		wavefront_size = 64;
 		stack_entry_size = 4;
 		break;
 	}
 
+	stack_workaround_8xx = needs_8xx_stack_workaround();
+	stack_workaround_9xx = needs_9xx_stack_workaround();
+
 	return 0;
 }
 
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index a74d6cb..85c3d06 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -700,7 +700,10 @@ enum node_flags {
 	NF_DONT_MOVE = (1 << 8),
 
 	// for KILLxx - we want to schedule them as early as possible
-	NF_SCHEDULE_EARLY = (1 << 9)
+	NF_SCHEDULE_EARLY = (1 << 9),
+
+	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
+	NF_ALU_STACK_WORKAROUND = (1 << 10)
 };
 
 inline node_flags operator |(node_flags l, node_flags r) {
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index a3f8515..c955656 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -708,6 +708,9 @@ public:
 	void update_ngpr(unsigned gpr);
 	void update_nstack(region_node *r, unsigned add = 0);
 
+	unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
+	                         unsigned add = 0);
+
 	void cf_peephole();
 
 };
-- 
1.8.1.2



More information about the mesa-stable mailing list