[Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

Glenn Kennard glenn.kennard at gmail.com
Fri Mar 20 06:13:46 PDT 2015


Add SV_GEOMETRY_EMIT special variable type to track the
implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
instructions so GCM/scheduler doesn't reorder them.

Mark emit instructions as unkillable so DCE doesn't eat them.

Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
---
The hangs with SB on geometry shaders were all due to the CUT/EMIT
instructions either being removed by DCE or being emitted out of order
relative to the memory ring writes, so the hardware stalled forever
waiting for completed primitives.

Tested only on a Turks so far, but should behave the same across
all R600 generations.

This patch disables the if-conversion pass when running GS shaders;
it didn't seem worth the effort to fix that pass up for the marginal
returns.

 src/gallium/drivers/r600/r600_isa.h            |  8 ++++----
 src/gallium/drivers/r600/r600_shader.c         |  8 ++++----
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 25 +++++++++++++++++++++++++
 src/gallium/drivers/r600/sb/sb_core.cpp        |  5 ++++-
 src/gallium/drivers/r600/sb/sb_dump.cpp        |  4 +++-
 src/gallium/drivers/r600/sb/sb_ir.h            |  6 +++++-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp     |  2 +-
 src/gallium/drivers/r600/sb/sb_sched.cpp       |  2 +-
 src/gallium/drivers/r600/sb/sb_valtable.cpp    |  1 +
 10 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index ec3f702..381f06d 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -641,7 +641,7 @@ static const struct cf_op_info cf_op_table[] = {
 
 		{"MEM_SCRATCH",                   { 0x24, 0x24, 0x50, 0x50 },  CF_MEM  },
 		{"MEM_REDUCT",                    { 0x25, 0x25,   -1,   -1 },  CF_MEM  },
-		{"MEM_RING",                      { 0x26, 0x26, 0x52, 0x52 },  CF_MEM  },
+		{"MEM_RING",                      { 0x26, 0x26, 0x52, 0x52 },  CF_MEM | CF_EMIT },
 
 		{"EXPORT",                        { 0x27, 0x27, 0x53, 0x53 },  CF_EXP  },
 		{"EXPORT_DONE",                   { 0x28, 0x28, 0x54, 0x54 },  CF_EXP  },
@@ -649,9 +649,9 @@ static const struct cf_op_info cf_op_table[] = {
 		{"MEM_EXPORT",                    {   -1, 0x3A, 0x55, 0x55 },  CF_MEM  },
 		{"MEM_RAT",                       {   -1,   -1, 0x56, 0x56 },  CF_MEM | CF_RAT },
 		{"MEM_RAT_NOCACHE",               {   -1,   -1, 0x57, 0x57 },  CF_MEM | CF_RAT },
-		{"MEM_RING1",                     {   -1,   -1, 0x58, 0x58 },  CF_MEM  },
-		{"MEM_RING2",                     {   -1,   -1, 0x59, 0x59 },  CF_MEM  },
-		{"MEM_RING3",                     {   -1,   -1, 0x5A, 0x5A },  CF_MEM  },
+		{"MEM_RING1",                     {   -1,   -1, 0x58, 0x58 },  CF_MEM | CF_EMIT },
+		{"MEM_RING2",                     {   -1,   -1, 0x59, 0x59 },  CF_MEM | CF_EMIT },
+		{"MEM_RING3",                     {   -1,   -1, 0x5A, 0x5A },  CF_MEM | CF_EMIT },
 		{"MEM_MEM_COMBINED",              {   -1,   -1, 0x5B, 0x5B },  CF_MEM  },
 		{"MEM_RAT_COMBINED_NOCACHE",      {   -1,   -1, 0x5C, 0x5C },  CF_MEM | CF_RAT },
 		{"MEM_RAT_COMBINED",              {   -1,   -1,   -1, 0x5D },  CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 28b290a..ff2c784 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -159,8 +159,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		goto error;
 	}
 
-	/* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
-	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
 	/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
 	use_sb &= !shader->shader.uses_index_registers;
 
@@ -1141,6 +1139,8 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
 		for (i = 0; i < 3; i++) {
 			treg[i] = r600_get_temp(ctx);
 		}
+		r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
+
 		t2 = r600_get_temp(ctx);
 		for (i = 0; i < 3; i++) {
 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -1935,9 +1935,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 		ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
 	}
 
+	shader->max_arrays = 0;
+	shader->num_arrays = 0;
 	if (indirect_gprs) {
-		shader->max_arrays = 0;
-		shader->num_arrays = 0;
 
 		if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
 			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 8d0be06..4a830be 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -282,7 +282,7 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
 		value *d = n->dst.empty() ? NULL : n->dst[0];
 
 		if (d && d->is_special_reg()) {
-			assert(n->bc.op_ptr->flags & AF_MOVA);
+			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
 			d = NULL;
 		}
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 08e7f5c..4879c03 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -135,6 +135,16 @@ int bc_parser::parse_decls() {
 		}
 	}
 
+	// GS inputs can add indirect addressing
+	if (sh->target == TARGET_GS) {
+		if (pshader->num_arrays) {
+			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
+				r600_shader_array &a = pshader->arrays[i];
+				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
+			}
+		}
+	}
+
 	if (sh->target == TARGET_VS || sh->target == TARGET_ES)
 		sh->add_input(0, 1, 0x0F);
 	else if (sh->target == TARGET_GS) {
@@ -720,6 +730,16 @@ int bc_parser::prepare_ir() {
 					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
 				}
 
+				if (flags & CF_EMIT) {
+					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
+					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+					if (sh->target == TARGET_ES) {
+						// For ES shaders this is an export
+						c->flags |= NF_DONT_KILL;
+					}
+				}
+
 				if (!burst_count--)
 					break;
 
@@ -736,6 +756,11 @@ int bc_parser::prepare_ir() {
 
 			c->bc.end_of_program = eop;
 
+		} else if (flags & CF_EMIT) {
+			c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+
+			c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+			c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
 		}
 	}
 
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index 7db8008..afea818 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -189,7 +189,10 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
 	sh->set_undef(sh->root->live_before);
 
-	SB_RUN_PASS(if_conversion,		1);
+	// if conversion breaks the dependency tracking between CF_EMIT ops when it removes
+	// the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
+	if (sh->target != TARGET_GS)
+		SB_RUN_PASS(if_conversion,		1);
 
 	// if_conversion breaks info about uses, but next pass (peephole)
 	// doesn't need it, so we can skip def/use update here
diff --git a/src/gallium/drivers/r600/sb/sb_dump.cpp b/src/gallium/drivers/r600/sb/sb_dump.cpp
index b2130a4..d605170 100644
--- a/src/gallium/drivers/r600/sb/sb_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_dump.cpp
@@ -354,7 +354,9 @@ void dump::dump_op(node &n, const char *name) {
 					"WRITE_IND_ACK"};
 			sblog << "  " << exp_type[c->bc.type] << " " << c->bc.array_base
 					<< "   ES:" << c->bc.elem_size;
-			has_dst = false;
+			if (!(c->bc.op_ptr->flags & CF_EMIT)) {
+				has_dst = false;
+			}
 		}
 	}
 
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index 711c2eb..560a4a9 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -41,7 +41,8 @@ enum special_regs {
 	SV_ALU_PRED = 128,
 	SV_EXEC_MASK,
 	SV_AR_INDEX,
-	SV_VALID_MASK
+	SV_VALID_MASK,
+	SV_GEOMETRY_EMIT
 };
 
 class node;
@@ -506,6 +507,9 @@ public:
 	bool is_AR() {
 		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
 	}
+	bool is_geometry_emit() {
+		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
+	}
 
 	node* any_def() {
 		assert(!(def && adef));
diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index e53aba5..370807a 100644
--- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -751,7 +751,7 @@ void ra_split::split_vector_inst(node* n) {
 		// src vectors 1 (src[4-7] and 2 (src[8-11])
 
 		unsigned nvec = n->src.size() >> 2;
-		assert(nvec << 2 == n->src.size());
+		assert(nvec << 2 <= n->src.size());
 
 		for (unsigned nv = 0; nv < nvec; ++nv) {
 			vvec sv, tv, nsrc(4);
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 63e7464..4248a3f 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -1463,7 +1463,7 @@ unsigned post_scheduler::try_add_instruction(node *n) {
 		value *d = a->dst.empty() ? NULL : a->dst[0];
 
 		if (d && d->is_special_reg()) {
-			assert(a->bc.op_ptr->flags & AF_MOVA);
+			assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
 			d = NULL;
 		}
 
diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp
index 0d39e9c..eb242b1 100644
--- a/src/gallium/drivers/r600/sb/sb_valtable.cpp
+++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp
@@ -55,6 +55,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
 			case SV_ALU_PRED: o << "PR"; break;
 			case SV_EXEC_MASK: o << "EM"; break;
 			case SV_VALID_MASK: o << "VM"; break;
+			case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
 			default: o << "???specialreg"; break;
 		}
 		break;
-- 
1.9.1



More information about the mesa-dev mailing list