[Mesa-dev] [PATCH 2/2] r600/sb: start adding GDS support

Dave Airlie airlied at gmail.com
Thu Dec 7 04:34:31 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This adds support for GDS ops to the sb backend.

It seems to work for atomic counters on Cayman, but it probably
needs a lot more testing.
---
 src/gallium/drivers/r600/r600_isa.h            |  2 +-
 src/gallium/drivers/r600/sb/sb_bc.h            |  7 +++++
 src/gallium/drivers/r600/sb/sb_bc_builder.cpp  | 39 +++++++++++++++++++++++++-
 src/gallium/drivers/r600/sb/sb_bc_decoder.cpp  |  9 +++++-
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp     | 13 +++++++--
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  7 +++++
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 11 ++++++--
 src/gallium/drivers/r600/sb/sb_dump.cpp        |  1 +
 src/gallium/drivers/r600/sb/sb_gcm.cpp         | 20 ++++++++++---
 src/gallium/drivers/r600/sb/sb_ir.h            |  3 +-
 src/gallium/drivers/r600/sb/sb_peephole.cpp    | 14 ++++++++-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp     |  2 ++
 src/gallium/drivers/r600/sb/sb_shader.cpp      |  3 ++
 13 files changed, 118 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index b5a36b4e80e..f6e26976c5f 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -115,7 +115,7 @@ enum alu_op_flags
 	AF_CC_LE	= (5U << AF_CC_SHIFT),
 };
 
-/* flags for FETCH instructions (TEX/VTX) */
+/* flags for FETCH instructions (TEX/VTX/GDS) */
 enum fetch_op_flags
 {
 	FF_GDS		= (1<<0),
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index fed041cf506..fc3fa5082d0 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -401,6 +401,7 @@ enum sched_queue_id {
 	SQ_ALU,
 	SQ_TEX,
 	SQ_VTX,
+	SQ_GDS,
 
 	SQ_NUM
 };
@@ -580,6 +581,11 @@ struct bc_fetch {
 	unsigned mega_fetch:1;
 
 	unsigned src2_gpr:7; /* for GDS */
+	unsigned alloc_consume:1;
+	unsigned uav_id:4;
+	unsigned uav_index_mode:2;
+	unsigned bcast_first_req:1;
+
 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
 };
 
@@ -966,6 +972,7 @@ private:
 	int build_fetch_clause(cf_node *n);
 	int build_fetch_tex(fetch_node *n);
 	int build_fetch_vtx(fetch_node *n);
+	int build_fetch_gds(fetch_node *n);
 };
 
 } // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b0df3d9a544..b45e81729df 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -129,7 +129,9 @@ int bc_builder::build_fetch_clause(cf_node* n) {
 			I != E; ++I) {
 		fetch_node *f = static_cast<fetch_node*>(*I);
 
-		if (f->bc.op_ptr->flags & FF_VTX)
+		if (f->bc.op_ptr->flags & FF_GDS)
+			build_fetch_gds(f);
+		else if (f->bc.op_ptr->flags & FF_VTX)
 			build_fetch_vtx(f);
 		else
 			build_fetch_tex(f);
@@ -558,6 +560,41 @@ int bc_builder::build_fetch_tex(fetch_node* n) {
 	return 0;
 }
 
+/* Emit one GDS fetch instruction: three MEM_GDS words, then a zero dword. */
+int bc_builder::build_fetch_gds(fetch_node *n) {
+	const bc_fetch &bc = n->bc;
+	/* GDS_OP is bits 8..13 of the value returned by ctx.fetch_opcode(). */
+	unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
+	assert(bc.op_ptr->flags & FF_GDS); /* only GDS ops may be built here */
+
+	bb << MEM_GDS_WORD0_EGCM()
+		.MEM_INST(2)
+		.MEM_OP(4)
+		.SRC_GPR(bc.src_gpr)
+		.SRC_SEL_X(bc.src_sel[0])
+		.SRC_SEL_Y(bc.src_sel[1])
+		.SRC_SEL_Z(bc.src_sel[2]);
+
+	bb << MEM_GDS_WORD1_EGCM()
+		.DST_GPR(bc.dst_gpr)
+		.DST_REL_MODE(bc.dst_rel)
+		.GDS_OP(gds_op)
+		.SRC_GPR(bc.src2_gpr)
+		.UAV_INDEX_MODE(bc.uav_index_mode)
+		.UAV_ID(bc.uav_id)
+		.ALLOC_CONSUME(bc.alloc_consume)
+		.BCAST_FIRST_REQ(bc.bcast_first_req);
+
+	bb << MEM_GDS_WORD2_EGCM()
+		.DST_SEL_X(bc.dst_sel[0])
+		.DST_SEL_Y(bc.dst_sel[1])
+		.DST_SEL_Z(bc.dst_sel[2])
+		.DST_SEL_W(bc.dst_sel[3]);
+
+	bb << 0; /* fourth dword is written as zero */
+	return 0;
+}
+
 int bc_builder::build_fetch_vtx(fetch_node* n) {
 	const bc_fetch &bc = n->bc;
 	const fetch_op_info *fop = bc.op_ptr;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
index 8712abe5f78..1fa580e66d6 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -415,7 +415,10 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
 		unsigned gds_op;
 		if (mem_op == 4) {
 			gds_op = (dw1 >> 9) & 0x1f;
-			fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
+			if ((dw1 >> 9) & 0x20)
+				fetch_opcode = FETCH_OP_GDS_ADD_RET + gds_op;
+			else
+				fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
 		} else if (mem_op == 5)
 			fetch_opcode = FETCH_OP_TF_WRITE;
 		bc.set_op(fetch_opcode);
@@ -512,6 +515,10 @@ int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) {
 	tmp = w1.get_DST_REL_MODE();
 	bc.dst_rel_global = (tmp == 2);
 	bc.src2_gpr = w1.get_SRC_GPR();
+	bc.alloc_consume = w1.get_ALLOC_CONSUME();
+	bc.uav_id = w1.get_UAV_ID();
+	bc.uav_index_mode = w1.get_UAV_INDEX_MODE();
+	bc.bcast_first_req = w1.get_BCAST_FIRST_REQ();
 
 	MEM_GDS_WORD2_EGCM w2(dw2);
 	bc.dst_sel[0] = w2.get_DST_SEL_X();
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 788450b3c9c..72a1b24467d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -452,11 +452,14 @@ void bc_dump::dump(fetch_node& n) {
 	sb_ostringstream s;
 	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
 	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
+	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
+		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
+	bool show_dst = !gds || (gds && gds_has_ret);
 
 	s << n.bc.op_ptr->name;
 	fill_to(s, 20);
 
-	if (!gds) {
+	if (show_dst) {
 		s << "R";
 		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
 		s << ".";
@@ -483,7 +486,13 @@ void bc_dump::dump(fetch_node& n) {
 		s << ",   RID:" << n.bc.resource_id;
 
 	if (gds) {
-
+		s << " UAV:" << n.bc.uav_id;
+		if (n.bc.uav_index_mode)
+			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
+		if (n.bc.bcast_first_req)
+			s << " BFQ";
+		if (n.bc.alloc_consume)
+			s << " AC";
 	} else if (vtx) {
 		s << "  " << fetch_type[n.bc.fetch_type];
 		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 7f1dd0a7a0e..ba6d4fc253a 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -557,6 +557,8 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 
 	if (flags & FF_VTX) {
 		src_count = 1;
+	} else if (flags & FF_GDS) {
+		src_count = 1;
 	} else if (flags & FF_USEGRAD) {
 		emit_set_grad(f);
 	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
@@ -661,6 +663,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 	for (unsigned i = 0; i < 4; ++i)
 		f->bc.dst_sel[i] = dst_swz[i];
 
+	if ((flags & FF_GDS) && reg == -1) {
+		f->bc.dst_sel[0] = SEL_MASK;
+		f->bc.dst_gpr = 0;
+		return ;
+	}
 	assert(reg >= 0);
 
 	if (reg >= 0)
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 8a4abd48306..2023cfff160 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -569,7 +569,10 @@ int bc_parser::decode_fetch_clause(cf_node* cf) {
 	int r;
 	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
-	cf->subtype = NST_TEX_CLAUSE;
+	if (cf->bc.op_ptr->flags && FF_GDS)
+		cf->subtype = NST_GDS_CLAUSE;
+	else
+		cf->subtype = NST_TEX_CLAUSE;
 
 	while (cnt--) {
 		fetch_node *n = sh->create_fetch();
@@ -595,10 +598,14 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
 		unsigned flags = n->bc.op_ptr->flags;
 
 		unsigned vtx = flags & FF_VTX;
-		unsigned num_src = vtx ? ctx.vtx_src_num : 4;
+		unsigned gds = flags & FF_GDS;
+		unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;
 
 		n->dst.resize(4);
 
+		if (gds) {
+			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
+		}
 		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
 			sh->uses_gradients = true;
 		}
diff --git a/src/gallium/drivers/r600/sb/sb_dump.cpp b/src/gallium/drivers/r600/sb/sb_dump.cpp
index d6051704c15..70892e1c8c4 100644
--- a/src/gallium/drivers/r600/sb/sb_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_dump.cpp
@@ -469,6 +469,7 @@ void dump::dump_op(node* n) {
 	case NST_ALU_CLAUSE:
 	case NST_TEX_CLAUSE:
 	case NST_VTX_CLAUSE:
+	case NST_GDS_CLAUSE:
 		dump_op(*n, static_cast<cf_node*>(n)->bc.op_ptr->name);
 		break;
 	case NST_ALU_PACKED_INST:
diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp
index 7b43a32818e..f1e9e1caf68 100644
--- a/src/gallium/drivers/r600/sb/sb_gcm.cpp
+++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp
@@ -427,10 +427,22 @@ void gcm::bu_sched_bb(bb_node* bb) {
 
 				if (sq != SQ_CF) {
 					if (!clause || sampler_indexing) {
-						clause = sh.create_clause(sq == SQ_ALU ?
-								NST_ALU_CLAUSE :
-									sq == SQ_TEX ? NST_TEX_CLAUSE :
-											NST_VTX_CLAUSE);
+						node_subtype nst;
+						switch (sq) {
+						case SQ_ALU:
+							nst = NST_ALU_CLAUSE;
+							break;
+						case SQ_TEX:
+							nst = NST_TEX_CLAUSE;
+							break;
+						case SQ_GDS:
+							nst = NST_GDS_CLAUSE;
+							break;
+						default:
+							nst = NST_VTX_CLAUSE;
+							break;
+						}	
+						clause = sh.create_clause(nst);
 						bb->push_front(clause);
 					}
 				} else {
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index ec973e7bfc2..374619ea77a 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -663,6 +663,7 @@ enum node_subtype {
 	NST_FETCH_INST,
 	NST_TEX_CLAUSE,
 	NST_VTX_CLAUSE,
+	NST_GDS_CLAUSE,
 
 	NST_BB,
 
@@ -786,7 +787,7 @@ public:
 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
 
 	bool is_fetch_clause() {
-		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
+		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
 	}
 
 	bool is_copy() { return subtype == NST_COPY; }
diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index d4b97557d4e..49a6965b1f3 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -52,7 +52,19 @@ void peephole::run_on(container_node* c) {
 		if (n->is_container())
 			run_on(static_cast<container_node*>(n));
 		else {
-
+			if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
+				fetch_node *f = static_cast<fetch_node*>(n);
+				bool has_dst = false;
+
+				for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
+					value *v = *I;
+					if (v)
+						has_dst = true;
+				}
+				if (!has_dst)
+					if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
+						f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
+			}
 			if (n->is_alu_inst()) {
 				alu_node *a = static_cast<alu_node*>(n);
 
diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index 68ee98291f8..e5ec9db23b7 100644
--- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -745,6 +745,8 @@ void ra_split::split_vector_inst(node* n) {
 	no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
 			n->is_fetch_op(FETCH_OP_SEMFETCH);
 
+	no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS);
+
 	if (!n->src.empty() && !call_fs) {
 
 		// we may have more than one source vector -
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 8c7b39bb03f..321e24ea256 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -91,6 +91,7 @@ cf_node* shader::create_clause(node_subtype nst) {
 	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
 	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
 	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
+	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
 	default: assert(!"invalid clause type"); break;
 	}
 
@@ -597,6 +598,8 @@ sched_queue_id shader::get_queue_id(node* n) {
 			fetch_node *f = static_cast<fetch_node*>(n);
 			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
 				return SQ_VTX;
+			if (f->bc.op_ptr->flags & FF_GDS)
+				return SQ_GDS;
 			return SQ_TEX;
 		}
 		case NST_CF_INST:
-- 
2.14.3



More information about the mesa-dev mailing list