[Mesa-dev] [PATCH 1/2] r600g/sb: separate bytecode decoding and parsing

Vadim Girlin vadimgirlin at gmail.com
Sat May 11 18:15:40 PDT 2013


Parsing and IR construction are required for optimization only;
they are unnecessary if we only need to print a shader dump.
This should make the new disassembler more tolerant of any new
features in the bytecode.

Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
---
 src/gallium/drivers/r600/sb/sb_bc.h           |  27 ++--
 src/gallium/drivers/r600/sb/sb_bc_builder.cpp |   4 -
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp  | 224 +++++++++++++-------------
 src/gallium/drivers/r600/sb/sb_core.cpp       |  45 ++++--
 src/gallium/drivers/r600/sb/sb_shader.cpp     |   4 +-
 src/gallium/drivers/r600/sb/sb_shader.h       |   3 +-
 6 files changed, 163 insertions(+), 144 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index 9c6ed46..9f65098 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -674,40 +674,39 @@ class bc_parser {
 	typedef std::stack<region_node*> region_stack;
 	region_stack loop_stack;
 
-	int enable_dump;
-	int optimize;
-
 public:
 
-	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader,
-	          int dump_source, int optimize) :
+	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
 		ctx(sctx), dec(), bc(bc), pshader(pshader),
 		dw(), bc_ndw(), max_cf(),
 		sh(), error(), slots(), cgroup(),
-		cf_map(), loop_stack(), enable_dump(dump_source),
-		optimize(optimize) { }
+		cf_map(), loop_stack() { }
 
-	int parse();
+	int decode();
+	int prepare();
 
 	shader* get_shader() { assert(!error); return sh; }
 
 private:
 
-	int parse_shader();
+	int decode_shader();
 
 	int parse_decls();
 
-	int parse_cf(unsigned &i, bool &eop);
+	int decode_cf(unsigned &i, bool &eop);
 
-	int parse_alu_clause(cf_node *cf);
-	int parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
+	int decode_alu_clause(cf_node *cf);
+	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
 
-	int parse_fetch_clause(cf_node *cf);
+	int decode_fetch_clause(cf_node *cf);
 
 	int prepare_ir();
+	int prepare_alu_clause(cf_node *cf);
+	int prepare_alu_group(cf_node* cf, alu_group_node *g);
+	int prepare_fetch_clause(cf_node *cf);
+
 	int prepare_loop(cf_node *c);
 	int prepare_if(cf_node *c);
-	int prepare_alu_clause(cf_node *c);
 
 };
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b0c2e41..f40e469 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -94,10 +94,6 @@ int bc_builder::build() {
 		cf_pos = bb.get_pos();
 	}
 
-	if (sh.enable_dump) {
-		bc_dump(sh, cerr, &bb).run();
-	}
-
 	return 0;
 }
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 8329287..9f3ecc5 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -47,7 +47,7 @@ namespace r600_sb {
 
 using std::cerr;
 
-int bc_parser::parse() {
+int bc_parser::decode() {
 
 	dw = bc->bytecode;
 	bc_ndw = bc->ndw;
@@ -71,47 +71,27 @@ int bc_parser::parse() {
 			t = TARGET_FETCH;
 	}
 
-	sh = new shader(ctx, t, bc->debug_id, enable_dump);
-	int r = parse_shader();
+	sh = new shader(ctx, t, bc->debug_id);
+	int r = decode_shader();
 
 	delete dec;
 
-	if (r)
-		return r;
-
 	sh->ngpr = bc->ngpr;
 	sh->nstack = bc->nstack;
 
-	if (sh->target != TARGET_FETCH) {
-		sh->src_stats.ndw = bc->ndw;
-		sh->collect_stats(false);
-	}
-
-	if (enable_dump) {
-		bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
-	}
-
-	if (!optimize)
-		return 0;
-
-	prepare_ir();
-
 	return r;
 }
 
-int bc_parser::parse_shader() {
+int bc_parser::decode_shader() {
 	int r = 0;
 	unsigned i = 0;
 	bool eop = false;
 
 	sh->init();
 
-	if (pshader)
-		parse_decls();
-
 	do {
 		eop = false;
-		if ((r = parse_cf(i, eop)))
+		if ((r = decode_cf(i, eop)))
 			return r;
 
 	} while (!eop || (i >> 1) <= max_cf);
@@ -119,34 +99,34 @@ int bc_parser::parse_shader() {
 	return 0;
 }
 
-int bc_parser::parse_decls() {
-
-//	sh->prepare_regs(rs.bc.ngpr);
-
-	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+int bc_parser::prepare() {
+	int r = 0;
+	if ((r = parse_decls()))
+		return r;
+	if ((r = prepare_ir()))
+		return r;
+	return 0;
+}
 
-#if SB_NO_ARRAY_INFO
+int bc_parser::parse_decls() {
 
+	if (!pshader) {
 		sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
+		return 0;
+	}
 
-#else
+	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
 
 		assert(pshader->num_arrays);
 
 		if (pshader->num_arrays) {
-
 			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
 				r600_shader_array &a = pshader->arrays[i];
 				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
 			}
-
 		} else {
 			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
 		}
-
-
-#endif
-
 	}
 
 	if (sh->target == TARGET_VS)
@@ -183,12 +163,10 @@ int bc_parser::parse_decls() {
 		}
 	}
 
-
 	return 0;
 }
 
-
-int bc_parser::parse_cf(unsigned &i, bool &eop) {
+int bc_parser::decode_cf(unsigned &i, bool &eop) {
 
 	int r;
 
@@ -210,18 +188,15 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
 	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
 
 	if (flags & CF_ALU) {
-		if ((r = parse_alu_clause(cf)))
+		if ((r = decode_alu_clause(cf)))
 			return r;
 	} else if (flags & CF_FETCH) {
-		if ((r = parse_fetch_clause(cf)))
+		if ((r = decode_fetch_clause(cf)))
 			return r;;
 	} else if (flags & CF_EXP) {
 		assert(!cf->bc.rw_rel);
 	} else if (flags & (CF_STRM | CF_RAT)) {
 		assert(!cf->bc.rw_rel);
-	} else if (cf->bc.op == CF_OP_CALL_FS) {
-		sh->init_call_fs(cf);
-		cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
 	} else if (flags & CF_BRANCH) {
 		if (cf->bc.addr > max_cf)
 			max_cf = cf->bc.addr;
@@ -232,7 +207,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
 	return 0;
 }
 
-int bc_parser::parse_alu_clause(cf_node* cf) {
+int bc_parser::decode_alu_clause(cf_node* cf) {
 	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
 
 	cf->subtype = NST_ALU_CLAUSE;
@@ -243,7 +218,7 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
 	unsigned ng = 0;
 
 	do {
-		parse_alu_group(cf, i, gcnt);
+		decode_alu_group(cf, i, gcnt);
 		assert(gcnt <= cnt);
 		cnt -= gcnt;
 		ng++;
@@ -252,16 +227,17 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
 	return 0;
 }
 
-int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
+int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 	int r;
 	alu_node *n;
 	alu_group_node *g = sh->create_alu_group();
 
 	cgroup = !cgroup;
 	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
-
 	gcnt = 0;
 
+	unsigned literal_mask = 0;
+
 	do {
 		n = sh->create_alu();
 		g->push_back(n);
@@ -280,11 +256,62 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 
 	assert(n->bc.last);
 
-	unsigned literal_mask = 0;
+	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
+		n = static_cast<alu_node*>(*I);
+
+		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
+			bc_alu_src &src = n->bc.src[k];
+			if (src.sel == ALU_SRC_LITERAL) {
+				literal_mask |= (1 << src.chan);
+				src.value.u = dw[i + src.chan];
+			}
+		}
+	}
+
+	unsigned literal_ndw = 0;
+	while (literal_mask) {
+		g->literals.push_back(dw[i + literal_ndw]);
+		literal_ndw += 1;
+		literal_mask >>= 1;
+	}
+
+	literal_ndw = (literal_ndw + 1) & ~1u;
+
+	i += literal_ndw;
+	gcnt += literal_ndw >> 1;
+
+	cf->push_back(g);
+	return 0;
+}
+
+int bc_parser::prepare_alu_clause(cf_node* cf) {
+
+	// loop over alu groups
+	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+		assert(I->subtype == NST_ALU_GROUP);
+		alu_group_node *g = static_cast<alu_group_node*>(*I);
+		prepare_alu_group(cf, g);
+	}
+
+	return 0;
+}
+
+int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
+
+	alu_node *n;
+
+	cgroup = !cgroup;
+	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
 
 	for (node_iterator I = g->begin(), E = g->end();
 			I != E; ++I) {
 		n = static_cast<alu_node*>(*I);
+
+		if (!sh->assign_slot(n, slots[cgroup])) {
+			assert(!"alu slot assignment failed");
+			return -1;
+		}
+
 		unsigned src_count = n->bc.op_ptr->src_count;
 
 		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
@@ -340,10 +367,6 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 			bc_alu_src &src = n->bc.src[s];
 
 			if (src.sel == ALU_SRC_LITERAL) {
-				unsigned chan = src.chan;
-
-				literal_mask |= (1 << chan);
-				src.value.u = dw[i+chan];
 				n->src[s] = sh->get_const_value(src.value);
 			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
 				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
@@ -430,38 +453,52 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 
 	if (p) {
 		g->push_front(p);
-	}
 
-	unsigned literal_ndw = 0;
-	while (literal_mask) {
-		g->literals.push_back(dw[i + literal_ndw]);
-		literal_ndw += 1;
-		literal_mask >>= 1;
-	}
+		if (p->count() == 3 && ctx.is_cayman()) {
+			// cayman's scalar instruction that can use 3 or 4 slots
 
-	literal_ndw = (literal_ndw + 1) & ~1u;
+			// FIXME for simplicity we'll always add 4th slot,
+			// but probably we might want to always remove 4th slot and make
+			// sure that regalloc won't choose 'w' component for dst
 
-	i += literal_ndw;
-	gcnt += literal_ndw >> 1;
+			alu_node *f = static_cast<alu_node*>(p->first);
+			alu_node *a = sh->create_alu();
+			a->src = f->src;
+			a->dst.resize(f->dst.size());
+			a->bc = f->bc;
+			a->bc.slot = SLOT_W;
+			p->push_back(a);
+		}
+	}
 
-	cf->push_back(g);
 	return 0;
 }
 
-int bc_parser::parse_fetch_clause(cf_node* cf) {
+int bc_parser::decode_fetch_clause(cf_node* cf) {
 	int r;
 	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
 	cf->subtype = NST_TEX_CLAUSE;
 
-	vvec grad_v, grad_h;
-
 	while (cnt--) {
 		fetch_node *n = sh->create_fetch();
 		cf->push_back(n);
 		if ((r = dec->decode_fetch(i, n->bc)))
 			return r;
 
+	}
+	return 0;
+}
+
+int bc_parser::prepare_fetch_clause(cf_node *cf) {
+
+	vvec grad_v, grad_h;
+
+	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+
+		fetch_node *n = static_cast<fetch_node*>(*I);
+		assert(n->is_valid());
+
 		unsigned flags = n->bc.op_ptr->flags;
 
 		unsigned vtx = flags & FF_VTX;
@@ -527,6 +564,7 @@ int bc_parser::parse_fetch_clause(cf_node* cf) {
 
 		}
 	}
+
 	return 0;
 }
 
@@ -540,7 +578,14 @@ int bc_parser::prepare_ir() {
 
 		unsigned flags = c->bc.op_ptr->flags;
 
-		if (flags & CF_LOOP_START) {
+		if (flags & CF_ALU) {
+			prepare_alu_clause(c);
+		} else if (flags & CF_FETCH) {
+			prepare_fetch_clause(c);
+		} else if (c->bc.op == CF_OP_CALL_FS) {
+			sh->init_call_fs(c);
+			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+		} else if (flags & CF_LOOP_START) {
 			prepare_loop(c);
 		} else if (c->bc.op == CF_OP_JUMP) {
 			prepare_if(c);
@@ -560,10 +605,6 @@ int bc_parser::prepare_ir() {
 				dep->move(c->parent->first, c);
 			c->replace_with(dep);
 			sh->simplify_dep_rep(dep);
-		} else if (flags & CF_ALU && ctx.is_cayman()) {
-			// postprocess cayman's 3-slot instructions (ex-trans-only)
-			// FIXME it shouldn't be required with proper handling
-			prepare_alu_clause(c);
 		} else if (flags & CF_EXP) {
 
 			// unroll burst exports
@@ -735,40 +776,5 @@ int bc_parser::prepare_if(cf_node* c) {
 	return 0;
 }
 
-int bc_parser::prepare_alu_clause(cf_node* c) {
-
-	// loop over alu groups
-	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
-		assert(I->subtype == NST_ALU_GROUP);
-
-		alu_group_node *g = static_cast<alu_group_node*>(*I);
-
-		// loop over alu_group items
-		for (node_iterator I2 = g->begin(), E2 = g->end(); I2 != E2; ++I2) {
-			if (I2->subtype != NST_ALU_PACKED_INST)
-				continue;
-
-			alu_packed_node *p = static_cast<alu_packed_node*>(*I2);
-
-			if (p->count() == 3) {
-				// cayman's scalar instruction that takes 3 or 4 slots
-
-				// FIXME for simplicity we'll always add 4th slot,
-				// but probably we might want to always remove 4th slot and make
-				// sure that regalloc won't choose w component for dst
-
-				alu_node *f = static_cast<alu_node*>(p->first);
-				alu_node *a = sh->create_alu();
-				a->src = f->src;
-				a->dst.resize(f->dst.size());
-				a->bc = f->bc;
-				a->bc.slot = SLOT_W;
-				p->push_back(a);
-			}
-		}
-	}
-
-	return 0;
-}
 
 } // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index b919fa4..17a8b87 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -94,7 +94,7 @@ void r600_sb_context_destroy(void * sctx) {
 int r600_sb_bytecode_process(struct r600_context *rctx,
                              struct r600_bytecode *bc,
                              struct r600_shader *pshader,
-                             int dump_source_bytecode,
+                             int dump_bytecode,
                              int optimize) {
 	int r = 0;
 	unsigned shader_id = bc->debug_id;
@@ -111,13 +111,29 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
 	SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; );
 
-	bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize);
+	bc_parser parser(*ctx, bc, pshader);
 
-	if ((r = parser.parse())) {
-		assert(0);
+	if ((r = parser.decode())) {
+		assert(!"sb: bytecode decoding error");
 		return r;
 	}
 
+	shader *sh = parser.get_shader();
+
+	if (dump_bytecode) {
+		bc_dump(*sh, cerr, bc->bytecode, bc->ndw).run();
+	}
+
+	if (!optimize) {
+		delete sh;
+		return 0;
+	}
+
+	if (sh->target != TARGET_FETCH) {
+		sh->src_stats.ndw = bc->ndw;
+		sh->collect_stats(false);
+	}
+
 	/* skip some shaders (use shaders from default backend)
 	 * dskip_start - range start, dskip_end - range_end,
 	 * e.g. start = 5, end = 6 means shaders 5 & 6
@@ -138,14 +154,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 		}
 	}
 
-	shader *sh = parser.get_shader();
-	SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
-
-	if (!optimize) {
-		delete sh;
-		return 0;
+	if ((r = parser.prepare())) {
+		assert(!"sb: bytecode parsing error");
+		return r;
 	}
 
+	SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
+
 #define SB_RUN_PASS(n, dump) \
 	do { \
 		r = n(*sh).run(); \
@@ -222,8 +237,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 		return r;
 	}
 
+	bytecode &nbc = builder.get_bytecode();
+
+	if (dump_bytecode) {
+		bc_dump(*sh, cerr, &nbc).run();
+	}
+
 	if (!sb_context::dry_run) {
-		bytecode &nbc = builder.get_bytecode();
 
 		free(bc->bytecode);
 		bc->ndw = nbc.ndw();
@@ -233,10 +253,9 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 		bc->ngpr = sh->ngpr;
 		bc->nstack = sh->nstack;
 	} else {
-		SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
+		SB_DUMP_STAT( cerr << "sb: dry run: optimized bytecode is not used\n"; );
 	}
 
-
 	if (sb_context::dump_stat) {
 		int64_t t = os_time_get_nano() - time_start;
 
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 5944ba6..f0665ef 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -33,11 +33,11 @@ namespace r600_sb {
 
 using std::cerr;
 
-shader::shader(sb_context &sctx, shader_target t, unsigned id, bool dump)
+shader::shader(sb_context &sctx, shader_target t, unsigned id)
 : ctx(sctx), next_temp_value_index(temp_regid_offset),
   prep_regs_count(), pred_sels(),
   regions(), inputs(), undef(), val_pool(sizeof(value)),
-  pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump),
+  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
   optimized(), id(id),
   coal(*this), bbs(),
   target(t), vt(ex), ex(*this), root(),
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
index b2e3837..5362e39 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -271,7 +271,6 @@ public:
 
 	error_map errors;
 
-	bool enable_dump;
 	bool optimized;
 
 	unsigned id;
@@ -296,7 +295,7 @@ public:
 
 	unsigned ngpr, nstack;
 
-	shader(sb_context &sctx, shader_target t, unsigned id, bool dump);
+	shader(sb_context &sctx, shader_target t, unsigned id);
 
 	~shader();
 
-- 
1.8.2.1



More information about the mesa-dev mailing list