[Mesa-dev] [PATCH 5/5] i965: Generalize VS compute-to-MRF for compute-to-another-GRF, too.
Eric Anholt
eric at anholt.net
Tue Dec 11 13:43:02 PST 2012
No statistically significant performance difference on glbenchmark 2.7
(n=60). It reduces cycles spent in the vertex shader by 3.3% +/- 0.8%
(n=5), but that's only about .3% of all cycles spent according to the
fixed shader_time.
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 129 +++++++++++---------
src/mesa/drivers/dri/i965/brw_vec4.h | 2 +-
.../dri/i965/test_vec4_register_coalesce.cpp | 58 ++++++++-
3 files changed, 128 insertions(+), 61 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 7ab37e7..079bbab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -680,12 +680,12 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
}
/*
- * Tries to reduce extra MOV instructions by taking GRFs that get just
- * written and then MOVed into an MRF and making the original write of
- * the GRF write directly to the MRF instead.
+ * Tries to reduce extra MOV instructions by taking temporary GRFs that get
+ * just written and then MOVed into another reg and making the original write
+ * of the GRF write directly to the final destination instead.
*/
bool
-vec4_visitor::opt_compute_to_mrf()
+vec4_visitor::opt_register_coalesce()
{
bool progress = false;
int next_ip = 0;
@@ -699,24 +699,25 @@ vec4_visitor::opt_compute_to_mrf()
next_ip++;
if (inst->opcode != BRW_OPCODE_MOV ||
+ (inst->dst.file != GRF && inst->dst.file != MRF) ||
inst->predicate ||
- inst->dst.file != MRF || inst->src[0].file != GRF ||
+ inst->src[0].file != GRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
continue;
- int mrf = inst->dst.reg;
+ bool to_mrf = (inst->dst.file == MRF);
- /* Can't compute-to-MRF this GRF if someone else was going to
+ /* Can't coalesce this GRF if someone else was going to
* read it later.
*/
if (this->virtual_grf_use[inst->src[0].reg] > ip)
continue;
- /* We need to check interference with the MRF between this
- * instruction and the earliest instruction involved in writing
- * the GRF we're eliminating. To do that, keep track of which
- * of our source channels we've seen initialized.
+ /* We need to check interference with the final destination between this
+ * instruction and the earliest instruction involved in writing the GRF
+ * we're eliminating. To do that, keep track of which of our source
+ * channels we've seen initialized.
*/
bool chans_needed[4] = {false, false, false, false};
int chans_remaining = 0;
@@ -735,8 +736,9 @@ vec4_visitor::opt_compute_to_mrf()
}
}
- /* Now walk up the instruction stream trying to see if we can
- * rewrite everything writing to the GRF into the MRF instead.
+ /* Now walk up the instruction stream trying to see if we can rewrite
+ * everything writing to the temporary to write into the destination
+ * instead.
*/
vec4_instruction *scan_inst;
for (scan_inst = (vec4_instruction *)inst->prev;
@@ -745,22 +747,21 @@ vec4_visitor::opt_compute_to_mrf()
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg &&
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
- /* Found something writing to the reg we want to turn into
- * a compute-to-MRF.
- */
-
- /* SEND instructions can't have MRF as a destination. */
- if (scan_inst->mlen)
- break;
-
- if (intel->gen >= 6) {
- /* gen6 math instructions must have the destination be
- * GRF, so no compute-to-MRF for them.
- */
- if (scan_inst->is_math()) {
- break;
- }
- }
+ /* Found something writing to the reg we want to coalesce away. */
+ if (to_mrf) {
+ /* SEND instructions can't have MRF as a destination. */
+ if (scan_inst->mlen)
+ break;
+
+ if (intel->gen >= 6) {
+ /* gen6 math instructions must have the destination be
+ * GRF, so no compute-to-MRF for them.
+ */
+ if (scan_inst->is_math()) {
+ break;
+ }
+ }
+ }
/* If we can't handle the swizzle, bail. */
if (!scan_inst->can_reswizzle_dst(inst->dst.writemask,
@@ -784,9 +785,8 @@ vec4_visitor::opt_compute_to_mrf()
break;
}
- /* We don't handle flow control here. Most computation of
- * values that end up in MRFs are shortly before the MRF
- * write anyway.
+ /* We don't handle flow control here. Most computation of values
+ * that could be coalesced happens just before their use.
*/
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
@@ -795,9 +795,11 @@ vec4_visitor::opt_compute_to_mrf()
break;
}
- /* You can't read from an MRF, so if someone else reads our
- * MRF's source GRF that we wanted to rewrite, that stops us.
- */
+ /* You can't read from an MRF, so if someone else reads our MRF's
+ * source GRF that we wanted to rewrite, that stops us. If it's a
+ * GRF we're trying to coalesce to, we don't actually handle
+ * rewriting sources so bail in that case as well.
+ */
bool interfered = false;
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
@@ -809,30 +811,41 @@ vec4_visitor::opt_compute_to_mrf()
if (interfered)
break;
- /* If somebody else writes our MRF here, we can't
- * compute-to-MRF before that.
- */
- if (scan_inst->dst.file == MRF && mrf == scan_inst->dst.reg)
+ /* If somebody else writes our destination here, we can't coalesce
+ * before that.
+ */
+ if (scan_inst->dst.file == inst->dst.file &&
+ scan_inst->dst.reg == inst->dst.reg) {
break;
+ }
- if (scan_inst->mlen > 0) {
- /* Found a SEND instruction, which means that there are
- * live values in MRFs from base_mrf to base_mrf +
- * scan_inst->mlen - 1. Don't go pushing our MRF write up
- * above it.
- */
- if (mrf >= scan_inst->base_mrf &&
- mrf < scan_inst->base_mrf + scan_inst->mlen) {
- break;
- }
- }
+ /* Check for reads of the register we're trying to coalesce into. We
+ * can't go rewriting instructions above that to put some other value
+ * in the register instead.
+ */
+ if (to_mrf && scan_inst->mlen > 0) {
+ if (inst->dst.reg >= scan_inst->base_mrf &&
+ inst->dst.reg < scan_inst->base_mrf + scan_inst->mlen) {
+ break;
+ }
+ } else {
+ for (int i = 0; i < 3; i++) {
+ if (scan_inst->src[i].file == inst->dst.file &&
+ scan_inst->src[i].reg == inst->dst.reg &&
+ scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
+ interfered = true;
+ }
+ }
+ if (interfered)
+ break;
+ }
}
if (chans_remaining == 0) {
- /* If we've made it here, we have an inst we want to
- * compute-to-MRF, and a scan_inst pointing to the earliest
- * instruction involved in computing the value. Now go
- * rewrite the instruction stream between the two.
+ /* If we've made it here, we have an MOV we want to coalesce out, and
+ * a scan_inst pointing to the earliest instruction involved in
+ * computing the value. Now go rewrite the instruction stream
+ * between the two.
*/
while (scan_inst != inst) {
@@ -841,9 +854,9 @@ vec4_visitor::opt_compute_to_mrf()
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
scan_inst->reswizzle_dst(inst->dst.writemask,
inst->src[0].swizzle);
- scan_inst->dst.file = MRF;
- scan_inst->dst.reg = mrf;
- scan_inst->dst.reg_offset = 0;
+ scan_inst->dst.file = inst->dst.file;
+ scan_inst->dst.reg = inst->dst.reg;
+ scan_inst->dst.reg_offset = inst->dst.reg_offset;
scan_inst->saturate |= inst->saturate;
}
scan_inst = (vec4_instruction *)scan_inst->next;
@@ -1277,7 +1290,7 @@ vec4_visitor::run()
progress = dead_code_eliminate() || progress;
progress = opt_copy_propagation() || progress;
progress = opt_algebraic() || progress;
- progress = opt_compute_to_mrf() || progress;
+ progress = opt_register_coalesce() || progress;
} while (progress);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index b4d4e7e..9b85926 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -334,7 +334,7 @@ public:
bool virtual_grf_interferes(int a, int b);
bool opt_copy_propagation();
bool opt_algebraic();
- bool opt_compute_to_mrf();
+ bool opt_register_coalesce();
vec4_instruction *emit(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index fa9c155..45be376 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -70,7 +70,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
v->dump_instructions();
}
- v->opt_compute_to_mrf();
+ v->opt_register_coalesce();
if (print) {
printf("%s: instructions after:\n", func);
@@ -78,7 +78,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
}
}
-TEST_F(register_coalesce_test, test_easy_success)
+TEST_F(register_coalesce_test, test_compute_to_mrf)
{
src_reg something = src_reg(v, glsl_type::float_type);
dst_reg temp = dst_reg(v, glsl_type::float_type);
@@ -143,3 +143,57 @@ TEST_F(register_coalesce_test, test_dp4_mrf)
EXPECT_EQ(dp4->dst.file, MRF);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
+
+TEST_F(register_coalesce_test, test_dp4_grf)
+{
+ src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+ src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+ dst_reg init;
+
+ dst_reg to = dst_reg(v, glsl_type::vec4_type);
+ dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+ vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
+ to.writemask = WRITEMASK_Y;
+ v->emit(v->MOV(to, src_reg(temp)));
+
+ /* if we don't do something with the result, the automatic dead code
+ * elimination will remove all our instructions.
+ */
+ src_reg src = src_reg(to);
+ src.negate = true;
+ v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+ register_coalesce(v);
+
+ EXPECT_EQ(dp4->dst.reg, to.reg);
+ EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
+}
+
+TEST_F(register_coalesce_test, test_channel_mul_grf)
+{
+ src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+ src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+ dst_reg init;
+
+ dst_reg to = dst_reg(v, glsl_type::vec4_type);
+ dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+ vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
+ to.writemask = WRITEMASK_Y;
+ v->emit(v->MOV(to, src_reg(temp)));
+
+ /* if we don't do something with the result, the automatic dead code
+ * elimination will remove all our instructions.
+ */
+ src_reg src = src_reg(to);
+ src.negate = true;
+ v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+ register_coalesce(v);
+
+ /* This path isn't supported yet in the reswizzling code, so we're checking
+ * that we haven't done anything bad to scalar non-DP[234]s.
+ */
+ EXPECT_NE(mul->dst.reg, to.reg);
+}
--
1.7.10.4
More information about the mesa-dev
mailing list