Mesa (master): vc4: Add support for coalescing ALU ops into tex_[srtb] MOVs.

Eric Anholt anholt at kemper.freedesktop.org
Tue Nov 29 17:04:59 UTC 2016


Module: Mesa
Branch: master
Commit: 4690a93b123a64f8730a870a336ae9756d11fd18
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4690a93b123a64f8730a870a336ae9756d11fd18

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Nov 15 14:48:43 2016 -0800

vc4: Add support for coalescing ALU ops into tex_[srtb] MOVs.

This isn't as complete as I would like (can't merge interpolation because
of the implicit r5 dependency, doesn't work with control flow), but this
was cheap and easy.

Improves 3DMMES Taiji performance by 1.15353% +/- 0.299896% (n=29, 16)

total instructions in shared programs: 99810 -> 99059 (-0.75%)
instructions in affected programs:     10705 -> 9954 (-7.02%)

---

 .../drivers/vc4/vc4_opt_coalesce_ff_writes.c       | 36 ++++++++++++++--------
 src/gallium/drivers/vc4/vc4_qir.c                  | 11 +++++++
 src/gallium/drivers/vc4/vc4_qir.h                  |  1 +
 .../vc4/vc4_qir_emit_uniform_stream_resets.c       | 18 ++---------
 4 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
index c08c026..b247c69 100644
--- a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
@@ -24,8 +24,8 @@
 /**
  * @file vc4_opt_coalesce_ff_writes.c
  *
- * This modifies instructions that generate the value consumed by a VPM write
- * to write directly into the VPM.
+ * This modifies instructions that generate the value consumed by a VPM or TMU
+ * coordinate write to write directly into the VPM or TMU.
  */
 
 #include "vc4_qir.h"
@@ -33,9 +33,6 @@
 bool
 qir_opt_coalesce_ff_writes(struct vc4_compile *c)
 {
-        if (c->stage == QSTAGE_FRAG)
-                return false;
-
         /* For now, only do this pass when we don't have control flow. */
         struct qblock *block = qir_entry_block(c);
         if (block != qir_exit_block(c))
@@ -60,7 +57,7 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
                 if (mov_inst->src[0].file != QFILE_TEMP)
                         continue;
 
-                if (mov_inst->dst.file != QFILE_VPM)
+                if (!(mov_inst->dst.file == QFILE_VPM || qir_is_tex(mov_inst)))
                         continue;
 
                 uint32_t temp = mov_inst->src[0].index;
@@ -71,24 +68,37 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
                 if (!inst)
                         continue;
 
+                /* Don't bother trying to fold in an ALU op using a uniform to
+                 * a texture op, as we'll just have to lower the uniform back
+                 * out.
+                 */
+                if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst))
+                        continue;
+
                 if (qir_depends_on_flags(inst) || inst->sf)
                         continue;
 
                 if (qir_has_side_effects(c, inst) ||
-                    qir_has_side_effect_reads(c, inst)) {
+                    qir_has_side_effect_reads(c, inst) ||
+                    inst->op == QOP_VARY_ADD_C) {
                         continue;
                 }
 
-                /* Move the generating instruction to the end of the program
-                 * to maintain the order of the VPM writes.
+                /* Move the generating instruction into the position of the FF
+                 * write.
                  */
+                c->defs[inst->dst.index] = NULL;
+                inst->dst.file = mov_inst->dst.file;
+                inst->dst.index = mov_inst->dst.index;
+                if (qir_has_implicit_tex_uniform(mov_inst)) {
+                        inst->src[qir_get_tex_uniform_src(inst)] =
+                                mov_inst->src[qir_get_tex_uniform_src(mov_inst)];
+                }
+
                 list_del(&inst->link);
                 list_addtail(&inst->link, &mov_inst->link);
-                qir_remove_instruction(c, mov_inst);
 
-                c->defs[inst->dst.index] = NULL;
-                inst->dst.file = QFILE_VPM;
-                inst->dst.index = 0;
+                qir_remove_instruction(c, mov_inst);
 
                 progress = true;
         }
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index a082c41..d4f35d8 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -180,6 +180,17 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
 }
 
 bool
+qir_has_uniform_read(struct qinst *inst)
+{
+        for (int i = 0; i < qir_get_nsrc(inst); i++) {
+                if (inst->src[i].file == QFILE_UNIF)
+                        return true;
+        }
+
+        return false;
+}
+
+bool
 qir_is_mul(struct qinst *inst)
 {
         switch (inst->op) {
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 28d3344..e189bc3 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -577,6 +577,7 @@ int qir_get_tex_uniform_src(struct qinst *inst);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
 bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
+bool qir_has_uniform_read(struct qinst *inst);
 bool qir_is_mul(struct qinst *inst);
 bool qir_is_raw_mov(struct qinst *inst);
 bool qir_is_tex(struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
index 23ae8eb..443682a 100644
--- a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
+++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
@@ -36,24 +36,10 @@
 #include "util/u_math.h"
 
 static bool
-inst_reads_a_uniform(struct qinst *inst)
-{
-        if (qir_is_tex(inst))
-                return true;
-
-        for (int i = 0; i < qir_get_nsrc(inst); i++) {
-                if (inst->src[i].file == QFILE_UNIF)
-                        return true;
-        }
-
-        return false;
-}
-
-static bool
 block_reads_any_uniform(struct qblock *block)
 {
         qir_for_each_inst(inst, block) {
-                if (inst_reads_a_uniform(inst))
+                if (qir_has_uniform_read(inst))
                         return true;
         }
 
@@ -94,7 +80,7 @@ qir_emit_uniform_stream_resets(struct vc4_compile *c)
                 }
 
                 qir_for_each_inst(inst, block) {
-                        if (inst_reads_a_uniform(inst))
+                        if (qir_has_uniform_read(inst))
                                 uniform_count++;
                 }
         }




More information about the mesa-commit mailing list