Mesa (master): vc4: Coalesce MOVs into VPM with the instructions generating the values.

Eric Anholt anholt at kemper.freedesktop.org
Thu Dec 18 23:11:26 UTC 2014


Module: Mesa
Branch: master
Commit: e06b0778f59980429fececb1aa0de0f0a3f23427
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e06b0778f59980429fececb1aa0de0f0a3f23427

Author: Eric Anholt <eric at anholt.net>
Date:   Wed Dec 17 20:35:17 2014 -0800

vc4: Coalesce MOVs into VPM with the instructions generating the values.

total instructions in shared programs: 41168 -> 40976 (-0.47%)
instructions in affected programs:     18156 -> 17964 (-1.06%)

---

 src/gallium/drivers/vc4/Makefile.sources     |    1 +
 src/gallium/drivers/vc4/vc4_opt_vpm_writes.c |  117 ++++++++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qir.c            |   38 +++++----
 src/gallium/drivers/vc4/vc4_qir.h            |    2 +
 4 files changed, 143 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1f8e8c4..95f1a34 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -16,6 +16,7 @@ C_SOURCES := \
 	vc4_opt_cse.c \
 	vc4_opt_dead_code.c \
 	vc4_opt_small_immediates.c \
+	vc4_opt_vpm_writes.c \
 	vc4_packet.h \
 	vc4_program.c \
 	vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
new file mode 100644
index 0000000..477d326
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_vpm_writes.c
+ *
+ * This modifies instructions that generate the value consumed by a VPM write
+ * to write directly into the VPM.
+ */
+
+#include "vc4_qir.h"
+
+/**
+ * Coalesces each single-use MOV from a temp into the VPM with the instruction
+ * defining that temp.  Returns true on progress; skips fragment shaders and
+ * programs without temps (which would otherwise make the VLAs zero-length).
+ */
+bool
+qir_opt_vpm_writes(struct vc4_compile *c)
+{
+        if (c->stage == QSTAGE_FRAG || c->num_temps == 0)
+                return false;
+
+        bool progress = false;
+        struct simple_node *node;
+        struct qinst *defs[c->num_temps];
+        struct qinst *vpm_writes[64] = { 0 };
+        const uint32_t max_writes = sizeof(vpm_writes) / sizeof(vpm_writes[0]);
+        uint32_t use_count[c->num_temps];
+        uint32_t vpm_write_count = 0;
+        memset(&defs, 0, sizeof(defs));
+        memset(&use_count, 0, sizeof(use_count));
+
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+                switch (inst->dst.file) {
+                case QFILE_TEMP:
+                        defs[inst->dst.index] = inst;
+                        break;
+                case QFILE_VPM:
+                        /* Extra writes past capacity are left alone. */
+                        if (vpm_write_count < max_writes)
+                                vpm_writes[vpm_write_count++] = inst;
+                        break;
+                default:
+                        break;
+                }
+
+                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                        if (inst->src[i].file == QFILE_TEMP)
+                                use_count[inst->src[i].index]++;
+                }
+        }
+
+        for (uint32_t i = 0; i < vpm_write_count; i++) {
+                if (vpm_writes[i]->op != QOP_MOV ||
+                    vpm_writes[i]->src[0].file != QFILE_TEMP) {
+                        continue;
+                }
+                uint32_t temp = vpm_writes[i]->src[0].index;
+                if (use_count[temp] != 1)
+                        continue;
+
+                /* A temp that is read but never written has no def. */
+                struct qinst *inst = defs[temp];
+                if (!inst || qir_is_multi_instruction(inst) ||
+                    qir_depends_on_flags(inst) ||
+                    qir_has_side_effects(c, inst)) {
+                        continue;
+                }
+
+                /* A QOP_TEX_RESULT destination is r4, so we can't move
+                 * accesses to it past another QOP_TEX_RESULT which would
+                 * update it.
+                 */
+                int src;
+                for (src = 0; src < qir_get_op_nsrc(inst->op); src++) {
+                        if (inst->src[src].file != QFILE_TEMP)
+                                continue;
+                        struct qinst *src_def = defs[inst->src[src].index];
+                        if (src_def && src_def->op == QOP_TEX_RESULT)
+                                break;
+                }
+                if (src != qir_get_op_nsrc(inst->op))
+                        continue;
+
+                /* Relink the generating instruction at the VPM MOV it
+                 * replaces, to maintain the order of the VPM writes.
+                 */
+                move_to_tail(&vpm_writes[i]->link, &inst->link);
+                qir_remove_instruction(vpm_writes[i]);
+                inst->dst.file = QFILE_VPM;
+                inst->dst.index = 0;
+                progress = true;
+        }
+        return progress;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 91bdefe..3fd3941 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -32,6 +32,7 @@ struct qir_op_info {
         const char *name;
         uint8_t ndst, nsrc;
         bool has_side_effects;
+        bool multi_instruction;
 };
 
 static const struct qir_op_info qir_op_info[] = {
@@ -59,21 +60,21 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_NOT] = { "not", 1, 1 },
 
         [QOP_SF] = { "sf", 0, 1 },
-        [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
-        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
-        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
-        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
-        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
-        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
-        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
-        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
-
-        [QOP_RCP] = { "rcp", 1, 1 },
-        [QOP_RSQ] = { "rsq", 1, 1 },
-        [QOP_EXP2] = { "exp2", 1, 2 },
-        [QOP_LOG2] = { "log2", 1, 2 },
-        [QOP_PACK_COLORS] = { "pack_colors", 1, 4 },
-        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 },
+        [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
+        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
+        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
+        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
+        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
+        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
+        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+
+        [QOP_RCP] = { "rcp", 1, 1, false, true },
+        [QOP_RSQ] = { "rsq", 1, 1, false, true },
+        [QOP_EXP2] = { "exp2", 1, 2, false, true },
+        [QOP_LOG2] = { "log2", 1, 2, false, true },
+        [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
@@ -156,6 +157,12 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
 }
 
 bool
+qir_is_multi_instruction(struct qinst *inst)
+{
+        return qir_op_info[inst->op].multi_instruction;
+}
+
+bool
 qir_depends_on_flags(struct qinst *inst)
 {
         switch (inst->op) {
@@ -397,6 +404,7 @@ qir_optimize(struct vc4_compile *c)
                 OPTPASS(qir_opt_copy_propagation);
                 OPTPASS(qir_opt_dead_code);
                 OPTPASS(qir_opt_small_immediates);
+                OPTPASS(qir_opt_vpm_writes);
 
                 if (!progress)
                         break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index dd9866e..f7d59a8 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -374,6 +374,7 @@ struct qreg qir_get_temp(struct vc4_compile *c);
 int qir_get_op_nsrc(enum qop qop);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
+bool qir_is_multi_instruction(struct qinst *inst);
 bool qir_depends_on_flags(struct qinst *inst);
 bool qir_writes_r4(struct qinst *inst);
 bool qir_reads_r4(struct qinst *inst);
@@ -389,6 +390,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
 bool qir_opt_cse(struct vc4_compile *c);
 bool qir_opt_dead_code(struct vc4_compile *c);
 bool qir_opt_small_immediates(struct vc4_compile *c);
+bool qir_opt_vpm_writes(struct vc4_compile *c);
 
 void qpu_schedule_instructions(struct vc4_compile *c);
 




More information about the mesa-commit mailing list