Mesa (master): vc4: Restructure color packing as a series of channel replacements.

Eric Anholt anholt at kemper.freedesktop.org
Sat Jan 10 00:57:13 UTC 2015


Module: Mesa
Branch: master
Commit: 72cb6619cb75a92901d372d687505a747a384571
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=72cb6619cb75a92901d372d687505a747a384571

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Jan  8 18:32:29 2015 -0800

vc4: Restructure color packing as a series of channel replacements.

I'm using this in some WIP commits for doing blending in 8888 instead of
vec4.  But it also gives us these results immediately, thanks to allowing
more uniforms/immediates in the arguments:

total instructions in shared programs: 41027 -> 40960 (-0.16%)
instructions in affected programs:     4381 -> 4314 (-1.53%)

---

 src/gallium/drivers/vc4/vc4_program.c  |   34 ++++++++--------------
 src/gallium/drivers/vc4/vc4_qir.c      |    6 +++-
 src/gallium/drivers/vc4/vc4_qir.h      |   19 +++++++++++-
 src/gallium/drivers/vc4/vc4_qpu_emit.c |   50 ++++++++++++++++----------------
 4 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bba02ca..6bad156 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1843,32 +1843,22 @@ emit_frag_end(struct vc4_compile *c)
                 qir_TLB_Z_WRITE(c, z);
         }
 
-        bool color_written = false;
+        struct qreg packed_color = c->undef;
         for (int i = 0; i < 4; i++) {
-                if (swizzled_outputs[i].file != QFILE_NULL)
-                        color_written = true;
-        }
-
-        struct qreg packed_color;
-        if (color_written) {
-                /* Fill in any undefined colors.  The simulator will assertion
-                 * fail if we read something that wasn't written, and I don't
-                 * know what hardware does.
-                 */
-                for (int i = 0; i < 4; i++) {
-                        if (swizzled_outputs[i].file == QFILE_NULL)
-                                swizzled_outputs[i] = qir_uniform_f(c, 0.0);
+                if (swizzled_outputs[i].file == QFILE_NULL)
+                        continue;
+                if (packed_color.file == QFILE_NULL) {
+                        packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
+                } else {
+                        packed_color = qir_PACK_8_F(c,
+                                                    packed_color,
+                                                    swizzled_outputs[i],
+                                                    i);
                 }
-                packed_color = qir_get_temp(c);
-                qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
-                                      swizzled_outputs[0],
-                                      swizzled_outputs[1],
-                                      swizzled_outputs[2],
-                                      swizzled_outputs[3]));
-        } else {
-                packed_color = qir_uniform_ui(c, 0);
         }
 
+        if (packed_color.file == QFILE_NULL)
+                packed_color = qir_uniform_ui(c, 0);
 
         if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                 packed_color = vc4_logicop(c, packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 3fd3941..5f3b8dd 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -73,7 +73,11 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
+        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
+        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
+        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
+        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
         [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index f7d59a8..6dac00f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -100,7 +100,11 @@ enum qop {
         QOP_VW_SETUP,
         QOP_VR_SETUP,
         QOP_PACK_SCALED,
-        QOP_PACK_COLORS,
+        QOP_PACK_8888_F,
+        QOP_PACK_8A_F,
+        QOP_PACK_8B_F,
+        QOP_PACK_8C_F,
+        QOP_PACK_8D_F,
         QOP_VPM_READ,
         QOP_TLB_DISCARD_SETUP,
         QOP_TLB_STENCIL_SETUP,
@@ -473,6 +477,11 @@ QIR_ALU1(RSQ)
 QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU2(PACK_SCALED)
+QIR_ALU1(PACK_8888_F)
+QIR_ALU2(PACK_8A_F)
+QIR_ALU2(PACK_8B_F)
+QIR_ALU2(PACK_8C_F)
+QIR_ALU2(PACK_8D_F)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
@@ -539,6 +548,14 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
 }
 
 static inline struct qreg
+qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+{
+        struct qreg t = qir_get_temp(c);
+        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
+        return t;
+}
+
+static inline struct qreg
 qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
 {
         return qir_EXP2(c, qir_FMUL(c,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 503f32a..857d56e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -347,40 +347,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                         break;
 
-                case QOP_PACK_COLORS: {
-                        /* We have to be careful not to start writing over one
-                         * of our source values when incrementally writing the
-                         * destination.  So, if the dst is one of the srcs, we
-                         * pack that one first (and we pack 4 channels at once
-                         * for the first pack).
-                         */
-                        struct qpu_reg first_pack = src[0];
-                        for (int i = 0; i < 4; i++) {
-                                if (src[i].mux == dst.mux &&
-                                    src[i].addr == dst.addr) {
-                                        first_pack = dst;
-                                        break;
-                                }
-                        }
-                        queue(c, qpu_m_MOV(dst, first_pack));
+                case QOP_PACK_8888_F:
+                        queue(c, qpu_m_MOV(dst, src[0]));
                         *last_inst(c) |= QPU_PM;
                         *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
                                                        QPU_PACK);
+                        break;
 
-                        for (int i = 0; i < 4; i++) {
-                                if (src[i].mux == first_pack.mux &&
-                                    src[i].addr == first_pack.addr) {
-                                        continue;
+                case QOP_PACK_8A_F:
+                case QOP_PACK_8B_F:
+                case QOP_PACK_8C_F:
+                case QOP_PACK_8D_F:
+                        /* If dst doesn't happen to already contain src[0],
+                         * then we have to move it in.
+                         */
+                        if (qinst->src[0].file != QFILE_NULL &&
+                            (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
+                                /* Don't overwrite src1 while setting up
+                                 * the dst!
+                                 */
+                                if (dst.mux == src[1].mux &&
+                                    dst.addr == src[1].addr) {
+                                        queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
+                                        src[1] = qpu_rb(31);
                                 }
 
-                                queue(c, qpu_m_MOV(dst, src[i]));
-                                *last_inst(c) |= QPU_PM;
-                                *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
-                                                               QPU_PACK);
+                                queue(c, qpu_m_MOV(dst, src[0]));
                         }
 
+                        queue(c, qpu_m_MOV(dst, src[1]));
+                        *last_inst(c) |= QPU_PM;
+                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
+                                                       qinst->op - QOP_PACK_8A_F,
+                                                       QPU_PACK);
                         break;
-                }
 
                 case QOP_FRAG_X:
                         queue(c, qpu_a_ITOF(dst,




More information about the mesa-commit mailing list