Mesa (master): vc4: Expose compares at a lower level in QIR.

Eric Anholt anholt at kemper.freedesktop.org
Thu Sep 4 18:42:26 UTC 2014


Module: Mesa
Branch: master
Commit: 874dfa8b2ecccf3c9a73453d7ccc6638363a59bd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=874dfa8b2ecccf3c9a73453d7ccc6638363a59bd

Author: Eric Anholt <eric at anholt.net>
Date:   Sun Aug 24 16:51:32 2014 -0700

vc4: Expose compares at a lower level in QIR.

Before, we had some special opcodes like CMP and SNE that emitted multiple
instructions.  Now, we reduce those operations significantly, giving
optimization more to look at for reducing redundant operations.

The downside is that QOP_SF is pretty special -- we're going to have to
track it separately when we're doing instruction scheduling, and in most
cases we want to peephole it into the instruction generating the
destination write (and probably not allocate the destination reg, unless
it's used for some other purpose as well).

---

 src/gallium/drivers/vc4/vc4_opt_algebraic.c |    9 ++-
 src/gallium/drivers/vc4/vc4_program.c       |  104 ++++++++++++++++++++-------
 src/gallium/drivers/vc4/vc4_qir.c           |   14 ++--
 src/gallium/drivers/vc4/vc4_qir.h           |   37 ++++++----
 src/gallium/drivers/vc4/vc4_qpu_emit.c      |   63 ++++++++--------
 5 files changed, 148 insertions(+), 79 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 2bf474c..f8ed621 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -45,9 +45,12 @@ qir_opt_algebraic(struct qcompile *c)
                 struct qinst *inst = (struct qinst *)node;
 
                 switch (inst->op) {
-                case QOP_CMP:
-                        /* Turn "dst = (a < 0) ? b : b)" into "dst = b" */
-                        if (qir_reg_equals(inst->src[1], inst->src[2])) {
+                case QOP_SEL_X_Y_ZS:
+                case QOP_SEL_X_Y_ZC:
+                case QOP_SEL_X_Y_NS:
+                case QOP_SEL_X_Y_NC:
+                        /* Turn "dst = (sf == x) ? a : a" into "dst = a" */
+                        if (qir_reg_equals(inst->src[0], inst->src[1])) {
                                 if (debug) {
                                         fprintf(stderr, "optimizing: ");
                                         qir_dump_inst(inst);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index d404047..aaa7eb3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -249,6 +249,58 @@ tgsi_to_qir_alu(struct tgsi_to_qir *trans,
 }
 
 static struct qreg
+tgsi_to_qir_seq(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZS(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_sne(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZC(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_slt(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NS(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_sge(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NC(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_cmp(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, src[0 * 4 + i]);
+        return qir_SEL_X_Y_NS(c,
+                              src[1 * 4 + i],
+                              src[2 * 4 + i]);
+}
+
+static struct qreg
 tgsi_to_qir_mad(struct tgsi_to_qir *trans,
                 struct tgsi_full_instruction *tgsi_inst,
                 enum qop op, struct qreg *src, int i)
@@ -280,16 +332,15 @@ tgsi_to_qir_lit(struct tgsi_to_qir *trans,
         case 2: {
                 struct qreg zero = qir_uniform_f(trans, 0.0);
 
+                qir_SF(c, x);
                 /* XXX: Clamp w to -128..128 */
-                return qir_CMP(c,
-                               x,
-                               zero,
-                               qir_EXP2(c, qir_FMUL(c,
-                                                    w,
-                                                    qir_LOG2(c,
-                                                             qir_FMAX(c,
-                                                                      y,
-                                                                      zero)))));
+                return qir_SEL_X_0_NC(c,
+                                      qir_EXP2(c, qir_FMUL(c,
+                                                           w,
+                                                           qir_LOG2(c,
+                                                                    qir_FMAX(c,
+                                                                             y,
+                                                                             zero)))));
         }
         default:
                 assert(!"not reached");
@@ -415,10 +466,10 @@ tgsi_to_qir_frc(struct tgsi_to_qir *trans,
         struct qcompile *c = trans->c;
         struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
         struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
-        return qir_CMP(c,
-                       diff,
-                       qir_FADD(c, diff, qir_uniform_f(trans, 1.0)),
-                       diff);
+        qir_SF(c, diff);
+        return qir_SEL_X_Y_NS(c,
+                              qir_FADD(c, diff, qir_uniform_f(trans, 1.0)),
+                              diff);
 }
 
 /**
@@ -436,12 +487,11 @@ tgsi_to_qir_flr(struct tgsi_to_qir *trans,
         /* This will be < 0 if we truncated and the truncation was of a value
          * that was < 0 in the first place.
          */
-        struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));
 
-        return qir_CMP(c,
-                       diff,
-                       qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)),
-                       trunc);
+        return qir_SEL_X_Y_NS(c,
+                              qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)),
+                              trunc);
 }
 
 static struct qreg
@@ -613,10 +663,10 @@ tgsi_to_qir_kill_if(struct tgsi_to_qir *trans, struct qreg *src, int i)
 
         if (trans->discard.file == QFILE_NULL)
                 trans->discard = qir_uniform_f(trans, 0.0);
-        trans->discard = qir_CMP(c,
-                                 src[0 * 4 + i],
-                                 qir_uniform_f(trans, 1.0),
-                                 trans->discard);
+        qir_SF(c, src[0 * 4 + i]);
+        trans->discard = qir_SEL_X_Y_NS(c,
+                                        qir_uniform_f(trans, 1.0),
+                                        trans->discard);
 }
 
 static void
@@ -705,11 +755,11 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
                 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
                 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
                 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
-                [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
-                [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
-                [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
-                [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
-                [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu },
+                [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
+                [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
+                [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
+                [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
+                [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
                 [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 0b0d2c1..7214990 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -43,11 +43,15 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_FMINABS] = { "fminabs", 1, 2 },
         [QOP_FMAXABS] = { "fmaxabs", 1, 2 },
 
-        [QOP_SEQ] = { "seq", 1, 2 },
-        [QOP_SNE] = { "sne", 1, 2 },
-        [QOP_SGE] = { "sge", 1, 2 },
-        [QOP_SLT] = { "slt", 1, 2 },
-        [QOP_CMP] = { "cmp", 1, 3 },
+        [QOP_SF] = { "sf", 0, 1 },
+        [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
+        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
+        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
+        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
+        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
+        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
+        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
+        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
 
         [QOP_FTOI] = { "ftoi", 1, 1 },
         [QOP_ITOF] = { "itof", 1, 1 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 77b5f1a..99df99c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -54,11 +54,21 @@ enum qop {
         QOP_FMINABS,
         QOP_FMAXABS,
 
-        QOP_SEQ,
-        QOP_SNE,
-        QOP_SGE,
-        QOP_SLT,
-        QOP_CMP,
+        /* Sets the flag register according to src. */
+        QOP_SF,
+
+        /* Note: Orderings of these compares must be the same as in
+         * qpu_defines.h.  Selects src[0] if the flag condition named by
+         * the suffix (ZS/ZC/NS/NC) holds, otherwise 0. */
+        QOP_SEL_X_0_ZS,
+        QOP_SEL_X_0_ZC,
+        QOP_SEL_X_0_NS,
+        QOP_SEL_X_0_NC,
+        /* Selects src[0] if the suffix's flag condition holds, else src[1]. */
+        QOP_SEL_X_Y_ZS,
+        QOP_SEL_X_Y_ZC,
+        QOP_SEL_X_Y_NS,
+        QOP_SEL_X_Y_NC,
 
         QOP_FTOI,
         QOP_ITOF,
@@ -260,6 +270,15 @@ QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
 QIR_ALU2(FMUL)
+QIR_NODST_1(SF)
+QIR_ALU1(SEL_X_0_ZS)
+QIR_ALU1(SEL_X_0_ZC)
+QIR_ALU1(SEL_X_0_NS)
+QIR_ALU1(SEL_X_0_NC)
+QIR_ALU2(SEL_X_Y_ZS)
+QIR_ALU2(SEL_X_Y_ZC)
+QIR_ALU2(SEL_X_Y_NS)
+QIR_ALU2(SEL_X_Y_NC)
 QIR_ALU2(FMIN)
 QIR_ALU2(FMAX)
 QIR_ALU2(FMINABS)
@@ -284,14 +303,6 @@ QIR_ALU0(FRAG_RCP_W)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 
 static inline struct qreg
-qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
-{
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst4(QOP_CMP, t, cmp, a, b, c->undef));
-        return t;
-}
-
-static inline struct qreg
 qir_R4_UNPACK(struct qcompile *c, int i)
 {
         struct qreg t = qir_get_temp(c);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 4e28ff7..6d2c34f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -60,6 +60,12 @@ last_inst(struct qcompile *c)
         return &q->inst;
 }
 
+static void
+set_last_cond_add(struct qcompile *c, uint32_t cond)
+{
+        *last_inst(c) = qpu_set_cond_add(*last_inst(c), cond);
+}
+
 /**
  * This is used to resolve the fact that we might register-allocate two
  * different operands of an instruction to the same physical register file
@@ -278,13 +284,6 @@ vc4_generate_code(struct qcompile *c)
                         M(FMUL),
                 };
 
-                static const uint32_t compareflags[] = {
-                        [QOP_SEQ - QOP_SEQ] = QPU_COND_ZS,
-                        [QOP_SNE - QOP_SEQ] = QPU_COND_ZC,
-                        [QOP_SLT - QOP_SEQ] = QPU_COND_NS,
-                        [QOP_SGE - QOP_SEQ] = QPU_COND_NC,
-                };
-
                 struct qpu_reg src[4];
                 for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
                         int index = qinst->src[i].index;
@@ -365,32 +364,36 @@ vc4_generate_code(struct qcompile *c)
                         }
                         break;
 
-                case QOP_CMP:
+                case QOP_SF:
+                        fixup_raddr_conflict(c, src[0], &src[1]);
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
                         *last_inst(c) |= QPU_SF;
-
-                        queue(c, qpu_a_MOV(dst, src[1]));
-                        *last_inst(c) = qpu_set_cond_add(*last_inst(c),
-                                                         QPU_COND_NS);
-
-                        queue(c, qpu_a_MOV(dst, src[2]));
-                        *last_inst(c) = qpu_set_cond_add(*last_inst(c),
-                                                         QPU_COND_NC);
                         break;
 
-                case QOP_SEQ:
-                case QOP_SNE:
-                case QOP_SGE:
-                case QOP_SLT:
-                        fixup_raddr_conflict(c, src[0], &src[1]);
-                        queue(c, qpu_a_FSUB(qpu_ra(QPU_W_NOP), src[0], src[1]));
-                        *last_inst(c) |= QPU_SF;
+                case QOP_SEL_X_0_ZS:
+                case QOP_SEL_X_0_ZC:
+                case QOP_SEL_X_0_NS:
+                case QOP_SEL_X_0_NC:
+                        queue(c, qpu_a_MOV(dst, src[0]));
+                        set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
+                                          QPU_COND_ZS);
+
+                        queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
+                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
+                                              1) + QPU_COND_ZS);
+                        break;
 
-                        queue(c, qpu_load_imm_f(dst, 0.0));
-                        queue(c, qpu_load_imm_f(dst, 1.0));
-                        *last_inst(c) = qpu_set_cond_add(*last_inst(c),
-                                                         compareflags[qinst->op - QOP_SEQ]);
+                case QOP_SEL_X_Y_ZS:
+                case QOP_SEL_X_Y_ZC:
+                case QOP_SEL_X_Y_NS:
+                case QOP_SEL_X_Y_NC:
+                        queue(c, qpu_a_MOV(dst, src[0]));
+                        set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
+                                          QPU_COND_ZS);
 
+                        queue(c, qpu_a_MOV(dst, src[1]));
+                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
+                                              1) + QPU_COND_ZS);
 
                         break;
 
@@ -475,8 +478,7 @@ vc4_generate_code(struct qcompile *c)
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
                                            qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)));
                         if (discard) {
-                                *last_inst(c) = qpu_set_cond_add(*last_inst(c),
-                                                                 QPU_COND_ZS);
+                                set_last_cond_add(c, QPU_COND_ZS);
                         }
                         break;
 
@@ -490,8 +492,7 @@ vc4_generate_code(struct qcompile *c)
                 case QOP_TLB_COLOR_WRITE:
                         queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
                         if (discard) {
-                                *last_inst(c) = qpu_set_cond_add(*last_inst(c),
-                                                                 QPU_COND_ZS);
+                                set_last_cond_add(c, QPU_COND_ZS);
                         }
                         break;
 




More information about the mesa-commit mailing list