Mesa (master): vc4: Add support for the 2-bit LOAD_IMM variants.

Fri Aug 26 00:24:59 UTC 2016

Module: Mesa
Branch: master
Commit: 074f1f3c0c2cd15213a62eb7f589423ece6391c8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=074f1f3c0c2cd15213a62eb7f589423ece6391c8

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Aug 25 12:15:29 2016 -0700

vc4: Add support for the 2-bit LOAD_IMM variants.

Extracted and fixed up from a patch by jonasarrow on GitHub.  This ended
up not getting used for ddx/ddy, but seems like it might still be useful.

---

 src/gallium/drivers/vc4/vc4_qir.c         |  2 ++
 src/gallium/drivers/vc4/vc4_qir.h         | 26 ++++++++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qpu.c         | 14 ++++++++++++++
 src/gallium/drivers/vc4/vc4_qpu.h         |  2 ++
 src/gallium/drivers/vc4/vc4_qpu_defines.h |  6 ++++++
 src/gallium/drivers/vc4/vc4_qpu_emit.c    |  8 ++++++++
 6 files changed, 58 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 0919d32..9b4a28e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -83,6 +83,8 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
 
         [QOP_LOAD_IMM] = { "load_imm", 0, 1 },
+        [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
+        [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
 
         [QOP_BRANCH] = { "branch", 0, 0, true },
         [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 9e61200..90cc138 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -156,8 +156,18 @@ enum qop {
          */
         QOP_TEX_RESULT,
 
+        /* 32-bit immediate loaded to each SIMD channel */
         QOP_LOAD_IMM,
 
+        /* 32-bit immediate divided into 16 2-bit unsigned int values and
+         * loaded to each corresponding SIMD channel.
+         */
+        QOP_LOAD_IMM_U2,
+        /* 32-bit immediate divided into 16 2-bit signed int values and
+         * loaded to each corresponding SIMD channel.
+         */
+        QOP_LOAD_IMM_I2,
+
         /* Jumps to block->successor[0] if the qinst->cond (as a
          * QPU_COND_BRANCH_*) passes, or block->successor[1] if not.  Note
          * that block->successor[1] may be unset if the condition is ALWAYS.
@@ -796,6 +806,22 @@ qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
                                         qir_reg(QFILE_LOAD_IMM, val), c->undef));
 }
 
+static inline struct qreg
+qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
+{
+        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
+                                        qir_reg(QFILE_LOAD_IMM, val),
+                                        c->undef));
+}
+
+static inline struct qreg
+qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
+{
+        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
+                                        qir_reg(QFILE_LOAD_IMM, val),
+                                        c->undef));
+}
+
 static inline void
 qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
              struct qreg dest, struct qreg src)
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index cf74c42..d022d10 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -165,6 +165,20 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
 }
 
 uint64_t
+qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
+{
+        return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
+                                                         QPU_LOAD_IMM_MODE);
+}
+
+uint64_t
+qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
+{
+        return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
+                                                         QPU_LOAD_IMM_MODE);
+}
+
+uint64_t
 qpu_branch(uint32_t cond, uint32_t target)
 {
         uint64_t inst = 0;
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index a0aac15..437e4f5 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -143,6 +143,8 @@ uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
                     struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
 uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
 uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
 uint64_t qpu_branch(uint32_t cond, uint32_t target) ATTRIBUTE_CONST;
 uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index 3ca5aba..e6ca345 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -246,6 +246,12 @@ enum qpu_unpack {
 #define QPU_UNPACK_SHIFT                57
 #define QPU_UNPACK_MASK                 QPU_MASK(59, 57)
 
+#define QPU_LOAD_IMM_MODE_SHIFT         57
+#define QPU_LOAD_IMM_MODE_MASK          QPU_MASK(59, 57)
+# define QPU_LOAD_IMM_MODE_U32          0
+# define QPU_LOAD_IMM_MODE_I2           1
+# define QPU_LOAD_IMM_MODE_U2           3
+
 /**
  * If set, the pack field means PACK_MUL or R4 packing, instead of normal
  * regfile a packing.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 77aa4f6..f5a5b8a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -428,6 +428,14 @@ vc4_generate_code_block(struct vc4_compile *c,
                         queue(block, qpu_load_imm_ui(dst, qinst->src[0].index));
                         break;
 
+                case QOP_LOAD_IMM_U2:
+                        queue(block, qpu_load_imm_u2(dst, qinst->src[0].index));
+                        break;
+
+                case QOP_LOAD_IMM_I2:
+                        queue(block, qpu_load_imm_i2(dst, qinst->src[0].index));
+                        break;
+
                 case QOP_MS_MASK:
                         src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
                         fixup_raddr_conflict(block, dst, &src[0], &src[1],