Mesa (master): vc4: Fold the 16-bit integer pack into the instructions generating it.
Eric Anholt
anholt at kemper.freedesktop.org
Fri Aug 21 20:35:04 UTC 2015
Module: Mesa
Branch: master
Commit: 89b1b33f44bc6ce71109ac8668529c30b6d6d910
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89b1b33f44bc6ce71109ac8668529c30b6d6d910
Author: Eric Anholt <eric at anholt.net>
Date: Fri Aug 21 00:08:13 2015 -0700
vc4: Fold the 16-bit integer pack into the instructions generating it.
total instructions in shared programs: 97580 -> 96798 (-0.80%)
instructions in affected programs: 52826 -> 52044 (-1.48%)
---
src/gallium/drivers/vc4/vc4_program.c | 19 ++++++++++---------
src/gallium/drivers/vc4/vc4_qir.c | 2 --
src/gallium/drivers/vc4/vc4_qir.h | 4 ----
src/gallium/drivers/vc4/vc4_qpu_emit.c | 14 +++++---------
src/gallium/drivers/vc4/vc4_register_allocate.c | 13 +++++++------
5 files changed, 22 insertions(+), 30 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6bf4c9e..e002983 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1186,22 +1186,23 @@ emit_frag_end(struct vc4_compile *c)
static void
emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
{
- struct qreg xyi[2];
+ struct qreg packed = qir_get_temp(c);
for (int i = 0; i < 2; i++) {
struct qreg scale =
qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
- xyi[i] = qir_FTOI(c, qir_FMUL(c,
- qir_FMUL(c,
- c->outputs[c->output_position_index + i],
- scale),
- rcp_w));
+ struct qreg packed_chan = packed;
+ packed_chan.pack = QPU_PACK_A_16A + i;
+
+ qir_FTOI_dest(c, packed_chan,
+ qir_FMUL(c,
+ qir_FMUL(c,
+ c->outputs[c->output_position_index + i],
+ scale),
+ rcp_w));
}
- struct qreg packed = qir_get_temp(c);
- qir_PACK_16A_I(c, packed, xyi[0]);
- qir_PACK_16B_I(c, packed, xyi[1]);
qir_VPM_WRITE(c, packed);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e5efb7cb..9d93071 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -76,8 +76,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
[QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
[QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
- [QOP_PACK_16A_I] = { "pack_16a_i", 1, 1 },
- [QOP_PACK_16B_I] = { "pack_16b_i", 1, 1 },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index cbeff43..a2b21fa 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -110,8 +110,6 @@ enum qop {
QOP_PACK_8B_F,
QOP_PACK_8C_F,
QOP_PACK_8D_F,
- QOP_PACK_16A_I,
- QOP_PACK_16B_I,
QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
QOP_TLB_Z_WRITE,
@@ -604,8 +602,6 @@ QIR_PACK(PACK_8A_F)
QIR_PACK(PACK_8B_F)
QIR_PACK(PACK_8C_F)
QIR_PACK(PACK_8D_F)
-QIR_PACK(PACK_16A_I)
-QIR_PACK(PACK_16B_I)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index bf614a2..adf3a8b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -402,15 +402,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
break;
- case QOP_PACK_16A_I:
- case QOP_PACK_16B_I:
- queue(c,
- qpu_a_MOV(dst, src[0]) |
- QPU_SET_FIELD(qinst->op == QOP_PACK_16A_I ?
- QPU_PACK_A_16A : QPU_PACK_A_16B,
- QPU_PACK));
- break;
-
case QOP_TEX_S:
case QOP_TEX_T:
case QOP_TEX_R:
@@ -516,6 +507,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
+ if (qinst->dst.pack) {
+ assert(dst.mux == QPU_MUX_A);
+ *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+ QPU_PACK);
+ }
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 9301314..2ea8850 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -268,16 +268,17 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
- case QOP_PACK_16A_I:
- case QOP_PACK_16B_I:
- /* The pack flags require an A-file dst register. */
- class_bits[inst->dst.index] &= CLASS_BIT_A;
- break;
-
default:
break;
}
+ if (inst->dst.pack && !qir_is_mul(inst)) {
+ /* The non-MUL pack flags require an A-file dst
+ * register.
+ */
+ class_bits[inst->dst.index] &= CLASS_BIT_A;
+ }
+
if (qir_src_needs_a_file(inst)) {
switch (inst->op) {
case QOP_UNPACK_8A_F:
More information about the mesa-commit mailing list