[Mesa-dev] [PATCH 4/4] nv50/ir: manually optimize multiplication expansion logic

Ilia Mirkin imirkin at alum.mit.edu
Thu Dec 10 22:12:00 PST 2015


The conversion of 32-bit integer multiplies into 16-bit ones happens
after the regular optimization loop. However it's fairly common to
multiply by a small integer, rendering some of the expansion pointless.

Firstly, propagate immediates when possible into mul ops, secondly just
remove the ops when they are unnecessary.

Including the change to generate imad immediates, the effect is:

total instructions in shared programs : 6365463 -> 6351898 (-0.21%)
total gprs used in shared programs    : 728684 -> 728684 (0.00%)
total local used in shared programs   : 9904 -> 9904 (0.00%)
total bytes used in shared programs   : 44001576 -> 44036120 (0.08%)

                local        gpr       inst      bytes
    helped           0           0        3288           4
      hurt           0           0           0         842

It's easy for this to hurt bytes since we end up always generating the
8-byte form, while we can't always get rid of the immediate in question.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_lowering_nv50.cpp      | 31 +++++++++++++++++-----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 19965ff..8752b0c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -44,6 +44,8 @@ static bool
 expandIntegerMUL(BuildUtil *bld, Instruction *mul)
 {
    const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
+   ImmediateValue src1;
+   bool src1imm = mul->src(1).getImmediate(src1);
 
    DataType fTy; // full type
    switch (mul->sType) {
@@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
    for (int j = 0; j < 4; ++j)
       t[j] = bld->getSSA(fullSize);
 
-   s[0] = mul->getSrc(0);
-   s[1] = mul->getSrc(1);
-
    if (isSignedType(mul->sType) && highResult) {
       s[0] = bld->getSSA(fullSize);
       s[1] = bld->getSSA(fullSize);
       bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
       bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
+      src1.reg.data.s32 = abs(src1.reg.data.s32);
+   } else {
+      s[0] = mul->getSrc(0);
+      s[1] = mul->getSrc(1);
    }
 
    // split sources into halves
    i[0] = bld->mkSplit(a, halfSize, s[0]);
    i[1] = bld->mkSplit(b, halfSize, s[1]);
 
-   i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
-   i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+   if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) {
+      i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1],
+                               bld->mkImm(src1.reg.data.u32 & 0xffff));
+   } else {
+      i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0],
+                        src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]);
+      if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+         i[3] = i[2];
+         t[1] = t[0];
+      } else {
+         i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+      }
+   }
    i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
-   i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+   if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+      i[4] = i[3];
+      t[3] = t[2];
+   } else {
+      i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+   }
 
    if (highResult) {
       Value *c[2];
-- 
2.4.10



More information about the mesa-dev mailing list