Mesa (master): i965/fs: Lower integer multiplication after optimizations.

Matt Turner mattst88 at kemper.freedesktop.org
Mon May 18 18:34:33 UTC 2015


Module: Mesa
Branch: master
Commit: 1e4e17fbd9296cc5064aabdb351a894d10190cb6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e4e17fbd9296cc5064aabdb351a894d10190cb6

Author: Matt Turner <mattst88 at gmail.com>
Date:   Mon May 11 09:29:56 2015 -0700

i965/fs: Lower integer multiplication after optimizations.

32-bit x 32-bit integer multiplication requires multiple instructions
until Broadwell. This patch just lets us treat the MUL instruction in
the FS backend like it operates on Broadwell, and after optimizations
we lower it into a sequence of instructions on older platforms.

Doing this will allow us to some extra optimization on integer
multiplies.

Reviewed-by: Jason Ekstrand <jason.ekstrand at intel.com>

---

 src/mesa/drivers/dri/i965/brw_fs.cpp         |   66 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h           |    1 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp     |   36 +-------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   31 +-----------
 4 files changed, 70 insertions(+), 64 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b63ca23..cb13fcb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3523,6 +3523,71 @@ fs_visitor::lower_load_payload()
    return progress;
 }
 
+bool
+fs_visitor::lower_integer_multiplication()
+{
+   bool progress = false;
+
+   /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
+    * directly, but Cherryview cannot.
+    */
+   if (devinfo->gen >= 8 && !devinfo->is_cherryview)
+      return false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      if (inst->opcode != BRW_OPCODE_MUL ||
+          inst->dst.is_accumulator() ||
+          (inst->dst.type != BRW_REGISTER_TYPE_D &&
+           inst->dst.type != BRW_REGISTER_TYPE_UD))
+         continue;
+
+#define insert(instr) inst->insert_before(block, instr)
+
+      /* The MUL instruction isn't commutative. On Gen <= 6, only the low
+       * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
+       * src1 are used.
+       *
+       * If multiplying by an immediate value that fits in 16-bits, do a
+       * single MUL instruction with that value in the proper location.
+       */
+      if (inst->src[1].file == IMM &&
+          inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+         if (devinfo->gen < 7) {
+            fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+                       inst->dst.type, dispatch_width);
+            insert(MOV(imm, inst->src[1]));
+            insert(MUL(inst->dst, imm, inst->src[0]));
+         } else {
+            insert(MUL(inst->dst, inst->src[0], inst->src[1]));
+         }
+      } else {
+         if (devinfo->gen >= 7)
+            no16("SIMD16 integer multiply unsupported\n");
+
+         const unsigned channels = dispatch_width;
+         const enum brw_reg_type type = inst->dst.type;
+         const fs_reg acc(retype(brw_acc_reg(channels), type));
+         const fs_reg null(retype(brw_null_vec(channels), type));
+
+         const fs_reg &src0 = inst->src[0];
+         const fs_reg &src1 = inst->src[1];
+
+         insert(MUL(acc, src0, src1));
+         insert(MACH(null, src0, src1));
+         insert(MOV(inst->dst, acc));
+      }
+#undef insert
+
+      inst->remove(block);
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 void
 fs_visitor::dump_instructions()
 {
@@ -4001,6 +4066,7 @@ fs_visitor::optimize()
    }
 
    OPT(opt_combine_constants);
+   OPT(lower_integer_multiplication);
 
    lower_uniform_pull_constant_loads();
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 991cff9..f2aa0ae 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -241,6 +241,7 @@ public:
    void no16(const char *msg, ...);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
+   bool lower_integer_multiplication();
    bool opt_combine_constants();
 
    void emit_dummy_fs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9cfd0e7..5dd8363 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -780,41 +780,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_imul: {
-      if (devinfo->gen >= 8) {
-         emit(MUL(result, op[0], op[1]));
-         break;
-      } else {
-         nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
-         nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-
-         if (value0 && value0->u[0] < (1 << 16)) {
-            if (devinfo->gen < 7) {
-               emit(MUL(result, op[0], op[1]));
-            } else {
-               emit(MUL(result, op[1], op[0]));
-            }
-            break;
-         } else if (value1 && value1->u[0] < (1 << 16)) {
-            if (devinfo->gen < 7) {
-               emit(MUL(result, op[1], op[0]));
-            } else {
-               emit(MUL(result, op[0], op[1]));
-            }
-            break;
-         }
-      }
-
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
-
-      emit(MUL(acc, op[0], op[1]));
-      emit(MACH(reg_null_d, op[0], op[1]));
-      emit(MOV(result, fs_reg(acc)));
+   case nir_op_imul:
+      emit(MUL(result, op[0], op[1]));
       break;
-   }
 
    case nir_op_imul_high:
    case nir_op_umul_high: {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index abaea5f..ead7768 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -873,36 +873,7 @@ fs_visitor::visit(ir_expression *ir)
       unreachable("not reached: should be handled by ir_sub_to_add_neg");
 
    case ir_binop_mul:
-      if (devinfo->gen < 8 && ir->type->is_integer()) {
-	 /* For integer multiplication, the MUL uses the low 16 bits
-	  * of one of the operands (src0 on gen6, src1 on gen7).  The
-	  * MACH accumulates in the contribution of the upper 16 bits
-	  * of that operand.
-          */
-         if (ir->operands[0]->is_uint16_constant()) {
-            if (devinfo->gen < 7)
-               emit(MUL(this->result, op[0], op[1]));
-            else
-               emit(MUL(this->result, op[1], op[0]));
-         } else if (ir->operands[1]->is_uint16_constant()) {
-            if (devinfo->gen < 7)
-               emit(MUL(this->result, op[1], op[0]));
-            else
-               emit(MUL(this->result, op[0], op[1]));
-         } else {
-            if (devinfo->gen >= 7)
-               no16("SIMD16 explicit accumulator operands unsupported\n");
-
-            struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                        this->result.type);
-
-            emit(MUL(acc, op[0], op[1]));
-            emit(MACH(reg_null_d, op[0], op[1]));
-            emit(MOV(this->result, fs_reg(acc)));
-         }
-      } else {
-	 emit(MUL(this->result, op[0], op[1]));
-      }
+      emit(MUL(this->result, op[0], op[1]));
       break;
    case ir_binop_imul_high: {
       if (devinfo->gen >= 7)




More information about the mesa-commit mailing list