[Mesa-dev] [PATCH 18/21] i965/vec4: Extract dword multiplies

Mon Dec 22 19:29:28 PST 2014

This is the analogous patch to i965/fs: Extract dword multiplies. Like that
patch, we'll do the internal users separately to aid bisection.

Because that patch was requested by Jason, this one is also indirectly
requested by him.

Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp         | 71 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h           |  4 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 62 ++--------------------
 3 files changed, 80 insertions(+), 57 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 21ab23c..8a2fd12 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1615,6 +1615,77 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
    emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
 }
 
+void
+vec4_visitor::emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
+                          bool src0_u16, bool src1_u16)
+{
+   assert(type_is_dword(src0.type) && type_is_dword(src1.type));
+   /* ir_binop_mul on integers.  Gen8+ multiplies DW * DW natively: no MACH. */
+   if (brw->gen >= 8) {
+      emit(MUL(dst, src0, src1));
+      return;
+   }
+
+   /* Before Gen8, an integer MUL only uses the low 16 bits of one operand
+    * (src0 through SNB, src1 on IVB and later); the MACH accumulates in the
+    * contribution of that operand's upper 16 bits.  When the caller flags a
+    * source via src0_u16/src1_u16 (visit() passes is_uint16_constant()), a
+    * single MUL with that source in the 16-bit slot for this gen suffices.
+    */
+   if (src0_u16) {
+      if (brw->gen < 7)
+         emit(MUL(dst, src0, src1));
+      else
+         emit(MUL(dst, src1, src0));
+   } else if (src1_u16) {
+      if (brw->gen < 7)
+         emit(MUL(dst, src1, src0));
+      else
+         emit(MUL(dst, src0, src1));
+   } else {
+      struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+      /* MUL into acc, MACH with a null destination, then copy acc to dst. */
+      emit(MUL(acc, src0, src1));
+      emit(MACH(dst_null_d(), src0, src1));
+      emit(MOV(dst, src_reg(acc)));
+   }
+}
+
+void
+vec4_visitor::emit_mul_dw_high(dst_reg dst, src_reg src0, src_reg src1)
+{
+   struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+   if (brw->gen >= 8) {
+      /* Used for ir_binop_imul_high.  On GEN8+ a source modifier must not be
+       * used on src1 for the macro operation; this applies to both the mul
+       * and the mach of the macro.  If a source modifier is required, an
+       * additional mov instruction may be used before the macro.
+       *
+       * Unlike in the FS, we cannot just use a QWORD mul: "Restriction: Q/UQ
+       * data types are not supported in Align16 mode."
+       */
+      src_reg temp;
+      if (src1.negate || src1.abs) {
+         if (!src0.negate && !src0.abs) {
+            /* src0 has no modifiers: swap so the modified source is src0 */
+            temp = src0;
+            src0 = src1;
+            src1 = temp;
+         } else {
+            /* Otherwise, apply via MOV first (this temp shadows the outer) */
+            src_reg temp = src_reg(this, glsl_type::uvec4_type);
+            emit(BRW_OPCODE_MOV, dst_reg(temp), src1);
+            src1 = temp;
+         }
+      }
+      emit(SHADER_OPCODE_MUL64, acc, src0, src1);
+   } else {
+      emit(MUL(acc, src0, src1));
+   }
+   /* The MACH writes the ir_binop_imul_high result to dst on every gen. */
+   emit(MACH(dst, src0, src1));
+}
+
 bool
 vec4_visitor::run()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index be52fbc..9594362 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -486,6 +486,10 @@ public:
    void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                   const src_reg &src1 = src_reg());
    src_reg fix_math_operand(src_reg src);
+   void emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
+                    bool src0_u16 = false,
+                    bool src1_u16 = false);
+   void emit_mul_dw_high(dst_reg dst, src_reg src0, src_reg src1);
 
    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c8d1be6..f6fb98d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1467,68 +1467,16 @@ vec4_visitor::visit(ir_expression *ir)
       unreachable("not reached: should be handled by ir_sub_to_add_neg");
 
    case ir_binop_mul:
-      if (brw->gen < 8 && ir->type->is_integer()) {
-	 /* For integer multiplication, the MUL uses the low 16 bits of one of
-	  * the operands (src0 through SNB, src1 on IVB and later).  The MACH
-	  * accumulates in the contribution of the upper 16 bits of that
-	  * operand.  If we can determine that one of the args is in the low
-	  * 16 bits, though, we can just emit a single MUL.
-          */
-         if (ir->operands[0]->is_uint16_constant()) {
-            if (brw->gen < 7)
-               emit(MUL(result_dst, op[0], op[1]));
-            else
-               emit(MUL(result_dst, op[1], op[0]));
-         } else if (ir->operands[1]->is_uint16_constant()) {
-            if (brw->gen < 7)
-               emit(MUL(result_dst, op[1], op[0]));
-            else
-               emit(MUL(result_dst, op[0], op[1]));
-         } else {
-            struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
-            emit(MUL(acc, op[0], op[1]));
-            emit(MACH(dst_null_d(), op[0], op[1]));
-            emit(MOV(result_dst, src_reg(acc)));
-         }
+      if (ir->type->is_integer()) {
+         emit_mul_dw(result_dst, op[0], op[1],
+                     ir->operands[0]->is_uint16_constant(),
+                     ir->operands[1]->is_uint16_constant());
       } else {
-         /* Gen8+ can natively multiply a DW * DW chopping off the upper bits of
-          * the operation. No MACH is needed
-          */
 	 emit(MUL(result_dst, op[0], op[1]));
       }
       break;
    case ir_binop_imul_high: {
-      struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-      if (brw->gen >= 8) {
-         /* GEN8+ A source modifier must not be used on src1 for the macro
-          * operation.  This applies to both mul and mach of the macro. If
-          * source modifier is required, an additional mov instruction may be
-          * used before the macro.
-          *
-          * Unlike in the FS, we cannot just use a QWORD mul: "Restriction: Q/UQ
-          * data types are not supported in Align16 mode."
-          */
-         src_reg temp;
-         if (op[1].negate || op[1].abs) {
-            if (!op[0].negate && !op[0].abs) {
-               /* If op[0] has no modifiers, just swap */
-               temp = op[0];
-               op[0] = op[1];
-               op[1] = temp;
-            } else {
-               /* Otherwise, apply via MOV first */
-               src_reg temp = src_reg(this, glsl_type::uvec4_type);
-               emit(BRW_OPCODE_MOV, dst_reg(temp), op[1]);
-               op[1] = temp;
-            }
-         }
-         emit(SHADER_OPCODE_MUL64, acc, op[0], op[1]);
-      } else {
-         emit(MUL(acc, op[0], op[1]));
-      }
-
-      emit(MACH(result_dst, op[0], op[1]));
+      emit_mul_dw_high(result_dst, op[0], op[1]);
       break;
    }
    case ir_binop_div:
-- 
2.2.1