[Mesa-dev] [PATCH 19/21] i965: Add some dw mul operands fixes

Ben Widawsky benjamin.widawsky at intel.com
Mon Dec 22 19:29:29 PST 2014


Because we now potentially change src0/src1 in the visitor, and internal callers
can use this function, we need to make sure we won't emit a bad operation.
Specifically, the requirement that src1 have the only immediate is easy to hit.

While the existing assertion in brw_eu_emit.c will catch this case, we do have
some ability to fix it up, and so we try to do that. Some cases we cannot fix
things and so we must assert. Finding it at visitor time does make things a bit
easier to debug and fix.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 44 ++++++++++++++++++++++------------
 src/mesa/drivers/dri/i965/brw_fs.h     |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 29 ++++++++++++++++------
 src/mesa/drivers/dri/i965/brw_vec4.h   |  2 ++
 4 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7c43280..672efcd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1649,6 +1649,20 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    return inst;
 }
 
+fs_inst *
+fs_visitor::do_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1, bool can_swap)
+{
+   /* 2 src operations must have the immediate in src1. We let constant folding
+    * code handle the case where both are immediate */
+   if ((src0.file == IMM && src1.file != IMM) && can_swap) {
+         fs_reg tmp = src1;
+         src1 = src0;
+         src0 = tmp;
+   }
+   assert(!(src0.file == IMM && src1.file != IMM));
+   return emit(MUL(dst, src0, src1));
+}
+
 void
 fs_visitor::emit_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1,
                         bool src0_u16, bool src1_u16)
@@ -1660,12 +1674,12 @@ fs_visitor::emit_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1,
     * are smaller than a DW, we can forego the MACH.
     */
    if (brw->gen >= 8) {
-      emit(MUL(dst, src0, src1));
+      do_mul_dw(dst, src0, src1, true);
       return;
    } else if ((src0_u16 && src1_u16)) {
       fs_reg s0 = fs_reg(retype(src0, BRW_REGISTER_TYPE_UW));
       fs_reg s1 = fs_reg(retype(src0, BRW_REGISTER_TYPE_UW));
-      emit(MUL(dst, s0, s1));
+      do_mul_dw(dst, s0, s1, true);
       return;
    }
 
@@ -1675,31 +1689,31 @@ fs_visitor::emit_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1,
     */
    if (src0_u16) {
       if (brw->gen < 7)
-         emit(MUL(dst, src0, src1));
+         do_mul_dw(dst, src0, src1, false);
       else
-         emit(MUL(dst, src1, src0));
+         do_mul_dw(dst, src1, src0, false);
    } else if (src1_u16) {
       if (brw->gen < 7)
-         emit(MUL(dst, src1, src0));
+         do_mul_dw(dst, src1, src0, false);
       else
-         emit(MUL(dst, src0, src1));
+         do_mul_dw(dst, src0, src1, false);
    } else {
       unsigned width = brw->gen == 7 ? 8 : dispatch_width;
       fs_reg acc = fs_reg(retype(brw_acc_reg(width), dst.type));
       fs_reg null = fs_reg(retype(brw_null_vec(width), dst.type));
 
       if (brw->gen == 7 && dispatch_width == 16) {
-         emit(MUL(acc, half(src0, 0), half(src1, 0)));
+         do_mul_dw(acc, half(src0, 0), half(src1, 0), true);
          emit(MACH(null, half(src0, 0), half(src1, 0)));
          fs_inst *mov = emit(MOV(half(dst, 0), acc));
          mov->force_sechalf = true;
 
-         emit(MUL(acc, half(src0, 1), half(src1, 1)));
+         do_mul_dw(acc, half(src0, 1), half(src1, 1), true);
          emit(MACH(null, half(src0, 1), half(src1, 1)));
          mov = emit(MOV(half(dst, 1), acc));
          mov->force_sechalf = true;
       } else {
-         emit(MUL(acc, src0, src1));
+         do_mul_dw(acc, src0, src1, true);
          emit(MACH(null, src0, src1));
          emit(MOV(dst, acc));
       }
@@ -1733,15 +1747,15 @@ fs_visitor::emit_mul_dw_high(fs_reg dst, fs_reg src0, fs_reg src1)
       fs_reg temp = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8),
                            qw_type_for_mul(src0, src1), dispatch_width);
       if (dispatch_width == 16) {
-         fs_inst *mul = emit(MUL(temp, half(src0, 0), half(src1, 0)));
+         fs_inst *mul = do_mul_dw(temp, half(src0, 0), half(src1, 0), true);
          mul->exec_size = 8;
 
-         mul = emit(MUL(temp, half(src0, 1), half(src1, 1)));
+         mul = do_mul_dw(temp, half(src0, 1), half(src1, 1), true);
          mul->exec_size = 8;
 
          emit(SHADER_OPCODE_MOV64, this->result, temp);
       } else {
-         emit(MUL(temp, src0, src1));
+         do_mul_dw(temp, src0, src1, true);
          emit(SHADER_OPCODE_MOV64, this->result, temp);
       }
    } else {
@@ -1749,12 +1763,12 @@ fs_visitor::emit_mul_dw_high(fs_reg dst, fs_reg src0, fs_reg src1)
       fs_reg acc = fs_reg(retype(brw_acc_reg(width), this->result.type));
 
       if (brw->gen == 7 && dispatch_width == 16) {
-         emit(MUL(acc, half(src0, 0), half(src1, 0)));
+         do_mul_dw(acc, half(src0, 0), half(src1, 0), true);
          emit(MACH(half(this->result, 0), half(src0, 0), half(src1, 0)));
-         emit(MUL(acc, half(src0, 1), half(src1, 1)));
+         do_mul_dw(acc, half(src0, 1), half(src1, 1), true);
          emit(MACH(half(this->result, 1), half(src0, 1), half(src1, 1)));
       } else {
-         emit(MUL(acc, src0, src1));
+         do_mul_dw(acc, src0, src1, true);
          emit(MACH(this->result, src0, src1));
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 6a168ff..564029f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -521,6 +521,7 @@ public:
    fs_reg fix_math_operand(fs_reg src);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   fs_inst * do_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1, bool can_swap);
    void emit_mul_dw(fs_reg dst, fs_reg src0, fs_reg src1,
                     bool src0_u16 = false,
                     bool src1_u16 = false);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 8a2fd12..c99a8db 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1615,6 +1615,20 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
    emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
 }
 
+vec4_instruction *
+vec4_visitor::do_mul_dw(dst_reg dst, src_reg src0, src_reg src1, bool can_swap)
+{
+   /* 2 src operations must have the immediate in src1. We let constant folding
+    * code handle the case where both are immediate */
+   if (src0.file == IMM && src1.file != IMM && can_swap) {
+      src_reg tmp = src1;
+      src1 = src0;
+      src0 = tmp;
+   }
+   assert(!(src0.file == IMM && src1.file != IMM));
+   return emit(MUL(dst, src0, src1));
+}
+
 void
 vec4_visitor::emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
                           bool src0_u16, bool src1_u16)
@@ -1622,7 +1636,7 @@ vec4_visitor::emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
    assert(type_is_dword(src0.type) && type_is_dword(src1.type));
 
    if (brw->gen >= 8) {
-      emit(MUL(dst, src0, src1));
+      do_mul_dw(dst, src0, src1, true);
       return;
    }
 
@@ -1634,18 +1648,18 @@ vec4_visitor::emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
     */
    if (src0_u16) {
       if (brw->gen < 7)
-         emit(MUL(dst, src0, src1));
+         do_mul_dw(dst, src0, src1, false);
       else
-         emit(MUL(dst, src1, src0));
+         do_mul_dw(dst, src1, src0, false);
    } else if (src1_u16) {
       if (brw->gen < 7)
-         emit(MUL(dst, src1, src0));
+         do_mul_dw(dst, src1, src0, false);
       else
-         emit(MUL(dst, src0, src1));
+         do_mul_dw(dst, src0, src1, false);
    } else {
       struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 
-      emit(MUL(acc, src0, src1));
+      do_mul_dw(acc, src0, src1, true);
       emit(MACH(dst_null_d(), src0, src1));
       emit(MOV(dst, src_reg(acc)));
    }
@@ -1677,10 +1691,11 @@ vec4_visitor::emit_mul_dw_high(dst_reg dst, src_reg src0, src_reg src1)
             emit(BRW_OPCODE_MOV, dst_reg(temp), src1);
             src1 = temp;
          }
+         assert(!(src0.file == IMM && src1.file != IMM));
       }
       emit(SHADER_OPCODE_MUL64, acc, src0, src1);
    } else {
-      emit(MUL(acc, src0, src1));
+      do_mul_dw(acc, src0, src1, true);
    }
 
    emit(MACH(dst, src0, src1));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 9594362..2790523 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -486,6 +486,8 @@ public:
    void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                   const src_reg &src1 = src_reg());
    src_reg fix_math_operand(src_reg src);
+   vec4_instruction *do_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
+                               bool can_swap);
    void emit_mul_dw(dst_reg dst, src_reg src0, src_reg src1,
                     bool src0_u16 = false,
                     bool src1_u16 = false);
-- 
2.2.1



More information about the mesa-dev mailing list