Mesa (master): i965: Enable faster workaround-free math on Ivybridge.

Kenneth Graunke kwg at kemper.freedesktop.org
Mon Nov 7 19:02:49 UTC 2011


Module: Mesa
Branch: master
Commit: a73c65c5342bf41fa0dfefe7daa9197ce6a11db4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a73c65c5342bf41fa0dfefe7daa9197ce6a11db4

Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Tue Oct 18 12:24:47 2011 -0700

i965: Enable faster workaround-free math on Ivybridge.

According to the documentation, Ivybridge's math instruction works in
SIMD16 mode for the fragment shader, and no longer forbids align16 mode
for the vertex shader.

The documentation claims that SIMD16 mode isn't supported for INT DIV,
but empirical evidence shows that it works fine.  Presumably the note
is trying to warn us that the variant that returns both quotient and
remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1
would be sechalf(dst), trashing half your results.  Since we don't use
that variant, we don't care and can just enable SIMD16 everywhere.

The documentation also still claims that source modifiers and
conditional modifiers aren't supported, but empirical evidence and
study of the simulator both show that they work just fine.

Goodbye workarounds.  Math just works now.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Reviewed-by: Paul Berry <stereotype441 at gmail.com>

---

 src/mesa/drivers/dri/i965/brw_eu_emit.c     |   29 +++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_fs.cpp        |    8 ++++--
 src/mesa/drivers/dri/i965/brw_fs.h          |    7 ++++++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp   |   31 ++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_vec4.h        |    4 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |   19 ++++++++++++++-
 6 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 7284bdc..dbb42f4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1443,11 +1443,14 @@ void brw_math( struct brw_compile *p,
       assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+      if (intel->gen == 6)
+	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
-      /* Source modifiers are ignored for extended math instructions. */
-      assert(!src.negate);
-      assert(!src.abs);
+      /* Source modifiers are ignored for extended math instructions on Gen6. */
+      if (intel->gen == 6) {
+	 assert(!src.negate);
+	 assert(!src.abs);
+      }
 
       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
 	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
@@ -1507,8 +1510,10 @@ void brw_math2(struct brw_compile *p,
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
-   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+   if (intel->gen == 6) {
+      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+   }
 
    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
@@ -1520,11 +1525,13 @@ void brw_math2(struct brw_compile *p,
       assert(src1.type == BRW_REGISTER_TYPE_F);
    }
 
-   /* Source modifiers are ignored for extended math instructions. */
-   assert(!src0.negate);
-   assert(!src0.abs);
-   assert(!src1.negate);
-   assert(!src1.abs);
+   /* Source modifiers are ignored for extended math instructions on Gen6. */
+   if (intel->gen == 6) {
+      assert(!src0.negate);
+      assert(!src0.abs);
+      assert(!src1.negate);
+      assert(!src1.abs);
+   }
 
    /* Math is the same ISA format as other opcodes, except that CondModifier
     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b66febb..2a5f510 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -559,10 +559,10 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
     * expanding that result out, but we would need to be careful with
     * masking.
     *
-    * The hardware ignores source modifiers (negate and abs) on math
+    * Gen 6 hardware ignores source modifiers (negate and abs) on math
     * instructions, so we also move to a temp to set those up.
     */
-   if (intel->gen >= 6 && (src.file == UNIFORM ||
+   if (intel->gen == 6 && (src.file == UNIFORM ||
 			   src.abs ||
 			   src.negate)) {
       fs_reg expanded = fs_reg(this, glsl_type::float_type);
@@ -596,7 +596,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
       return NULL;
    }
 
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+      inst = emit(opcode, dst, src0, src1);
+   } else if (intel->gen == 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
        *
        * The hardware ignores source modifiers (negate and abs) on math
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e2ad649..3e45030 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -490,6 +490,13 @@ public:
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
 			 struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+   void generate_math1_gen7(fs_inst *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_math2_gen7(fs_inst *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
    void generate_math1_gen6(fs_inst *inst,
 			    struct brw_reg dst,
 			    struct brw_reg src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0c14baf..b613fb0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst,
 }
 
 void
+fs_visitor::generate_math1_gen7(fs_inst *inst,
+			        struct brw_reg dst,
+			        struct brw_reg src0)
+{
+   assert(inst->mlen == 0);
+   brw_math(p, dst,
+	    brw_math_function(inst->opcode),
+	    inst->saturate ? BRW_MATH_SATURATE_SATURATE
+			   : BRW_MATH_SATURATE_NONE,
+	    0, src0,
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+void
+fs_visitor::generate_math2_gen7(fs_inst *inst,
+			        struct brw_reg dst,
+			        struct brw_reg src0,
+			        struct brw_reg src1)
+{
+   assert(inst->mlen == 0);
+   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
+}
+
+void
 fs_visitor::generate_math1_gen6(fs_inst *inst,
 			        struct brw_reg dst,
 			        struct brw_reg src0)
@@ -797,7 +822,9 @@ fs_visitor::generate_code()
       case SHADER_OPCODE_LOG2:
       case SHADER_OPCODE_SIN:
       case SHADER_OPCODE_COS:
-	 if (intel->gen >= 6) {
+	 if (intel->gen >= 7) {
+	    generate_math1_gen7(inst, dst, src[0]);
+	 } else if (intel->gen == 6) {
 	    generate_math1_gen6(inst, dst, src[0]);
 	 } else {
 	    generate_math_gen4(inst, dst, src[0]);
@@ -807,6 +834,8 @@ fs_visitor::generate_code()
       case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_POW:
 	 if (intel->gen >= 6) {
+	    generate_math2_gen7(inst, dst, src[0], src[1]);
+	 } else if (intel->gen == 6) {
 	    generate_math2_gen6(inst, dst, src[0], src[1]);
 	 } else {
 	    generate_math_gen4(inst, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index a83a6b2..93ccda9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -533,6 +533,10 @@ public:
 			    struct brw_reg dst,
 			    struct brw_reg src0,
 			    struct brw_reg src1);
+   void generate_math2_gen7(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
 
    void generate_urb_write(vec4_instruction *inst);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 66c3b7e..dc39f17 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -283,6 +283,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 }
 
 void
+vec4_visitor::generate_math2_gen7(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   brw_math2(p,
+	     dst,
+	     brw_math_function(inst->opcode),
+	     src0, src1);
+}
+
+void
 vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
 				  struct brw_reg dst,
 				  struct brw_reg src0,
@@ -552,9 +564,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
-      if (intel->gen >= 6) {
+      if (intel->gen == 6) {
 	 generate_math1_gen6(inst, dst, src[0]);
       } else {
+	 /* Also works for Gen7. */
 	 generate_math1_gen4(inst, dst, src[0]);
       }
       break;
@@ -562,7 +575,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
-      if (intel->gen >= 6) {
+      if (intel->gen >= 7) {
+	 generate_math2_gen7(inst, dst, src[0], src[1]);
+      } else if (intel->gen == 6) {
 	 generate_math2_gen6(inst, dst, src[0], src[1]);
       } else {
 	 generate_math2_gen4(inst, dst, src[0], src[1]);




More information about the mesa-commit mailing list