On 18 October 2011 12:24, Kenneth Graunke <span dir="ltr">&lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt;</span> wrote:<br><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

According to the documentation, Ivybridge&#39;s math instruction works in<br>
SIMD16 mode for the fragment shader, and no longer forbids align16 mode<br>
for the vertex shader.<br>
<br>
The documentation claims that SIMD16 mode isn&#39;t supported for INT DIV,<br>
but empirical evidence shows that it works fine.  Presumably the note<br>
is trying to warn us that the variant that returns both quotient and<br>
remainder in (dst, dst + 1) doesn&#39;t work in SIMD16 mode since dst + 1<br>
would be sechalf(dst), trashing half your results.  Since we don&#39;t use<br>
that variant, we don&#39;t care and can just enable SIMD16 everywhere.<br>
<br>
The documentation also still claims that source modifiers and<br>
conditional modifiers aren&#39;t supported, but empirical evidence and<br>
study of the simulator both show that they work just fine.<br>
<br>
Goodbye workarounds.  Math just works now.<br>
<br>
Signed-off-by: Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt;<br>
---<br>
 src/mesa/drivers/dri/i965/brw_eu_emit.c     |   29 +++++++++++++++---------<br>
 src/mesa/drivers/dri/i965/brw_fs.cpp        |    6 +++-<br>
 src/mesa/drivers/dri/i965/brw_fs.h          |    7 ++++++<br>
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp   |   31 ++++++++++++++++++++++++++-<br>
 src/mesa/drivers/dri/i965/brw_vec4.h        |    4 +++<br>
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |   19 ++++++++++++++-<br>
 6 files changed, 80 insertions(+), 16 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index 5caebfc..0841478 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p,<br>
       assert(src.file == BRW_GENERAL_REGISTER_FILE);<br>
<br>
       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
-      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+      if (intel-&gt;gen == 6)<br>
+        assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
<br>
-      /* Source modifiers are ignored for extended math instructions. */<br>
-      assert(!src.negate);<br>
-      assert(!src.abs);<br>
+      /* Source modifiers are ignored for extended math instructions on Gen6. */<br>
+      if (intel-&gt;gen == 6) {<br>
+        assert(!src.negate);<br>
+        assert(!src.abs);<br>
+      }<br>
<br>
       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||<br>
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||<br>
@@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p,<br>
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);<br>
<br>
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
-   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
-   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+   if (intel-&gt;gen == 6) {<br>
+      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+   }<br>
<br>
    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||<br>
        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||<br>
@@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p,<br>
       assert(src1.type == BRW_REGISTER_TYPE_F);<br>
    }<br>
<br>
-   /* Source modifiers are ignored for extended math instructions. */<br>
-   assert(!src0.negate);<br>
-   assert(!src0.abs);<br>
-   assert(!src1.negate);<br>
-   assert(!src1.abs);<br>
+   /* Source modifiers are ignored for extended math instructions on Gen6. */<br>
+   if (intel-&gt;gen == 6) {<br>
+      assert(!src0.negate);<br>
+      assert(!src0.abs);<br>
+      assert(!src1.negate);<br>
+      assert(!src1.abs);<br>
+   }<br>
<br>
    /* Math is the same ISA format as other opcodes, except that CondModifier<br>
     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index f731662..6c417d4 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)<br>
     * The hardware ignores source modifiers (negate and abs) on math<br>
     * instructions, so we also move to a temp to set those up.<br></blockquote><div><br>Probably should change this comment to clarify that this is only the case for Gen6.<br> </div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204, 204, 204);padding-left:1ex">


     */<br>
-   if (intel-&gt;gen &gt;= 6 &amp;&amp; (src.file == UNIFORM ||<br>
+   if (intel-&gt;gen == 6 &amp;&amp; (src.file == UNIFORM ||<br>
                           src.abs ||<br>
                           src.negate)) {<br>
       fs_reg expanded = fs_reg(this, glsl_type::float_type);<br>
@@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)<br>
       return NULL;<br>
    }<br>
<br>
-   if (intel-&gt;gen &gt;= 6) {<br>
+   if (intel-&gt;gen &gt;= 7) {<br>
+      inst = emit(opcode, dst, src0, src1);<br>
+   } else if (intel-&gt;gen == 6) {<br>
       /* Can&#39;t do hstride == 0 args to gen6 math, so expand it out.<br>
        *<br>
        * The hardware ignores source modifiers (negate and abs) on math<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index 4035186..85d3cad 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -490,6 +490,13 @@ public:<br>
    void generate_linterp(fs_inst *inst, struct brw_reg dst,<br>
                         struct brw_reg *src);<br>
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);<br>
+   void generate_math1_gen7(fs_inst *inst,<br>
+                           struct brw_reg dst,<br>
+                           struct brw_reg src);<br>
+   void generate_math2_gen7(fs_inst *inst,<br>
+                           struct brw_reg dst,<br>
+                           struct brw_reg src0,<br>
+                           struct brw_reg src1);<br>
    void generate_math1_gen6(fs_inst *inst,<br>
                            struct brw_reg dst,<br>
                            struct brw_reg src);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
index 4c158fe..9697762 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
@@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst,<br>
 }<br>
<br>
 void<br>
+fs_visitor::generate_math1_gen7(fs_inst *inst,<br>
+                               struct brw_reg dst,<br>
+                               struct brw_reg src0)<br>
+{<br>
+   assert(inst-&gt;mlen == 0);<br>
+   brw_math(p, dst,<br>
+           brw_math_function(inst-&gt;opcode),<br>
+           inst-&gt;saturate ? BRW_MATH_SATURATE_SATURATE<br>
+                          : BRW_MATH_SATURATE_NONE,<br>
+           0, src0,<br>
+           BRW_MATH_DATA_VECTOR,<br>
+           BRW_MATH_PRECISION_FULL);<br>
+}<br>
+<br>
+void<br>
+fs_visitor::generate_math2_gen7(fs_inst *inst,<br>
+                               struct brw_reg dst,<br>
+                               struct brw_reg src0,<br>
+                               struct brw_reg src1)<br>
+{<br>
+   assert(inst-&gt;mlen == 0);<br>
+   brw_math2(p, dst, brw_math_function(inst-&gt;opcode), src0, src1);<br>
+}<br>
+<br>
+void<br>
 fs_visitor::generate_math1_gen6(fs_inst *inst,<br>
                                struct brw_reg dst,<br>
                                struct brw_reg src0)<br>
@@ -788,7 +813,9 @@ fs_visitor::generate_code()<br>
       case SHADER_OPCODE_LOG2:<br>
       case SHADER_OPCODE_SIN:<br>
       case SHADER_OPCODE_COS:<br>
-        if (intel-&gt;gen &gt;= 6) {<br>
+        if (intel-&gt;gen &gt;= 7) {<br>
+           generate_math1_gen7(inst, dst, src[0]);<br>
+        } else if (intel-&gt;gen == 6) {<br>
            generate_math1_gen6(inst, dst, src[0]);<br>
         } else {<br>
            generate_math_gen4(inst, dst, src[0]);<br>
@@ -798,6 +825,8 @@ fs_visitor::generate_code()<br>
       case SHADER_OPCODE_INT_REMAINDER:<br>
       case SHADER_OPCODE_POW:<br>
         if (intel-&gt;gen &gt;= 6) {<br>
+           generate_math2_gen7(inst, dst, src[0], src[1]);<br>
+        } else if (intel-&gt;gen == 6) {<br>
            generate_math2_gen6(inst, dst, src[0], src[1]);<br>
         } else {<br>
            generate_math_gen4(inst, dst, src[0]);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
index eae841c..4d8bb2d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
@@ -532,6 +532,10 @@ public:<br>
                            struct brw_reg dst,<br>
                            struct brw_reg src0,<br>
                            struct brw_reg src1);<br>
+   void generate_math2_gen7(vec4_instruction *inst,<br>
+                           struct brw_reg dst,<br>
+                           struct brw_reg src0,<br>
+                           struct brw_reg src1);<br>
<br>
    void generate_urb_write(vec4_instruction *inst);<br>
    void generate_oword_dual_block_offsets(struct brw_reg m1,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
index ccbad25..f66091d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
@@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,<br>
 }<br>
<br>
 void<br>
+vec4_visitor::generate_math2_gen7(vec4_instruction *inst,<br>
+                                 struct brw_reg dst,<br>
+                                 struct brw_reg src0,<br>
+                                 struct brw_reg src1)<br>
+{<br>
+   brw_math2(p,<br>
+            dst,<br>
+            brw_math_function(inst-&gt;opcode),<br>
+            src0, src1);<br>
+}<br>
+<br>
+void<br>
 vec4_visitor::generate_math2_gen6(vec4_instruction *inst,<br>
                                  struct brw_reg dst,<br>
                                  struct brw_reg src0,<br>
@@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,<br>
    case SHADER_OPCODE_LOG2:<br>
    case SHADER_OPCODE_SIN:<br>
    case SHADER_OPCODE_COS:<br>
-      if (intel-&gt;gen &gt;= 6) {<br>
+      if (intel-&gt;gen == 6) {<br>
         generate_math1_gen6(inst, dst, src[0]);<br>
       } else {<br>
+        /* Also works for Gen7. */<br>
         generate_math1_gen4(inst, dst, src[0]);<br>
       }<br>
       break;<br>
@@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,<br>
    case SHADER_OPCODE_POW:<br>
    case SHADER_OPCODE_INT_QUOTIENT:<br>
    case SHADER_OPCODE_INT_REMAINDER:<br>
-      if (intel-&gt;gen &gt;= 6) {<br>
+      if (intel-&gt;gen &gt;= 7) {<br>
+        generate_math2_gen7(inst, dst, src[0], src[1]);<br>
+      } else if (intel-&gt;gen == 6) {<br>
         generate_math2_gen6(inst, dst, src[0], src[1]);<br>
       } else {<br>
         generate_math2_gen4(inst, dst, src[0], src[1]);<br>
<font color="#888888">--<br>
1.7.7<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></blockquote></div><br>Other than the above comment, this patch is:<br><br>Reviewed-by: Paul Berry &lt;<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>&gt;<br>