On 18 October 2011 12:24, Kenneth Graunke <span dir="ltr"><<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>></span> wrote:<br><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
According to the documentation, Ivybridge's math instruction works in<br>
SIMD16 mode for the fragment shader, and no longer forbids align16 mode<br>
for the vertex shader.<br>
<br>
The documentation claims that SIMD16 mode isn't supported for INT DIV,<br>
but empirical evidence shows that it works fine. Presumably the note<br>
is trying to warn us that the variant that returns both quotient and<br>
remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1<br>
would be sechalf(dst), trashing half your results. Since we don't use<br>
that variant, we don't care and can just enable SIMD16 everywhere.<br>
<br>
The documentation also still claims that source modifiers and<br>
conditional modifiers aren't supported, but empirical evidence and<br>
study of the simulator both show that they work just fine.<br>
<br>
Goodbye workarounds. Math just works now.<br>
<br>
Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>><br>
---<br>
src/mesa/drivers/dri/i965/brw_eu_emit.c | 29 +++++++++++++++---------<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++-<br>
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++++<br>
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 31 ++++++++++++++++++++++++++-<br>
src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++<br>
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 19 ++++++++++++++-<br>
6 files changed, 80 insertions(+), 16 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index 5caebfc..0841478 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p,<br>
assert(src.file == BRW_GENERAL_REGISTER_FILE);<br>
<br>
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
- assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+ if (intel->gen == 6)<br>
+ assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
<br>
- /* Source modifiers are ignored for extended math instructions. */<br>
- assert(!src.negate);<br>
- assert(!src.abs);<br>
+ /* Source modifiers are ignored for extended math instructions on Gen6. */<br>
+ if (intel->gen == 6) {<br>
+ assert(!src.negate);<br>
+ assert(!src.abs);<br>
+ }<br>
<br>
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||<br>
function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||<br>
@@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p,<br>
assert(src1.file == BRW_GENERAL_REGISTER_FILE);<br>
<br>
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
- assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
- assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+ if (intel->gen == 6) {<br>
+ assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+ assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);<br>
+ }<br>
<br>
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||<br>
function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||<br>
@@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p,<br>
assert(src1.type == BRW_REGISTER_TYPE_F);<br>
}<br>
<br>
- /* Source modifiers are ignored for extended math instructions. */<br>
- assert(!src0.negate);<br>
- assert(!src0.abs);<br>
- assert(!src1.negate);<br>
- assert(!src1.abs);<br>
+ /* Source modifiers are ignored for extended math instructions on Gen6. */<br>
+ if (intel->gen == 6) {<br>
+ assert(!src0.negate);<br>
+ assert(!src0.abs);<br>
+ assert(!src1.negate);<br>
+ assert(!src1.abs);<br>
+ }<br>
<br>
/* Math is the same ISA format as other opcodes, except that CondModifier<br>
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index f731662..6c417d4 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)<br>
* The hardware ignores source modifiers (negate and abs) on math<br>
* instructions, so we also move to a temp to set those up.<br></blockquote><div><br>Probably should change this comment to clarify that this is only the case for Gen6.<br> </div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204, 204, 204);padding-left:1ex">
*/<br>
- if (intel->gen >= 6 && (src.file == UNIFORM ||<br>
+ if (intel->gen == 6 && (src.file == UNIFORM ||<br>
src.abs ||<br>
src.negate)) {<br>
fs_reg expanded = fs_reg(this, glsl_type::float_type);<br>
@@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)<br>
return NULL;<br>
}<br>
<br>
- if (intel->gen >= 6) {<br>
+ if (intel->gen >= 7) {<br>
+ inst = emit(opcode, dst, src0, src1);<br>
+ } else if (intel->gen == 6) {<br>
/* Can't do hstride == 0 args to gen6 math, so expand it out.<br>
*<br>
* The hardware ignores source modifiers (negate and abs) on math<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index 4035186..85d3cad 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -490,6 +490,13 @@ public:<br>
void generate_linterp(fs_inst *inst, struct brw_reg dst,<br>
struct brw_reg *src);<br>
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);<br>
+ void generate_math1_gen7(fs_inst *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src);<br>
+ void generate_math2_gen7(fs_inst *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src0,<br>
+ struct brw_reg src1);<br>
void generate_math1_gen6(fs_inst *inst,<br>
struct brw_reg dst,<br>
struct brw_reg src);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
index 4c158fe..9697762 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
@@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst,<br>
}<br>
<br>
void<br>
+fs_visitor::generate_math1_gen7(fs_inst *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src0)<br>
+{<br>
+ assert(inst->mlen == 0);<br>
+ brw_math(p, dst,<br>
+ brw_math_function(inst->opcode),<br>
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE<br>
+ : BRW_MATH_SATURATE_NONE,<br>
+ 0, src0,<br>
+ BRW_MATH_DATA_VECTOR,<br>
+ BRW_MATH_PRECISION_FULL);<br>
+}<br>
+<br>
+void<br>
+fs_visitor::generate_math2_gen7(fs_inst *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src0,<br>
+ struct brw_reg src1)<br>
+{<br>
+ assert(inst->mlen == 0);<br>
+ brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);<br>
+}<br>
+<br>
+void<br>
fs_visitor::generate_math1_gen6(fs_inst *inst,<br>
struct brw_reg dst,<br>
struct brw_reg src0)<br>
@@ -788,7 +813,9 @@ fs_visitor::generate_code()<br>
case SHADER_OPCODE_LOG2:<br>
case SHADER_OPCODE_SIN:<br>
case SHADER_OPCODE_COS:<br>
- if (intel->gen >= 6) {<br>
+ if (intel->gen >= 7) {<br>
+ generate_math1_gen7(inst, dst, src[0]);<br>
+ } else if (intel->gen == 6) {<br>
generate_math1_gen6(inst, dst, src[0]);<br>
} else {<br>
generate_math_gen4(inst, dst, src[0]);<br>
@@ -798,6 +825,8 @@ fs_visitor::generate_code()<br>
case SHADER_OPCODE_INT_REMAINDER:<br>
case SHADER_OPCODE_POW:<br>
if (intel->gen >= 6) {<br>
+ generate_math2_gen7(inst, dst, src[0], src[1]);<br>
+ } else if (intel->gen == 6) {<br>
generate_math2_gen6(inst, dst, src[0], src[1]);<br>
} else {<br>
generate_math_gen4(inst, dst, src[0]);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
index eae841c..4d8bb2d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
@@ -532,6 +532,10 @@ public:<br>
struct brw_reg dst,<br>
struct brw_reg src0,<br>
struct brw_reg src1);<br>
+ void generate_math2_gen7(vec4_instruction *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src0,<br>
+ struct brw_reg src1);<br>
<br>
void generate_urb_write(vec4_instruction *inst);<br>
void generate_oword_dual_block_offsets(struct brw_reg m1,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
index ccbad25..f66091d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
@@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,<br>
}<br>
<br>
void<br>
+vec4_visitor::generate_math2_gen7(vec4_instruction *inst,<br>
+ struct brw_reg dst,<br>
+ struct brw_reg src0,<br>
+ struct brw_reg src1)<br>
+{<br>
+ brw_math2(p,<br>
+ dst,<br>
+ brw_math_function(inst->opcode),<br>
+ src0, src1);<br>
+}<br>
+<br>
+void<br>
vec4_visitor::generate_math2_gen6(vec4_instruction *inst,<br>
struct brw_reg dst,<br>
struct brw_reg src0,<br>
@@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,<br>
case SHADER_OPCODE_LOG2:<br>
case SHADER_OPCODE_SIN:<br>
case SHADER_OPCODE_COS:<br>
- if (intel->gen >= 6) {<br>
+ if (intel->gen == 6) {<br>
generate_math1_gen6(inst, dst, src[0]);<br>
} else {<br>
+ /* Also works for Gen7. */<br>
generate_math1_gen4(inst, dst, src[0]);<br>
}<br>
break;<br>
@@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,<br>
case SHADER_OPCODE_POW:<br>
case SHADER_OPCODE_INT_QUOTIENT:<br>
case SHADER_OPCODE_INT_REMAINDER:<br>
- if (intel->gen >= 6) {<br>
+ if (intel->gen >= 7) {<br>
+ generate_math2_gen7(inst, dst, src[0], src[1]);<br>
+ } else if (intel->gen == 6) {<br>
generate_math2_gen6(inst, dst, src[0], src[1]);<br>
} else {<br>
generate_math2_gen4(inst, dst, src[0], src[1]);<br>
<font color="#888888">--<br>
1.7.7<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></blockquote></div><br>Other than the above comment, this patch is:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br>