<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Apr 8, 2016 at 9:36 AM, Eduardo Lima Mitev <span dir="ltr"><<a href="mailto:elima@igalia.com" target="_blank">elima@igalia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><span class="">On 04/08/2016 01:35 AM, Kenneth Graunke wrote:<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
This makes the extra multiply visible to NIR's algebraic optimizations<br>
(for constant reassociation) as well as constant folding. This means<br>
that when the result of sin/cos are multiplied by an constant, we can<br>
eliminate the extra multiply altogether, reducing the cost of the<br>
workaround.<br>
<br>
It also means we only have to implement it one place, rather than in<br>
both backends.<br>
<br>
This makes INTEL_PRECISE_TRIG=1 cost nothing on GPUTest/Volplosion,<br>
which has a ton of sin() calls, but always multiplies them by an<br>
immediate constant. The extra multiply gets folded away.<br>
<br>
</blockquote>
<br></span>
Cleaner, indeed. Patch (and series) is:<br>
<br>
Reviewed-by: Eduardo Lima Mitev <<a href="mailto:elima@igalia.com" target="_blank">elima@igalia.com</a>><br>
<br>
Thanks!<span class=""><font color="#888888"><br>
<br>
Eduardo</font></span><div class=""><div class="h5"><br>
<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>><br>
---<br>
src/mesa/drivers/dri/i965/Makefile.am | 5 +++<br>
src/mesa/drivers/dri/i965/Makefile.sources | 1 +<br>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +-------<br>
src/mesa/drivers/dri/i965/brw_nir.c | 3 ++<br>
src/mesa/drivers/dri/i965/brw_nir.h | 2 +<br>
.../drivers/dri/i965/brw_nir_trig_workarounds.py | 43 ++++++++++++++++++++++<br>
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 +-------<br>
7 files changed, 58 insertions(+), 28 deletions(-)<br>
create mode 100755 src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am<br>
index 0db5a51..a41c830 100644<br>
--- a/src/mesa/drivers/dri/i965/Makefile.am<br>
+++ b/src/mesa/drivers/dri/i965/Makefile.am<br>
@@ -33,6 +33,7 @@ AM_CFLAGS = \<br>
-I$(top_srcdir)/src/mesa/drivers/dri/common \<br>
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \<br>
-I$(top_srcdir)/src/gtest/include \<br>
+ -I$(top_srcdir)/src/compiler/nir \<br>
-I$(top_builddir)/src/compiler/nir \<br>
-I$(top_builddir)/src/mesa/drivers/dri/common \<br>
$(DEFINES) \<br>
@@ -41,6 +42,10 @@ AM_CFLAGS = \<br>
<br>
AM_CXXFLAGS = $(AM_CFLAGS)<br>
<br>
+brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py<br>
+ $(MKDIR_GEN)<br>
+ $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false)<br>
+<br>
noinst_LTLIBRARIES = <a href="http://libi965_dri.la" rel="noreferrer" target="_blank">libi965_dri.la</a> <a href="http://libi965_compiler.la" rel="noreferrer" target="_blank">libi965_compiler.la</a><br>
libi965_dri_la_SOURCES = $(i965_FILES)<br>
libi965_dri_la_LIBADD = <a href="http://libi965_compiler.la" rel="noreferrer" target="_blank">libi965_compiler.la</a> $(INTEL_LIBS)<br>
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources<br>
index 4689588..2619e43 100644<br>
--- a/src/mesa/drivers/dri/i965/Makefile.sources<br>
+++ b/src/mesa/drivers/dri/i965/Makefile.sources<br>
@@ -44,6 +44,7 @@ i965_compiler_FILES = \<br>
brw_nir.c \<br>
brw_nir_analyze_boolean_resolves.c \<br>
brw_nir_attribute_workarounds.c \<br>
+ brw_nir_trig_workarounds.c \<br>
brw_nir_opt_peephole_ffma.c \<br>
brw_nir_uniforms.cpp \<br>
brw_packed_float.c \<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
index 90b8789..bd6314a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
@@ -775,24 +775,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)<br>
break;<br>
<br>
case nir_op_fsin:<br>
- if (!compiler->precise_trig) {<br>
- inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);<br>
- } else {<br>
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);<br>
- inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);<br>
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));<br>
- }<br>
+ inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);<br>
inst->saturate = instr->dest.saturate;<br>
break;<br>
<br>
case nir_op_fcos:<br>
- if (!compiler->precise_trig) {<br>
- inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);<br>
- } else {<br>
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);<br>
- inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);<br>
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));<br>
- }<br>
+ inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);<br>
inst->saturate = instr->dest.saturate;<br>
break;<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c<br>
index 1821c0d..932979a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_nir.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_nir.c<br>
@@ -447,6 +447,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)<br>
if (nir->stage == MESA_SHADER_GEOMETRY)<br>
OPT(nir_lower_gs_intrinsics);<br>
<br>
+ if (compiler->precise_trig)<br>
+ OPT(brw_nir_apply_trig_workarounds);<br>
+<br>
static const nir_lower_tex_options tex_options = {<br>
.lower_txp = ~0,<br>
};<br>
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h<br>
index b10c083..2711606 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_nir.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_nir.h<br>
@@ -106,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,<br>
bool use_legacy_snorm_formula,<br>
const uint8_t *attrib_wa_flags);<br>
<br>
+bool brw_nir_apply_trig_workarounds(nir_shader *nir);<br>
+<br>
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,<br>
const struct brw_device_info *devinfo,<br>
const struct brw_sampler_prog_key_data *key,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py<br>
new file mode 100755<br>
index 0000000..67dab9a<br>
--- /dev/null<br>
+++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py<br>
@@ -0,0 +1,43 @@<br>
+#! /usr/bin/env python<br>
+#<br>
+# Copyright (C) 2016 Intel Corporation<br>
+#<br>
+# Permission is hereby granted, free of charge, to any person obtaining a<br>
+# copy of this software and associated documentation files (the "Software"),<br>
+# to deal in the Software without restriction, including without limitation<br>
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+# and/or sell copies of the Software, and to permit persons to whom the<br>
+# Software is furnished to do so, subject to the following conditions:<br>
+#<br>
+# The above copyright notice and this permission notice (including the next<br>
+# paragraph) shall be included in all copies or substantial portions of the<br>
+# Software.<br>
+#<br>
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL<br>
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+# IN THE SOFTWARE.<br>
+<br>
+import nir_algebraic<br>
+<br>
+# The SIN and COS instructions on Intel hardware can produce values<br>
+# slightly outside of the [-1.0, 1.0] range for a small set of values.<br>
+# Obviously, this can break everyone's expectations about trig functions.<br>
+#<br>
+# According to an internal presentation, the COS instruction can produce<br>
+# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One<br>
+# suggested workaround is to multiply by 0.99997, scaling down the<br>
+# amplitude slightly. Apparently this also minimizes the error function,<br>
+# reducing the maximum error from 0.00006 to about 0.00003.<br>
+<br>
+trig_workarounds = [<br>
+ (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)),<br>
+ (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)),<br>
+]<br>
+<br>
+print '#include "brw_nir.h"'<br>
+print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",<br>
+ trig_workarounds).render()<br></blockquote></div></div></blockquote><div><br>Side note: Part of the intention of nir_algebraic when I wrote it over a year ago was to make adding custom algebraic transformation passes very light-weight and easy. Glad to see someone finally taking advantage of that. :-)<br><br></div><div>Series Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div class=""><div class="h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
index d9f96c5..e4e8c38 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
@@ -1101,24 +1101,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
break;<br>
<br>
case nir_op_fsin:<br>
- if (!compiler->precise_trig) {<br>
- inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);<br>
- } else {<br>
- src_reg tmp = src_reg(this, glsl_type::vec4_type);<br>
- inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);<br>
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));<br>
- }<br>
+ inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);<br>
inst->saturate = instr->dest.saturate;<br>
break;<br>
<br>
case nir_op_fcos:<br>
- if (!compiler->precise_trig) {<br>
- inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);<br>
- } else {<br>
- src_reg tmp = src_reg(this, glsl_type::vec4_type);<br>
- inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);<br>
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));<br>
- }<br>
+ inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);<br>
inst->saturate = instr->dest.saturate;<br>
break;<br>
<br>
<br>
</blockquote>
<br></div></div><div class=""><div class="h5">
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</div></div></blockquote></div><br></div></div>