Mesa (main): spirv: Produce correct result for GLSLstd450Tanh with NaN

Thu Feb 10 18:46:40 UTC 2022

Module: Mesa
Branch: main
Commit: 93ed87af28e7f5b7db7bae095e5a37b63b7bd2c7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=93ed87af28e7f5b7db7bae095e5a37b63b7bd2c7

Author: Ian Romanick <ian.d.romanick at intel.com>
Date:   Fri Oct 29 10:51:25 2021 -0700

spirv: Produce correct result for GLSLstd450Tanh with NaN

No shader-db or fossil-db changes on any Intel platform.

Reviewed-by: Caio Oliveira <caio.oliveira at intel.com>
Fixes: 9f9432d56c0 ("Revert "spirv: Use a simpler and more correct implementaiton of tanh()"")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13999>

---

 src/compiler/spirv/vtn_glsl450.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index 21f5fa80973..045ffb41832 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -520,11 +520,35 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
       nir_ssa_def *x = nir_fclamp(nb, src[0],
                                   nir_imm_floatN_t(nb, -clamped_x, bit_size),
                                   nir_imm_floatN_t(nb, clamped_x, bit_size));
-      dest->def =
-         nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x),
-                               nir_fexp(nb, nir_fneg(nb, x))),
-                  nir_fadd(nb, nir_fexp(nb, x),
-                           nir_fexp(nb, nir_fneg(nb, x))));
+
+      /* The clamping will filter out NaN values causing an incorrect result.
+       * The comparison is carefully structured to get NaN result for NaN and
+       * get -0 for -0.
+       *
+       *    result = abs(s) > 0.0 ? ... : s;
+       */
+      const bool exact = nb->exact;
+
+      nb->exact = true;
+      nir_ssa_def *is_regular = nir_flt(nb,
+                                        nir_imm_floatN_t(nb, 0, bit_size),
+                                        nir_fabs(nb, src[0]));
+
+      /* The extra 1.0*s ensures that subnormal inputs are flushed to zero
+       * when that is selected by the shader.
+       */
+      nir_ssa_def *flushed = nir_fmul(nb,
+                                      src[0],
+                                      nir_imm_floatN_t(nb, 1.0, bit_size));
+      nb->exact = exact;
+
+      dest->def = nir_bcsel(nb,
+                            is_regular,
+                            nir_fdiv(nb, nir_fsub(nb, nir_fexp(nb, x),
+                                                  nir_fexp(nb, nir_fneg(nb, x))),
+                                     nir_fadd(nb, nir_fexp(nb, x),
+                                              nir_fexp(nb, nir_fneg(nb, x)))),
+                            flushed);
       break;
    }