<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Wed, Dec 19, 2018 at 5:51 AM Iago Toral Quiroga &lt;<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">v2:<br>
 - use nir_fadd_imm and nir_fmul_imm helpers (Jason)<br>
---<br>
 src/compiler/spirv/vtn_glsl450.c | 44 +++++++++++++++++++-------------<br>
 1 file changed, 26 insertions(+), 18 deletions(-)<br>
<br>
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c<br>
index ec91d9308c5..c8400d6c80f 100644<br>
--- a/src/compiler/spirv/vtn_glsl450.c<br>
+++ b/src/compiler/spirv/vtn_glsl450.c<br>
@@ -654,17 +654,17 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,<br>
    case GLSLstd450Sinh:<br>
       /* 0.5 * (e^x - e^(-x)) */<br>
       val->ssa->def =<br>
-         nir_fmul(nb, nir_imm_float(nb, 0.5f),<br>
-                      nir_fsub(nb, build_exp(nb, src[0]),<br>
-                                   build_exp(nb, nir_fneg(nb, src[0]))));<br>
+         nir_fmul_imm(nb, nir_fsub(nb, build_exp(nb, src[0]),<br>
+                                       build_exp(nb, nir_fneg(nb, src[0]))),<br>
+                          0.5f);<br>
       return;<br>
<br>
    case GLSLstd450Cosh:<br>
       /* 0.5 * (e^x + e^(-x)) */<br>
       val->ssa->def =<br>
-         nir_fmul(nb, nir_imm_float(nb, 0.5f),<br>
-                      nir_fadd(nb, build_exp(nb, src[0]),<br>
-                                   build_exp(nb, nir_fneg(nb, src[0]))));<br>
+         nir_fmul_imm(nb, nir_fadd(nb, build_exp(nb, src[0]),<br>
+                                       build_exp(nb, nir_fneg(nb, src[0]))),<br>
+                          0.5f);<br>
       return;<br>
<br>
    case GLSLstd450Tanh: {<br>
@@ -675,30 +675,38 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,<br>
        * We clamp x to (-inf, +10] to avoid precision problems.  When x > 10,<br>
        * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the<br>
        * computation e^2x +/- 1 so it can be ignored.<br>
+       *<br>
+       * For 16-bit precision we clamp x to (-inf, +4.2] since the maximum<br>
+       * representable number is only 65,504 and e^(2*6) exceeds that. Also,<br>
+       * if x > 4.2, tanh(x) will return 1.0 in fp16.<br>
        */<br>
-      nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));<br>
-      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));<br>
-      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),<br>
-                                   nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));<br>
+      const uint32_t bit_size = src[0]->bit_size;<br>
+      const double clamped_x = bit_size > 16 ? 10.0 : 4.2;<br>
+      nir_ssa_def *x = nir_fmin(nb, src[0],<br>
+                                    nir_imm_floatN_t(nb, clamped_x, bit_size));<br>
+      nir_ssa_def *exp2x = build_exp(nb, nir_fmul_imm(nb, x, 2.0));<br>
+      val->ssa->def = nir_fdiv(nb, nir_fadd_imm(nb, exp2x, -1.0),<br>
+                                   nir_fadd_imm(nb, exp2x, 1.0));<br>
       return;<br>
    }<br>
<br>
    case GLSLstd450Asinh:<br>
       val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),<br>
          build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),<br>
-                       nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),<br>
-                                                  nir_imm_float(nb, 1.0f))))));<br>
+                       nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),<br>
+                                                      1.0f)))));<br>
       return;<br>
    case GLSLstd450Acosh:<br>
       val->ssa->def = build_log(nb, nir_fadd(nb, src[0],<br>
-         nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),<br>
-                                    nir_imm_float(nb, 1.0f)))));<br>
+         nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),<br>
+                                        -1.0f))));<br>
       return;<br>
    case GLSLstd450Atanh: {<br>
-      nir_ssa_def *one = nir_imm_float(nb, 1.0);<br>
-      val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),<br>
-         build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),<br>
-                                    nir_fsub(nb, one, src[0]))));<br>
+      nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);<br>
+      val->ssa->def =<br>
+         nir_fmul_imm(nb, build_log(nb, nir_fdiv(nb, nir_fadd_imm(nb, src[0], 1.0f),<br></blockquote><div><br></div><div>Since you had to declare <code>one</code> for the line below, you could just use it here as well.  Doesn't really matter though.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+                                        nir_fsub(nb, one, src[0]))),<br>
+                          0.5f);<br>
       return;<br>
    }<br>
<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>