<div dir="ltr">My earlier comment, I think, applies to all of the first 7 patches in this series. Let's just add nir_fadd_imm and nir_fmul_imm and rewrite them to use those. That'll make them handle doubles as well if we ever need it. </div> <div class="gmail_quote"><div dir="ltr">On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga &lt;<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>&gt; wrote: </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">--- src/compiler/spirv/vtn_glsl450.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 8bdef9db822..85851755aab 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -672,7 +672,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Sinh: /* 0.5 * (e^x - e^(-x)) */ val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size), nir_fsub(nb, build_exp(nb, src[0]), build_exp(nb, nir_fneg(nb, src[0])))); return; @@ -680,7 +680,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Cosh: /* 0.5 * (e^x + e^(-x)) */ val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size), nir_fadd(nb, build_exp(nb, src[0]), build_exp(nb, nir_fneg(nb, src[0])))); return; @@ -693,11 +693,20 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, * We clamp x to (-inf, +10] to avoid precision problems. When x > 10, * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the * computation e^2x +/- 1 so it can be ignored. + * + * For 16-bit precision we clamp x to (-inf, +4.2] since the maximum + * representable number is only 65,504 and e^(2*6) exceeds that. Also, + * if x > 4.2, tanh(x) will return 1.0 in fp16. 
*/ - nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10)); - nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2))); - val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)), - nir_fadd(nb, exp2x, nir_imm_float(nb, 1))); + const uint32_t bit_size = src[0]->bit_size; + const double clamped_x = bit_size > 16 ? 10.0 : 4.2; + nir_ssa_def *x = nir_fmin(nb, src[0], + nir_imm_floatN_t(nb, clamped_x, bit_size)); + nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, bit_size); + nir_ssa_def *two = nir_imm_floatN_t(nb, 2.0, bit_size); + nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, two)); + val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, one), + nir_fadd(nb, exp2x, one)); return; } @@ -705,16 +714,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f)))))); + nir_imm_floatN_t(nb, 1.0f, src[0]->bit_size)))))); return; case GLSLstd450Acosh: val->ssa->def = build_log(nb, nir_fadd(nb, src[0], nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f))))); + nir_imm_floatN_t(nb, 1.0f, src[0]->bit_size))))); return; case GLSLstd450Atanh: { - nir_ssa_def *one = nir_imm_float(nb, 1.0); - val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size); + val->ssa->def = nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size), build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), nir_fsub(nb, one, src[0])))); return; -- 2.17.1 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> 
</blockquote></div>