[Mesa-dev] [PATCH 07/59] compiler/spirv: implement 16-bit hyperbolic trigonometric functions

Jason Ekstrand jason at jlekstrand.net
Fri Dec 7 15:21:58 UTC 2018


My comment earlier, I think, applies to all of the first 7.  Let's just add
nir_fadd_imm and nir_fmul_imm and rewrite them to use those.  That'll make
them handle doubles as well if we ever need it.

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga <itoral at igalia.com> wrote:

> ---
>  src/compiler/spirv/vtn_glsl450.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_glsl450.c
> b/src/compiler/spirv/vtn_glsl450.c
> index 8bdef9db822..85851755aab 100644
> --- a/src/compiler/spirv/vtn_glsl450.c
> +++ b/src/compiler/spirv/vtn_glsl450.c
> @@ -672,7 +672,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>     case GLSLstd450Sinh:
>        /* 0.5 * (e^x - e^(-x)) */
>        val->ssa->def =
> -         nir_fmul(nb, nir_imm_float(nb, 0.5f),
> +         nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size),
>                        nir_fsub(nb, build_exp(nb, src[0]),
>                                     build_exp(nb, nir_fneg(nb, src[0]))));
>        return;
> @@ -680,7 +680,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>     case GLSLstd450Cosh:
>        /* 0.5 * (e^x + e^(-x)) */
>        val->ssa->def =
> -         nir_fmul(nb, nir_imm_float(nb, 0.5f),
> +         nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size),
>                        nir_fadd(nb, build_exp(nb, src[0]),
>                                     build_exp(nb, nir_fneg(nb, src[0]))));
>        return;
> @@ -693,11 +693,20 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>         * We clamp x to (-inf, +10] to avoid precision problems.  When x >
> 10,
>         * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in
> the
>         * computation e^2x +/- 1 so it can be ignored.
> +       *
> +       * For 16-bit precision we clamp x to (-inf, +4.2] since the maximum
> +       * representable number is only 65,504 and e^(2*6) exceeds that.
> Also,
> +       * if x > 4.2, tanh(x) will return 1.0 in fp16.
>         */
> -      nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
> -      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x,
> nir_imm_float(nb, 2)));
> -      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb,
> 1)),
> -                                   nir_fadd(nb, exp2x, nir_imm_float(nb,
> 1)));
> +      const uint32_t bit_size = src[0]->bit_size;
> +      const double clamped_x = bit_size > 16 ? 10.0 : 4.2;
> +      nir_ssa_def *x = nir_fmin(nb, src[0],
> +                                    nir_imm_floatN_t(nb, clamped_x,
> bit_size));
> +      nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, bit_size);
> +      nir_ssa_def *two = nir_imm_floatN_t(nb, 2.0, bit_size);
> +      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, two));
> +      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, one),
> +                                   nir_fadd(nb, exp2x, one));
>        return;
>     }
>
> @@ -705,16 +714,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>        val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
>           build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
>                         nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0],
> src[0]),
> -                                                  nir_imm_float(nb,
> 1.0f))))));
> +                                                  nir_imm_floatN_t(nb,
> 1.0f, src[0]->bit_size))))));
>        return;
>     case GLSLstd450Acosh:
>        val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
>           nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
> -                                    nir_imm_float(nb, 1.0f)))));
> +                                    nir_imm_floatN_t(nb, 1.0f,
> src[0]->bit_size)))));
>        return;
>     case GLSLstd450Atanh: {
> -      nir_ssa_def *one = nir_imm_float(nb, 1.0);
> -      val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
> +      nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);
> +      val->ssa->def = nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f,
> src[0]->bit_size),
>           build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
>                                      nir_fsub(nb, one, src[0]))));
>        return;
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181207/0c0d2d3a/attachment-0001.html>


More information about the mesa-dev mailing list