[Mesa-dev] [PATCH 04/59] compiler/spirv: implement 16-bit atan
Iago Toral
itoral at igalia.com
Thu Dec 6 10:00:44 UTC 2018
On Wed, 2018-12-05 at 11:33 -0600, Jason Ekstrand wrote:
> On Tue, Dec 4, 2018 at 1:17 AM Iago Toral Quiroga <itoral at igalia.com> wrote:
> > ---
> >  src/compiler/spirv/vtn_glsl450.c | 36 ++++++++++++++++++++++++------------
> >  1 file changed, 24 insertions(+), 12 deletions(-)
> >
> > diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
> > index 4345c9c61a3..9cda80c5137 100644
> > --- a/src/compiler/spirv/vtn_glsl450.c
> > +++ b/src/compiler/spirv/vtn_glsl450.c
> > @@ -238,8 +238,10 @@ build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
> >  static nir_ssa_def *
> >  build_atan(nir_builder *b, nir_ssa_def *y_over_x)
> >  {
> > +   const uint32_t bit_size = y_over_x->bit_size;
> > +
> >     nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
> > -   nir_ssa_def *one = nir_imm_float(b, 1.0f);
> > +   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0f, bit_size);
> >
> >     /*
> >      * range-reduction, first step:
> > @@ -265,25 +267,35 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x)
> >     nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
> >     nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
> >
> > +   const float coef[] = {
> > +      0.9999793128310355f,
> > +      -0.3326756418091246f,
> > +      0.1938924977115610f,
> > +      -0.1173503194786851f,
> > +      0.0536813784310406f,
> > +      -0.0121323213173444f,
> > +   };
> > +
> >     nir_ssa_def *polynomial_terms[] = {
> > -      nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
> > -      nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
> > -      nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
> > -      nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
> > -      nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
> > -      nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
> > +      nir_fmul(b, x, nir_imm_floatN_t(b, coef[0], bit_size)),
> > +      nir_fmul(b, x_3, nir_imm_floatN_t(b, coef[1], bit_size)),
> > +      nir_fmul(b, x_5, nir_imm_floatN_t(b, coef[2], bit_size)),
> > +      nir_fmul(b, x_7, nir_imm_floatN_t(b, coef[3], bit_size)),
> > +      nir_fmul(b, x_9, nir_imm_floatN_t(b, coef[4], bit_size)),
> > +      nir_fmul(b, x_11, nir_imm_floatN_t(b, coef[5], bit_size)),
>
> Any reason why you split the coefficients out into their own array?
> Just to avoid line wrapping?
Yes, I think that was the only reason.
> In a recent commit, I added nir_iadd_imm and nir_imul_imm helpers for
> adding or multiplying by an immediate. It might be worth doing the
> same thing for multiplying by and adding floats for these kinds of
> computations.
Right, yes, that's probably a good idea. I'll do that.
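Maybe something along these lines (just a sketch, the name and exact
signature are made up; it would follow the nir_imul_imm pattern but emit
the immediate at the bit size of the non-constant source):

static nir_ssa_def *
build_fmul_imm(nir_builder *b, nir_ssa_def *x, double y)
{
   /* Emitting the constant at x->bit_size makes the same code work for
    * 16-, 32- and 64-bit floats.
    */
   return nir_fmul(b, x, nir_imm_floatN_t(b, y, x->bit_size));
}

With that, each polynomial term would just be e.g.
build_fmul_imm(b, x_3, -0.3326756418091246) and the coef[] array would
not be needed at all.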
> >     };
> >
> >     nir_ssa_def *tmp =
> >        build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
> >
> >     /* range-reduction fixup */
> > +   nir_ssa_def *minus_2 = nir_imm_floatN_t(b, -2.0f, bit_size);
> > +   nir_ssa_def *m_pi_2 = nir_imm_floatN_t(b, M_PI_2f, bit_size);
> > +   nir_ssa_def *b2f = nir_b2f(b, nir_flt(b, one, abs_y_over_x));
> > +   b2f->bit_size = bit_size; /* do we prefer b2f<bitsize> opcodes? */
>
> I have patches which will fix this at least somewhat. For right now,
> what you did there is fine.
Ah yes, your patches will likely land ahead of this series, so I should
remember to update this when that happens.
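In the meantime, a bit-size-aware boolean-to-float could also be built
without patching the destination, e.g. with a bcsel (just a sketch, the
helper name is made up):

static nir_ssa_def *
build_b2f_sized(nir_builder *b, nir_ssa_def *cond, unsigned bit_size)
{
   /* Select between 1.0 and 0.0 at the requested bit size instead of
    * overwriting b2f's destination bit size after the fact.
    */
   return nir_bcsel(b, cond,
                    nir_imm_floatN_t(b, 1.0, bit_size),
                    nir_imm_floatN_t(b, 0.0, bit_size));
}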
> >     tmp = nir_fadd(b, tmp,
> > -                  nir_fmul(b,
> > -                           nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
> > -                           nir_fadd(b, nir_fmul(b, tmp,
> > -                                                nir_imm_float(b, -2.0f)),
> > -                                    nir_imm_float(b, M_PI_2f))));
> > +                  nir_fmul(b, b2f,
> > +                           nir_fadd(b, nir_fmul(b, tmp, minus_2), m_pi_2)));
> >
> >     /* sign fixup */
> >     return nir_fmul(b, tmp, nir_fsign(b, y_over_x));