[Mesa-dev] [PATCH] llvmpipe: Remove the special path for TGSI_OPCODE_EXP.

Wed Sep 11 07:27:48 PDT 2013

Replying privately.

See also http://bugzilla.eng.vmware.com/show_bug.cgi?id=999655#c5

Jose

----- Original Message -----
> Hmm, sure, it is rarely used (for arb_vp and d3d9 vs 1.1 (2.0 too maybe,
> though the semantics are different there even if the precision required
> is the same))?
> The problem I have with this is that the emulation which will get used
> instead is _extremely_ terrible. EXP should be a cheaper alternative to
> EX2, yet the emulation will make it more than twice as expensive
> (because there are _two_ ex2 calls in exp_emit()).
> Also, since the exp/log functions actually have configurable precision
> (though it is compile-time dependent for now) maybe we could exploit that
> and use a polynomial with a lesser degree?
> Otherwise though having less specialized code makes sense.
> 
> Roland
> 
> 
> 
> Am 11.09.2013 13:04, schrieb jfonseca at vmware.com:
> > From: José Fonseca <jfonseca at vmware.com>
> > 
> > It was wrong for EXP.y, as we clamped the source before computing the
> > fractional part, and this opcode should be rarely used, so it's not
> > worth the hassle.
> > ---
> >  src/gallium/auxiliary/gallivm/lp_bld_arit.c        | 80
> >  ++++++++--------------
> >  src/gallium/auxiliary/gallivm/lp_bld_arit.h        |  7 --
> >  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ----
> >  3 files changed, 30 insertions(+), 72 deletions(-)
> > 
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > index 09107ff..00052ed 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = {
> >  };
> >  
> >  
> > -void
> > -lp_build_exp2_approx(struct lp_build_context *bld,
> > -                     LLVMValueRef x,
> > -                     LLVMValueRef *p_exp2_int_part,
> > -                     LLVMValueRef *p_frac_part,
> > -                     LLVMValueRef *p_exp2)
> > +LLVMValueRef
> > +lp_build_exp2(struct lp_build_context *bld,
> > +              LLVMValueRef x)
> >  {
> >     LLVMBuilderRef builder = bld->gallivm->builder;
> >     const struct lp_type type = bld->type;
> > @@ -3019,65 +3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld,
> >  
> >     assert(lp_check_value(bld->type, x));
> >  
> > -   if(p_exp2_int_part || p_frac_part || p_exp2) {
> > -      /* TODO: optimize the constant case */
> > -      if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> > -          LLVMIsConstant(x)) {
> > -         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> > -                      __FUNCTION__);
> > -      }
> >  
> > -      assert(type.floating && type.width == 32);
> > +   /* TODO: optimize the constant case */
> > +   if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> > +       LLVMIsConstant(x)) {
> > +      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> > +                   __FUNCTION__);
> > +   }
> >  
> > -      /* We want to preserve NaN and make sure than for exp2 if x > 128,
> > -       * the result is INF  and if it's smaller than -126.9 the result is
> > 0 */
> > -      x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > 128.0), x,
> > -                           GALLIVM_NAN_RETURN_SECOND);
> > -      x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > -126.99999), x,
> > -                           GALLIVM_NAN_RETURN_SECOND);
> > +   assert(type.floating && type.width == 32);
> >  
> > -      /* ipart = floor(x) */
> > -      /* fpart = x - ipart */
> > -      lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> > -   }
> > +   /* We want to preserve NaN and make sure than for exp2 if x > 128,
> > +    * the result is INF  and if it's smaller than -126.9 the result is 0
> > */
> > +   x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > 128.0), x,
> > +                        GALLIVM_NAN_RETURN_SECOND);
> > +   x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > -126.99999), x,
> > +                        GALLIVM_NAN_RETURN_SECOND);
> >  
> > -   if(p_exp2_int_part || p_exp2) {
> > -      /* expipart = (float) (1 << ipart) */
> > -      expipart = LLVMBuildAdd(builder, ipart,
> > -                              lp_build_const_int_vec(bld->gallivm, type,
> > 127), "");
> > -      expipart = LLVMBuildShl(builder, expipart,
> > -                              lp_build_const_int_vec(bld->gallivm, type,
> > 23), "");
> > -      expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> > -   }
> > +   /* ipart = floor(x) */
> > +   /* fpart = x - ipart */
> > +   lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> >  
> > -   if(p_exp2) {
> > -      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> > -                                     Elements(lp_build_exp2_polynomial));
> >  
> > -      res = LLVMBuildFMul(builder, expipart, expfpart, "");
> > -   }
> >  
> > -   if(p_exp2_int_part)
> > -      *p_exp2_int_part = expipart;
> > +   /* expipart = (float) (1 << ipart) */
> > +   expipart = LLVMBuildAdd(builder, ipart,
> > +                           lp_build_const_int_vec(bld->gallivm, type,
> > 127), "");
> > +   expipart = LLVMBuildShl(builder, expipart,
> > +                           lp_build_const_int_vec(bld->gallivm, type, 23),
> > "");
> > +   expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> >  
> > -   if(p_frac_part)
> > -      *p_frac_part = fpart;
> >  
> > -   if(p_exp2)
> > -      *p_exp2 = res;
> > -}
> > +   expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> > +                                  Elements(lp_build_exp2_polynomial));
> > +
> > +   res = LLVMBuildFMul(builder, expipart, expfpart, "");
> >  
> >  
> > -LLVMValueRef
> > -lp_build_exp2(struct lp_build_context *bld,
> > -              LLVMValueRef x)
> > -{
> > -   LLVMValueRef res;
> > -   lp_build_exp2_approx(bld, x, NULL, NULL, &res);
> >     return res;
> >  }
> >  
> >  
> > +
> >  /**
> >   * Extract the exponent of a IEEE-754 floating point value.
> >   *
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > index d98025e..49d4e2c 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld,
> >                 LLVMValueRef x);
> >  
> >  void
> > -lp_build_exp2_approx(struct lp_build_context *bld,
> > -                     LLVMValueRef x,
> > -                     LLVMValueRef *p_exp2_int_part,
> > -                     LLVMValueRef *p_frac_part,
> > -                     LLVMValueRef *p_exp2);
> > -
> > -void
> >  lp_build_log2_approx(struct lp_build_context *bld,
> >                       LLVMValueRef x,
> >                       LLVMValueRef *p_exp,
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > index 86c3249..1cfaf78 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > @@ -1057,20 +1057,6 @@ ex2_emit_cpu(
> >                                                          emit_data->args[0]);
> >  }
> >  
> > -/* TGSI_OPCODE_EXP (CPU Only) */
> > -static void
> > -exp_emit_cpu(
> > -   const struct lp_build_tgsi_action * action,
> > -   struct lp_build_tgsi_context * bld_base,
> > -   struct lp_build_emit_data * emit_data)
> > -{
> > -   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> > -                        &emit_data->output[TGSI_CHAN_X],
> > -                        &emit_data->output[TGSI_CHAN_Y],
> > -                        &emit_data->output[TGSI_CHAN_Z]);
> > -   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> > -}
> > -
> >  /* TGSI_OPCODE_F2I (CPU Only) */
> >  static void
> >  f2i_emit_cpu(
> > @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu(
> >     bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
> >     bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
> >     bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> > -   bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
> >     bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
> >     bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
> >     bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
> > 
>