[Mesa-dev] [PATCH] llvmpipe: Remove the special path for TGSI_OPCODE_EXP.

Wed Sep 11 06:58:06 PDT 2013

Hmm, sure, it is rarely used (for arb_vp and d3d9 vs 1.1; maybe 2.0 too,
though the semantics are different there even if the precision required
is the same).
The problem I have with this is that the emulation which will get used
instead is _extremely_ terrible. EXP should be a cheaper alternative to
EX2, yet the emulation will make it more than twice as expensive
(because there are _two_ ex2 calls in exp_emit()).
Also, since the exp/log functions actually have configurable precision
(though it is compile-time dependent for now), maybe we could exploit that
and use a polynomial of a lower degree?
Otherwise, though, having less specialized code makes sense.

Roland

Am 11.09.2013 13:04, schrieb jfonseca at vmware.com:
> From: José Fonseca <jfonseca at vmware.com>
> 
> It was wrong for EXP.y, as we clamped the source before computing the
> fractional part, and this opcode should be rarely used, so it's not
> worth the hassle.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_arit.c        | 80 ++++++++--------------
>  src/gallium/auxiliary/gallivm/lp_bld_arit.h        |  7 --
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ----
>  3 files changed, 30 insertions(+), 72 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 09107ff..00052ed 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = {
>  };
>  
>  
> -void
> -lp_build_exp2_approx(struct lp_build_context *bld,
> -                     LLVMValueRef x,
> -                     LLVMValueRef *p_exp2_int_part,
> -                     LLVMValueRef *p_frac_part,
> -                     LLVMValueRef *p_exp2)
> +LLVMValueRef
> +lp_build_exp2(struct lp_build_context *bld,
> +              LLVMValueRef x)
>  {
>     LLVMBuilderRef builder = bld->gallivm->builder;
>     const struct lp_type type = bld->type;
> @@ -3019,65 +3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld,
>  
>     assert(lp_check_value(bld->type, x));
>  
> -   if(p_exp2_int_part || p_frac_part || p_exp2) {
> -      /* TODO: optimize the constant case */
> -      if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> -          LLVMIsConstant(x)) {
> -         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> -                      __FUNCTION__);
> -      }
>  
> -      assert(type.floating && type.width == 32);
> +   /* TODO: optimize the constant case */
> +   if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> +       LLVMIsConstant(x)) {
> +      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> +                   __FUNCTION__);
> +   }
>  
> -      /* We want to preserve NaN and make sure than for exp2 if x > 128,
> -       * the result is INF  and if it's smaller than -126.9 the result is 0 */
> -      x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
> -                           GALLIVM_NAN_RETURN_SECOND);
> -      x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
> -                           GALLIVM_NAN_RETURN_SECOND);
> +   assert(type.floating && type.width == 32);
>  
> -      /* ipart = floor(x) */
> -      /* fpart = x - ipart */
> -      lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> -   }
> +   /* We want to preserve NaN and make sure than for exp2 if x > 128,
> +    * the result is INF  and if it's smaller than -126.9 the result is 0 */
> +   x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
> +                        GALLIVM_NAN_RETURN_SECOND);
> +   x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
> +                        GALLIVM_NAN_RETURN_SECOND);
>  
> -   if(p_exp2_int_part || p_exp2) {
> -      /* expipart = (float) (1 << ipart) */
> -      expipart = LLVMBuildAdd(builder, ipart,
> -                              lp_build_const_int_vec(bld->gallivm, type, 127), "");
> -      expipart = LLVMBuildShl(builder, expipart,
> -                              lp_build_const_int_vec(bld->gallivm, type, 23), "");
> -      expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> -   }
> +   /* ipart = floor(x) */
> +   /* fpart = x - ipart */
> +   lp_build_ifloor_fract(bld, x, &ipart, &fpart);
>  
> -   if(p_exp2) {
> -      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> -                                     Elements(lp_build_exp2_polynomial));
>  
> -      res = LLVMBuildFMul(builder, expipart, expfpart, "");
> -   }
>  
> -   if(p_exp2_int_part)
> -      *p_exp2_int_part = expipart;
> +   /* expipart = (float) (1 << ipart) */
> +   expipart = LLVMBuildAdd(builder, ipart,
> +                           lp_build_const_int_vec(bld->gallivm, type, 127), "");
> +   expipart = LLVMBuildShl(builder, expipart,
> +                           lp_build_const_int_vec(bld->gallivm, type, 23), "");
> +   expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
>  
> -   if(p_frac_part)
> -      *p_frac_part = fpart;
>  
> -   if(p_exp2)
> -      *p_exp2 = res;
> -}
> +   expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> +                                  Elements(lp_build_exp2_polynomial));
> +
> +   res = LLVMBuildFMul(builder, expipart, expfpart, "");
>  
>  
> -LLVMValueRef
> -lp_build_exp2(struct lp_build_context *bld,
> -              LLVMValueRef x)
> -{
> -   LLVMValueRef res;
> -   lp_build_exp2_approx(bld, x, NULL, NULL, &res);
>     return res;
>  }
>  
>  
> +
>  /**
>   * Extract the exponent of a IEEE-754 floating point value.
>   *
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> index d98025e..49d4e2c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld,
>                 LLVMValueRef x);
>  
>  void
> -lp_build_exp2_approx(struct lp_build_context *bld,
> -                     LLVMValueRef x,
> -                     LLVMValueRef *p_exp2_int_part,
> -                     LLVMValueRef *p_frac_part,
> -                     LLVMValueRef *p_exp2);
> -
> -void
>  lp_build_log2_approx(struct lp_build_context *bld,
>                       LLVMValueRef x,
>                       LLVMValueRef *p_exp,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 86c3249..1cfaf78 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1057,20 +1057,6 @@ ex2_emit_cpu(
>                                                          emit_data->args[0]);
>  }
>  
> -/* TGSI_OPCODE_EXP (CPU Only) */
> -static void
> -exp_emit_cpu(
> -   const struct lp_build_tgsi_action * action,
> -   struct lp_build_tgsi_context * bld_base,
> -   struct lp_build_emit_data * emit_data)
> -{
> -   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> -                        &emit_data->output[TGSI_CHAN_X],
> -                        &emit_data->output[TGSI_CHAN_Y],
> -                        &emit_data->output[TGSI_CHAN_Z]);
> -   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> -}
> -
>  /* TGSI_OPCODE_F2I (CPU Only) */
>  static void
>  f2i_emit_cpu(
> @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu(
>     bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
>     bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
>     bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> -   bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
>     bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
>     bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
>     bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
>