[Mesa-dev] [PATCH] llvmpipe: Remove the special path for TGSI_OPCODE_EXP.
Jose Fonseca
jfonseca at vmware.com
Wed Sep 11 07:28:23 PDT 2013
GLSL does not use it.
vs_2_0 does not use it either http://msdn.microsoft.com/en-us/library/windows/desktop/bb173373(v=vs.85).aspx
D3D10 doesn't have anything similar either.
It just didn't seem worth keeping this special path, and it seemed hard to fix it without breaking NaN/Inf correctness.
Jose
----- Original Message -----
> Hmm, sure, it is rarely used (for arb_vp and d3d9 vs 1.1 -- 2.0 too, maybe,
> though the semantics are different there even if the precision required
> is the same?).
> The problem I have with this is that the emulation which will get used
> instead is _extremely_ terrible. EXP should be a cheaper alternative to
> EX2, yet the emulation will make it more than twice as expensive
> (because there are _two_ ex2 calls in exp_emit()).
> Also, since the exp/log functions actually have configurable precision
> (though it is compile-time dependent for now), maybe we could exploit that
> and use a polynomial of lower degree?
> Otherwise, though, having less specialized code makes sense.
>
> Roland
>
>
>
> Am 11.09.2013 13:04, schrieb jfonseca at vmware.com:
> > From: José Fonseca <jfonseca at vmware.com>
> >
> > It was wrong for EXP.y, as we clamped the source before computing the
> > fractional part, and this opcode should be rarely used, so it's not
> > worth the hassle.
> > ---
> > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 80
> > ++++++++--------------
> > src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 --
> > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ----
> > 3 files changed, 30 insertions(+), 72 deletions(-)
> >
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > index 09107ff..00052ed 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> > @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = {
> > };
> >
> >
> > -void
> > -lp_build_exp2_approx(struct lp_build_context *bld,
> > - LLVMValueRef x,
> > - LLVMValueRef *p_exp2_int_part,
> > - LLVMValueRef *p_frac_part,
> > - LLVMValueRef *p_exp2)
> > +LLVMValueRef
> > +lp_build_exp2(struct lp_build_context *bld,
> > + LLVMValueRef x)
> > {
> > LLVMBuilderRef builder = bld->gallivm->builder;
> > const struct lp_type type = bld->type;
> > @@ -3019,65 +3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld,
> >
> > assert(lp_check_value(bld->type, x));
> >
> > - if(p_exp2_int_part || p_frac_part || p_exp2) {
> > - /* TODO: optimize the constant case */
> > - if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> > - LLVMIsConstant(x)) {
> > - debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> > - __FUNCTION__);
> > - }
> >
> > - assert(type.floating && type.width == 32);
> > + /* TODO: optimize the constant case */
> > + if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> > + LLVMIsConstant(x)) {
> > + debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> > + __FUNCTION__);
> > + }
> >
> > - /* We want to preserve NaN and make sure than for exp2 if x > 128,
> > - * the result is INF and if it's smaller than -126.9 the result is
> > 0 */
> > - x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > 128.0), x,
> > - GALLIVM_NAN_RETURN_SECOND);
> > - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > -126.99999), x,
> > - GALLIVM_NAN_RETURN_SECOND);
> > + assert(type.floating && type.width == 32);
> >
> > - /* ipart = floor(x) */
> > - /* fpart = x - ipart */
> > - lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> > - }
> > + /* We want to preserve NaN and make sure than for exp2 if x > 128,
> > + * the result is INF and if it's smaller than -126.9 the result is 0
> > */
> > + x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > 128.0), x,
> > + GALLIVM_NAN_RETURN_SECOND);
> > + x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type,
> > -126.99999), x,
> > + GALLIVM_NAN_RETURN_SECOND);
> >
> > - if(p_exp2_int_part || p_exp2) {
> > - /* expipart = (float) (1 << ipart) */
> > - expipart = LLVMBuildAdd(builder, ipart,
> > - lp_build_const_int_vec(bld->gallivm, type,
> > 127), "");
> > - expipart = LLVMBuildShl(builder, expipart,
> > - lp_build_const_int_vec(bld->gallivm, type,
> > 23), "");
> > - expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> > - }
> > + /* ipart = floor(x) */
> > + /* fpart = x - ipart */
> > + lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> >
> > - if(p_exp2) {
> > - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> > - Elements(lp_build_exp2_polynomial));
> >
> > - res = LLVMBuildFMul(builder, expipart, expfpart, "");
> > - }
> >
> > - if(p_exp2_int_part)
> > - *p_exp2_int_part = expipart;
> > + /* expipart = (float) (1 << ipart) */
> > + expipart = LLVMBuildAdd(builder, ipart,
> > + lp_build_const_int_vec(bld->gallivm, type,
> > 127), "");
> > + expipart = LLVMBuildShl(builder, expipart,
> > + lp_build_const_int_vec(bld->gallivm, type, 23),
> > "");
> > + expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> >
> > - if(p_frac_part)
> > - *p_frac_part = fpart;
> >
> > - if(p_exp2)
> > - *p_exp2 = res;
> > -}
> > + expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> > + Elements(lp_build_exp2_polynomial));
> > +
> > + res = LLVMBuildFMul(builder, expipart, expfpart, "");
> >
> >
> > -LLVMValueRef
> > -lp_build_exp2(struct lp_build_context *bld,
> > - LLVMValueRef x)
> > -{
> > - LLVMValueRef res;
> > - lp_build_exp2_approx(bld, x, NULL, NULL, &res);
> > return res;
> > }
> >
> >
> > +
> > /**
> > * Extract the exponent of a IEEE-754 floating point value.
> > *
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > index d98025e..49d4e2c 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> > @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld,
> > LLVMValueRef x);
> >
> > void
> > -lp_build_exp2_approx(struct lp_build_context *bld,
> > - LLVMValueRef x,
> > - LLVMValueRef *p_exp2_int_part,
> > - LLVMValueRef *p_frac_part,
> > - LLVMValueRef *p_exp2);
> > -
> > -void
> > lp_build_log2_approx(struct lp_build_context *bld,
> > LLVMValueRef x,
> > LLVMValueRef *p_exp,
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > index 86c3249..1cfaf78 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > @@ -1057,20 +1057,6 @@ ex2_emit_cpu(
> > emit_data->args[0]);
> > }
> >
> > -/* TGSI_OPCODE_EXP (CPU Only) */
> > -static void
> > -exp_emit_cpu(
> > - const struct lp_build_tgsi_action * action,
> > - struct lp_build_tgsi_context * bld_base,
> > - struct lp_build_emit_data * emit_data)
> > -{
> > - lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> > - &emit_data->output[TGSI_CHAN_X],
> > - &emit_data->output[TGSI_CHAN_Y],
> > - &emit_data->output[TGSI_CHAN_Z]);
> > - emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> > -}
> > -
> > /* TGSI_OPCODE_F2I (CPU Only) */
> > static void
> > f2i_emit_cpu(
> > @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu(
> > bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
> > bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
> > bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> > - bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
> > bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
> > bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
> > bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
> >
>
More information about the mesa-dev
mailing list