[Mesa-dev] [PATCH] llvmpipe: Remove the special path for TGSI_OPCODE_EXP.
Roland Scheidegger
sroland at vmware.com
Wed Sep 11 06:58:06 PDT 2013
Hmm, sure, it is rarely used (for arb_vp and d3d9 vs 1.1, and maybe 2.0 too,
though the semantics are different there even if the precision required
is the same).
The problem I have with this is that the emulation which will get used
instead is _extremely_ terrible. EXP should be a cheaper alternative to
EX2, yet the emulation will make it more than twice as expensive
(because there are _two_ ex2 calls in exp_emit()).
Also, since the exp/log functions actually have configurable precision
(though it is compile-time dependent for now), maybe we could exploit that
and use a polynomial of lower degree?
Otherwise, though, having less specialized code makes sense.
Roland
Am 11.09.2013 13:04, schrieb jfonseca at vmware.com:
> From: José Fonseca <jfonseca at vmware.com>
>
> It was wrong for EXP.y, as we clamped the source before computing the
> fractional part, and this opcode should be rarely used, so it's not
> worth the hassle.
> ---
> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 80 ++++++++--------------
> src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 --
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ----
> 3 files changed, 30 insertions(+), 72 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 09107ff..00052ed 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = {
> };
>
>
> -void
> -lp_build_exp2_approx(struct lp_build_context *bld,
> - LLVMValueRef x,
> - LLVMValueRef *p_exp2_int_part,
> - LLVMValueRef *p_frac_part,
> - LLVMValueRef *p_exp2)
> +LLVMValueRef
> +lp_build_exp2(struct lp_build_context *bld,
> + LLVMValueRef x)
> {
> LLVMBuilderRef builder = bld->gallivm->builder;
> const struct lp_type type = bld->type;
> @@ -3019,65 +3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld,
>
> assert(lp_check_value(bld->type, x));
>
> - if(p_exp2_int_part || p_frac_part || p_exp2) {
> - /* TODO: optimize the constant case */
> - if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> - LLVMIsConstant(x)) {
> - debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> - __FUNCTION__);
> - }
>
> - assert(type.floating && type.width == 32);
> + /* TODO: optimize the constant case */
> + if (gallivm_debug & GALLIVM_DEBUG_PERF &&
> + LLVMIsConstant(x)) {
> + debug_printf("%s: inefficient/imprecise constant arithmetic\n",
> + __FUNCTION__);
> + }
>
> - /* We want to preserve NaN and make sure than for exp2 if x > 128,
> - * the result is INF and if it's smaller than -126.9 the result is 0 */
> - x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
> - GALLIVM_NAN_RETURN_SECOND);
> - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
> - GALLIVM_NAN_RETURN_SECOND);
> + assert(type.floating && type.width == 32);
>
> - /* ipart = floor(x) */
> - /* fpart = x - ipart */
> - lp_build_ifloor_fract(bld, x, &ipart, &fpart);
> - }
> + /* We want to preserve NaN and make sure than for exp2 if x > 128,
> + * the result is INF and if it's smaller than -126.9 the result is 0 */
> + x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
> + GALLIVM_NAN_RETURN_SECOND);
> + x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
> + GALLIVM_NAN_RETURN_SECOND);
>
> - if(p_exp2_int_part || p_exp2) {
> - /* expipart = (float) (1 << ipart) */
> - expipart = LLVMBuildAdd(builder, ipart,
> - lp_build_const_int_vec(bld->gallivm, type, 127), "");
> - expipart = LLVMBuildShl(builder, expipart,
> - lp_build_const_int_vec(bld->gallivm, type, 23), "");
> - expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
> - }
> + /* ipart = floor(x) */
> + /* fpart = x - ipart */
> + lp_build_ifloor_fract(bld, x, &ipart, &fpart);
>
> - if(p_exp2) {
> - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> - Elements(lp_build_exp2_polynomial));
>
> - res = LLVMBuildFMul(builder, expipart, expfpart, "");
> - }
>
> - if(p_exp2_int_part)
> - *p_exp2_int_part = expipart;
> + /* expipart = (float) (1 << ipart) */
> + expipart = LLVMBuildAdd(builder, ipart,
> + lp_build_const_int_vec(bld->gallivm, type, 127), "");
> + expipart = LLVMBuildShl(builder, expipart,
> + lp_build_const_int_vec(bld->gallivm, type, 23), "");
> + expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
>
> - if(p_frac_part)
> - *p_frac_part = fpart;
>
> - if(p_exp2)
> - *p_exp2 = res;
> -}
> + expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
> + Elements(lp_build_exp2_polynomial));
> +
> + res = LLVMBuildFMul(builder, expipart, expfpart, "");
>
>
> -LLVMValueRef
> -lp_build_exp2(struct lp_build_context *bld,
> - LLVMValueRef x)
> -{
> - LLVMValueRef res;
> - lp_build_exp2_approx(bld, x, NULL, NULL, &res);
> return res;
> }
>
>
> +
> /**
> * Extract the exponent of a IEEE-754 floating point value.
> *
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> index d98025e..49d4e2c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld,
> LLVMValueRef x);
>
> void
> -lp_build_exp2_approx(struct lp_build_context *bld,
> - LLVMValueRef x,
> - LLVMValueRef *p_exp2_int_part,
> - LLVMValueRef *p_frac_part,
> - LLVMValueRef *p_exp2);
> -
> -void
> lp_build_log2_approx(struct lp_build_context *bld,
> LLVMValueRef x,
> LLVMValueRef *p_exp,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 86c3249..1cfaf78 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1057,20 +1057,6 @@ ex2_emit_cpu(
> emit_data->args[0]);
> }
>
> -/* TGSI_OPCODE_EXP (CPU Only) */
> -static void
> -exp_emit_cpu(
> - const struct lp_build_tgsi_action * action,
> - struct lp_build_tgsi_context * bld_base,
> - struct lp_build_emit_data * emit_data)
> -{
> - lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> - &emit_data->output[TGSI_CHAN_X],
> - &emit_data->output[TGSI_CHAN_Y],
> - &emit_data->output[TGSI_CHAN_Z]);
> - emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> -}
> -
> /* TGSI_OPCODE_F2I (CPU Only) */
> static void
> f2i_emit_cpu(
> @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu(
> bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> - bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
>
More information about the mesa-dev
mailing list