[Mesa-dev] [PATCH 13/15] glsl: Add ldexp_to_arith lowering pass.
Paul Berry
stereotype441 at gmail.com
Fri Aug 23 09:42:35 PDT 2013
On 22 August 2013 16:08, Matt Turner <mattst88 at gmail.com> wrote:
> ---
> src/glsl/ir_optimization.h | 1 +
> src/glsl/lower_instructions.cpp | 128
> ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 129 insertions(+)
>
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index b79c2b7..074686c 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -38,6 +38,7 @@
> #define INT_DIV_TO_MUL_RCP 0x40
> #define LRP_TO_ARITH 0x80
> #define BITFIELD_INSERT_TO_BFM_BFI 0x100
> +#define LDEXP_TO_ARITH 0x200
>
> /**
> * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/lower_instructions.cpp
> b/src/glsl/lower_instructions.cpp
> index d32ec80..8b0a8e1 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -37,6 +37,7 @@
> * - POW_TO_EXP2
> * - LOG_TO_LOG2
> * - MOD_TO_FRACT
> + * - LDEXP_TO_ARITH
> * - LRP_TO_ARITH
> * - BITFIELD_INSERT_TO_BFM_BFI
> *
> @@ -82,6 +83,10 @@
> * if we have to break it down like this anyway, it gives an
> * opportunity to do things like constant fold the (1.0 / op1) easily.
> *
> + * LDEXP_TO_ARITH:
> + * -------------
> + * Converts ir_binop_ldexp to arithmetic and bit operations.
> + *
> * LRP_TO_ARITH:
> * -------------
> * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
> @@ -125,6 +130,7 @@ private:
> void log_to_log2(ir_expression *);
> void lrp_to_arith(ir_expression *);
> void bitfield_insert_to_bfm_bfi(ir_expression *);
> + void ldexp_to_arith(ir_expression *);
> };
>
> /**
> @@ -332,6 +338,123 @@
> lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
> this->progress = true;
> }
>
> +void
> +lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
> +{
> + /* Translates
> + * ir_binop_ldexp x exp
> + * into
> + *
> + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
> + * resulting_biased_exp = extracted_biased_exp + exp;
>
This comment is a little difficult to follow since it refers to exp_shift,
which you don't define until further down in the code. You might want to add
a comment saying something like "(where exp_shift = 23, the bit location of
the exponent part of an IEEE float)".
> + *
> + * if (resulting_biased_exp < 1) {
> + * return copysign(0.0, x);
> + * }
> + *
> + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
> + * lshift(i2u(resulting_biased_exp), exp_shift));
> + *
> + * which we can't actually implement as such, since the GLSL IR doesn't
> + * have vectorized if-statements. We actually implement it without
> branches
> + * using conditional-select:
> + *
> + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
> + * resulting_biased_exp = extracted_biased_exp + exp;
> + *
> + * is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
> + * x = cond_sel(is_not_zero_or_underflow, x, copysign(0.0f, x));
> + * resulting_biased_exp = cond_sel(is_not_zero_or_underflow,
> + * resulting_biased_exp, 0);
> + *
> + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
> + * lshift(i2u(resulting_biased_exp), exp_shift));
> + */
> +
> + const unsigned vec_elem = ir->type->vector_elements;
> +
> + /* Types */
> + const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT,
> vec_elem, 1);
> + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL,
> vec_elem, 1);
> +
> + /* Constants */
> + ir_constant *zeroi = ir_constant::zero(ir, ivec);
> + ir_constant *zerof = ir_constant::zero(ir, ir->type);
> +
> + ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu,
> vec_elem);
> + ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
> +
> + ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem);
> +
> + /* Temporary variables */
> + ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
> + ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
> +
> + ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
> + ir_var_temporary);
> +
> + ir_variable *extracted_biased_exp =
> + new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
> + ir_variable *resulting_biased_exp =
> + new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
> +
> + ir_variable *is_not_zero_or_underflow =
> + new(ir) ir_variable(bvec, "is_not_zero_or_underflow",
> ir_var_temporary);
> +
> + ir_instruction &i = *base_ir;
> +
> + /* Copy <x> and <exp> arguments. */
> + i.insert_before(x);
> + i.insert_before(assign(x, ir->operands[0]));
> + i.insert_before(exp);
> + i.insert_before(assign(exp, ir->operands[1]));
> +
> + /* Extract the biased exponent from <x>. */
> + i.insert_before(extracted_biased_exp);
> + i.insert_before(assign(extracted_biased_exp,
> + rshift(bitcast_f2i(abs(x)), exp_shift)));
> +
> + i.insert_before(resulting_biased_exp);
> + i.insert_before(assign(resulting_biased_exp,
> + add(extracted_biased_exp, exp)));
> +
> + /* Test if result is ±0.0, subnormal, or underflow by checking if the
> + * resulting biased exponent would be less than 0x1. If so, the result
> is
> + * 0.0 with the sign of x. (Actually, invert the conditions so that
> + * immediate values are the second arguments, which is better for i965)
> + */
> + i.insert_before(zero_sign_x);
> + i.insert_before(assign(zero_sign_x,
> + bitcast_u2f(bit_or(bit_and(bitcast_f2u(x),
> sign_mask),
> + bitcast_f2u(zerof)))));
>
Is it guaranteed that future optimization passes will constant fold
bitcast_f2u(zerof) down to the appropriate value?
Other than that, the patch is:
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
> +
> + i.insert_before(is_not_zero_or_underflow);
> + i.insert_before(assign(is_not_zero_or_underflow,
> + gequal(resulting_biased_exp,
> + new(ir) ir_constant(0x1, vec_elem))));
> + i.insert_before(assign(x, cond_sel(is_not_zero_or_underflow,
> + x, zero_sign_x)));
> + i.insert_before(assign(resulting_biased_exp,
> + cond_sel(is_not_zero_or_underflow,
> + resulting_biased_exp, zeroi)));
> +
> + /* We could test for overflows by checking if the resulting biased
> exponent
> + * would be greater than 0xFE. Turns out we don't need to because the
> GLSL
> + * spec says:
> + *
> + * "If this product is too large to be represented in the
> + * floating-point type, the result is undefined."
> + */
> +
> + ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
> + ir->operation = ir_unop_bitcast_u2f;
> + ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),
> + lshift(i2u(resulting_biased_exp),
> exp_shift_clone));
> + ir->operands[1] = NULL;
> +
> + this->progress = true;
> +}
> +
> ir_visitor_status
> lower_instructions_visitor::visit_leave(ir_expression *ir)
> {
> @@ -378,6 +501,11 @@ lower_instructions_visitor::visit_leave(ir_expression
> *ir)
> bitfield_insert_to_bfm_bfi(ir);
> break;
>
> + case ir_binop_ldexp:
> + if (lowering(LDEXP_TO_ARITH))
> + ldexp_to_arith(ir);
> + break;
> +
> default:
> return visit_continue;
> }
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130823/a97620ca/attachment-0001.html>
More information about the mesa-dev
mailing list