[Mesa-dev] [PATCH 13/15] glsl: Add ldexp_to_arith lowering pass.

Fri Aug 23 09:42:35 PDT 2013

On 22 August 2013 16:08, Matt Turner <mattst88 at gmail.com> wrote:

> ---
>  src/glsl/ir_optimization.h      |   1 +
>  src/glsl/lower_instructions.cpp | 128
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 129 insertions(+)
>
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index b79c2b7..074686c 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -38,6 +38,7 @@
>  #define INT_DIV_TO_MUL_RCP 0x40
>  #define LRP_TO_ARITH       0x80
>  #define BITFIELD_INSERT_TO_BFM_BFI 0x100
> +#define LDEXP_TO_ARITH     0x200
>
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/lower_instructions.cpp
> b/src/glsl/lower_instructions.cpp
> index d32ec80..8b0a8e1 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -37,6 +37,7 @@
>   * - POW_TO_EXP2
>   * - LOG_TO_LOG2
>   * - MOD_TO_FRACT
> + * - LDEXP_TO_ARITH
>   * - LRP_TO_ARITH
>   * - BITFIELD_INSERT_TO_BFM_BFI
>   *
> @@ -82,6 +83,10 @@
>   * if we have to break it down like this anyway, it gives an
>   * opportunity to do things like constant fold the (1.0 / op1) easily.
>   *
> + * LDEXP_TO_ARITH:
> + * -------------
> + * Converts ir_binop_ldexp to arithmetic and bit operations.
> + *
>   * LRP_TO_ARITH:
>   * -------------
>   * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
> @@ -125,6 +130,7 @@ private:
>     void log_to_log2(ir_expression *);
>     void lrp_to_arith(ir_expression *);
>     void bitfield_insert_to_bfm_bfi(ir_expression *);
> +   void ldexp_to_arith(ir_expression *);
>  };
>
>  /**
> @@ -332,6 +338,123 @@
> lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
>     this->progress = true;
>  }
>
> +void
> +lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
> +{
> +   /* Translates
> +    *    ir_binop_ldexp x exp
> +    * into
> +    *
> +    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
> +    *    resulting_biased_exp = extracted_biased_exp + exp;
>

This comment is a little difficult to follow since it refers to exp_shift,
which isn't defined until further down in the code.  You might want to add a
comment saying something like "(where exp_shift = 23, the bit position of
the exponent field in an IEEE 754 single-precision float)".

> +    *
> +    *    if (resulting_biased_exp < 1) {
> +    *       return copysign(0.0, x);
> +    *    }
> +    *
> +    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
> +    *                       lshift(i2u(resulting_biased_exp), exp_shift));
> +    *
> +    * which we can't actually implement as such, since the GLSL IR doesn't
> +    * have vectorized if-statements. We actually implement it without
> branches
> +    * using conditional-select:
> +    *
> +    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
> +    *    resulting_biased_exp = extracted_biased_exp + exp;
> +    *
> +    *    is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
> +    *    x = cond_sel(is_not_zero_or_underflow, x, copysign(0.0f, x));
> +    *    resulting_biased_exp = cond_sel(is_not_zero_or_underflow,
> +    *                                    resulting_biased_exp, 0);
> +    *
> +    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
> +    *                       lshift(i2u(resulting_biased_exp), exp_shift));
> +    */
> +
> +   const unsigned vec_elem = ir->type->vector_elements;
> +
> +   /* Types */
> +   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT,
> vec_elem, 1);
> +   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL,
> vec_elem, 1);
> +
> +   /* Constants */
> +   ir_constant *zeroi = ir_constant::zero(ir, ivec);
> +   ir_constant *zerof = ir_constant::zero(ir, ir->type);
> +
> +   ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu,
> vec_elem);
> +   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
> +
> +   ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem);
> +
> +   /* Temporary variables */
> +   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
> +   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
> +
> +   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
> +                                                  ir_var_temporary);
> +
> +   ir_variable *extracted_biased_exp =
> +      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
> +   ir_variable *resulting_biased_exp =
> +      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
> +
> +   ir_variable *is_not_zero_or_underflow =
> +      new(ir) ir_variable(bvec, "is_not_zero_or_underflow",
> ir_var_temporary);
> +
> +   ir_instruction &i = *base_ir;
> +
> +   /* Copy <x> and <exp> arguments. */
> +   i.insert_before(x);
> +   i.insert_before(assign(x, ir->operands[0]));
> +   i.insert_before(exp);
> +   i.insert_before(assign(exp, ir->operands[1]));
> +
> +   /* Extract the biased exponent from <x>. */
> +   i.insert_before(extracted_biased_exp);
> +   i.insert_before(assign(extracted_biased_exp,
> +                          rshift(bitcast_f2i(abs(x)), exp_shift)));
> +
> +   i.insert_before(resulting_biased_exp);
> +   i.insert_before(assign(resulting_biased_exp,
> +                          add(extracted_biased_exp, exp)));
> +
> +   /* Test if result is ±0.0, subnormal, or underflow by checking if the
> +    * resulting biased exponent would be less than 0x1. If so, the result
> is
> +    * 0.0 with the sign of x. (Actually, invert the conditions so that
> +    * immediate values are the second arguments, which is better for i965)
> +    */
> +   i.insert_before(zero_sign_x);
> +   i.insert_before(assign(zero_sign_x,
> +                          bitcast_u2f(bit_or(bit_and(bitcast_f2u(x),
> sign_mask),
> +                                             bitcast_f2u(zerof)))));
>

Is it guaranteed that a later optimization pass will constant-fold
bitcast_f2u(zerof) down to the appropriate value (0u)?

Other than that, the patch is:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>

> +
> +   i.insert_before(is_not_zero_or_underflow);
> +   i.insert_before(assign(is_not_zero_or_underflow,
> +                          gequal(resulting_biased_exp,
> +                                  new(ir) ir_constant(0x1, vec_elem))));
> +   i.insert_before(assign(x, cond_sel(is_not_zero_or_underflow,
> +                                      x, zero_sign_x)));
> +   i.insert_before(assign(resulting_biased_exp,
> +                          cond_sel(is_not_zero_or_underflow,
> +                                   resulting_biased_exp, zeroi)));
> +
> +   /* We could test for overflows by checking if the resulting biased
> exponent
> +    * would be greater than 0xFE. Turns out we don't need to because the
> GLSL
> +    * spec says:
> +    *
> +    *    "If this product is too large to be represented in the
> +    *     floating-point type, the result is undefined."
> +    */
> +
> +   ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
> +   ir->operation = ir_unop_bitcast_u2f;
> +   ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),
> +                            lshift(i2u(resulting_biased_exp),
> exp_shift_clone));
> +   ir->operands[1] = NULL;
> +
> +   this->progress = true;
> +}
> +
>  ir_visitor_status
>  lower_instructions_visitor::visit_leave(ir_expression *ir)
>  {
> @@ -378,6 +501,11 @@ lower_instructions_visitor::visit_leave(ir_expression
> *ir)
>           bitfield_insert_to_bfm_bfi(ir);
>        break;
>
> +   case ir_binop_ldexp:
> +      if (lowering(LDEXP_TO_ARITH))
> +         ldexp_to_arith(ir);
> +      break;
> +
>     default:
>        return visit_continue;
>     }
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130823/a97620ca/attachment-0001.html>