<div dir="ltr">On 22 August 2013 16:08, Matt Turner <span dir="ltr">&lt;<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
---<br>
 src/glsl/ir_optimization.h      |   1 +<br>
 src/glsl/lower_instructions.cpp | 128 ++++++++++++++++++++++++++++++++++++++++<br>
 2 files changed, 129 insertions(+)<br>
<br>
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h<br>
index b79c2b7..074686c 100644<br>
--- a/src/glsl/ir_optimization.h<br>
+++ b/src/glsl/ir_optimization.h<br>
@@ -38,6 +38,7 @@<br>
 #define INT_DIV_TO_MUL_RCP 0x40<br>
 #define LRP_TO_ARITH       0x80<br>
 #define BITFIELD_INSERT_TO_BFM_BFI 0x100<br>
+#define LDEXP_TO_ARITH     0x200<br>
<br>
 /**<br>
  * \see class lower_packing_builtins_visitor<br>
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp<br>
index d32ec80..8b0a8e1 100644<br>
--- a/src/glsl/lower_instructions.cpp<br>
+++ b/src/glsl/lower_instructions.cpp<br>
@@ -37,6 +37,7 @@<br>
  * - POW_TO_EXP2<br>
  * - LOG_TO_LOG2<br>
  * - MOD_TO_FRACT<br>
+ * - LDEXP_TO_ARITH<br>
  * - LRP_TO_ARITH<br>
  * - BITFIELD_INSERT_TO_BFM_BFI<br>
  *<br>
@@ -82,6 +83,10 @@<br>
  * if we have to break it down like this anyway, it gives an<br>
  * opportunity to do things like constant fold the (1.0 / op1) easily.<br>
  *<br>
+ * LDEXP_TO_ARITH:<br>
+ * -------------<br>
+ * Converts ir_binop_ldexp to arithmetic and bit operations.<br>
+ *<br>
  * LRP_TO_ARITH:<br>
  * -------------<br>
  * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).<br>
@@ -125,6 +130,7 @@ private:<br>
    void log_to_log2(ir_expression *);<br>
    void lrp_to_arith(ir_expression *);<br>
    void bitfield_insert_to_bfm_bfi(ir_expression *);<br>
+   void ldexp_to_arith(ir_expression *);<br>
 };<br>
<br>
 /**<br>
@@ -332,6 +338,123 @@ lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)<br>
    this->progress = true;<br>
 }<br>
<br>
+void<br>
+lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)<br>
+{<br>
+   /* Translates<br>
+    *    ir_binop_ldexp x exp<br>
+    * into<br>
+    *<br>
+    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);<br>
+    *    resulting_biased_exp = extracted_biased_exp + exp;<br></blockquote><div><br></div><div>This comment is a little difficult to follow because it refers to exp_shift, which isn't defined until further down in the code.  You might want to add a note saying something like "(where exp_shift = 23, the bit position of the exponent field in an IEEE single-precision float)".<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+    *<br>
+    *    if (resulting_biased_exp &lt; 1) {<br>
+    *       return copysign(0.0, x);<br>
+    *    }<br>
+    *<br>
+    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |<br>
+    *                       lshift(i2u(resulting_biased_exp), exp_shift));<br>
+    *<br>
+    * which we can't actually implement as such, since the GLSL IR doesn't<br>
+    * have vectorized if-statements. We actually implement it without branches<br>
+    * using conditional-select:<br>
+    *<br>
+    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);<br>
+    *    resulting_biased_exp = extracted_biased_exp + exp;<br>
+    *<br>
+    *    is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);<br>
+    *    x = cond_sel(is_not_zero_or_underflow, x, copysign(0.0f, x));<br>
+    *    resulting_biased_exp = cond_sel(is_not_zero_or_underflow,<br>
+    *                                    resulting_biased_exp, 0);<br>
+    *<br>
+    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |<br>
+    *                       lshift(i2u(resulting_biased_exp), exp_shift));<br>
+    */<br>
+<br>
+   const unsigned vec_elem = ir->type->vector_elements;<br>
+<br>
+   /* Types */<br>
+   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);<br>
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);<br>
+<br>
+   /* Constants */<br>
+   ir_constant *zeroi = ir_constant::zero(ir, ivec);<br>
+   ir_constant *zerof = ir_constant::zero(ir, ir->type);<br>
+<br>
+   ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem);<br>
+   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);<br>
+<br>
+   ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem);<br>
+<br>
+   /* Temporary variables */<br>
+   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);<br>
+   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);<br>
+<br>
+   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",<br>
+                                                  ir_var_temporary);<br>
+<br>
+   ir_variable *extracted_biased_exp =<br>
+      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);<br>
+   ir_variable *resulting_biased_exp =<br>
+      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);<br>
+<br>
+   ir_variable *is_not_zero_or_underflow =<br>
+      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);<br>
+<br>
+   ir_instruction &i = *base_ir;<br>
+<br>
+   /* Copy &lt;x&gt; and &lt;exp&gt; arguments. */<br>
+   i.insert_before(x);<br>
+   i.insert_before(assign(x, ir->operands[0]));<br>
+   i.insert_before(exp);<br>
+   i.insert_before(assign(exp, ir->operands[1]));<br>
+<br>
+   /* Extract the biased exponent from &lt;x&gt;. */<br>
+   i.insert_before(extracted_biased_exp);<br>
+   i.insert_before(assign(extracted_biased_exp,<br>
+                          rshift(bitcast_f2i(abs(x)), exp_shift)));<br>
+<br>
+   i.insert_before(resulting_biased_exp);<br>
+   i.insert_before(assign(resulting_biased_exp,<br>
+                          add(extracted_biased_exp, exp)));<br>
+<br>
+   /* Test if result is ±0.0, subnormal, or underflow by checking if the<br>
+    * resulting biased exponent would be less than 0x1. If so, the result is<br>
+    * 0.0 with the sign of x. (Actually, invert the conditions so that<br>
+    * immediate values are the second arguments, which is better for i965)<br>
+    */<br>
+   i.insert_before(zero_sign_x);<br>
+   i.insert_before(assign(zero_sign_x,<br>
+                          bitcast_u2f(bit_or(bit_and(bitcast_f2u(x), sign_mask),<br>
+                                             bitcast_f2u(zerof)))));<br></blockquote><div><br></div><div>Is it guaranteed that future optimization passes will constant fold bitcast_f2u(zerof) down to the appropriate value?<br>
<br></div><div>Other than that, the patch is:<br><br>Reviewed-by: Paul Berry &lt;<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>&gt;<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>
+   i.insert_before(is_not_zero_or_underflow);<br>
+   i.insert_before(assign(is_not_zero_or_underflow,<br>
+                          gequal(resulting_biased_exp,<br>
+                                  new(ir) ir_constant(0x1, vec_elem))));<br>
+   i.insert_before(assign(x, cond_sel(is_not_zero_or_underflow,<br>
+                                      x, zero_sign_x)));<br>
+   i.insert_before(assign(resulting_biased_exp,<br>
+                          cond_sel(is_not_zero_or_underflow,<br>
+                                   resulting_biased_exp, zeroi)));<br>
+<br>
+   /* We could test for overflows by checking if the resulting biased exponent<br>
+    * would be greater than 0xFE. Turns out we don't need to because the GLSL<br>
+    * spec says:<br>
+    *<br>
+    *    "If this product is too large to be represented in the<br>
+    *     floating-point type, the result is undefined."<br>
+    */<br>
+<br>
+   ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);<br>
+   ir->operation = ir_unop_bitcast_u2f;<br>
+   ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),<br>
+                            lshift(i2u(resulting_biased_exp), exp_shift_clone));<br>
+   ir->operands[1] = NULL;<br>
+<br>
+   this->progress = true;<br>
+}<br>
+<br>
 ir_visitor_status<br>
 lower_instructions_visitor::visit_leave(ir_expression *ir)<br>
 {<br>
@@ -378,6 +501,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)<br>
          bitfield_insert_to_bfm_bfi(ir);<br>
       break;<br>
<br>
+   case ir_binop_ldexp:<br>
+      if (lowering(LDEXP_TO_ARITH))<br>
+         ldexp_to_arith(ir);<br>
+      break;<br>
+<br>
    default:<br>
       return visit_continue;<br>
    }<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.3.2<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>