<div dir="ltr">On 22 August 2013 16:08, Matt Turner <<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>> wrote: <div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> --- src/glsl/ir_optimization.h | 1 + src/glsl/lower_instructions.cpp | 128 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index b79c2b7..074686c 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -38,6 +38,7 @@ #define INT_DIV_TO_MUL_RCP 0x40 #define LRP_TO_ARITH 0x80 #define BITFIELD_INSERT_TO_BFM_BFI 0x100 +#define LDEXP_TO_ARITH 0x200 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index d32ec80..8b0a8e1 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -37,6 +37,7 @@ * - POW_TO_EXP2 * - LOG_TO_LOG2 * - MOD_TO_FRACT + * - LDEXP_TO_ARITH * - LRP_TO_ARITH * - BITFIELD_INSERT_TO_BFM_BFI * @@ -82,6 +83,10 @@ * if we have to break it down like this anyway, it gives an * opportunity to do things like constant fold the (1.0 / op1) easily. * + * LDEXP_TO_ARITH: + * ------------- + * Converts ir_binop_ldexp to arithmetic and bit operations. + * * LRP_TO_ARITH: * ------------- * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2). @@ -125,6 +130,7 @@ private: void log_to_log2(ir_expression *); void lrp_to_arith(ir_expression *); void bitfield_insert_to_bfm_bfi(ir_expression *); + void ldexp_to_arith(ir_expression *); }; /** @@ -332,6 +338,123 @@ lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) +{ + /* Translates + * ir_binop_ldexp x exp + * into + * + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); + * resulting_biased_exp = extracted_biased_exp + exp; </blockquote><div> </div><div>This comment is a little difficult to follow since it refers to exp_shift, which you don't define until down in the code. You might want to add a comment saying something like "(where exp_shift = 23, the bit location of the exponent part of an IEEE float)" </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + * + * if (resulting_biased_exp < 1) { + * return copysign(0.0, x); + * } + * + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | + * lshift(i2u(resulting_biased_exp), exp_shift)); + * + * which we can't actually implement as such, since the GLSL IR doesn't + * have vectorized if-statements. We actually implement it without branches + * using conditional-select: + * + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); + * resulting_biased_exp = extracted_biased_exp + exp; + * + * is_not_zero_or_underflow = gequal(resulting_biased_exp, 1); + * x = cond_sel(is_not_zero_or_underflow, x, copysign(0.0f, x)); + * resulting_biased_exp = cond_sel(is_not_zero_or_underflow, + * resulting_biased_exp, 0); + * + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | + * lshift(i2u(resulting_biased_exp), exp_shift)); + */ + + const unsigned vec_elem = ir->type->vector_elements; + + /* Types */ + const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + + /* Constants */ + ir_constant *zeroi = ir_constant::zero(ir, ivec); + ir_constant *zerof = ir_constant::zero(ir, ir->type); + + ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem); + ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); + + ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem); + + /* Temporary variables */ + ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); + ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); + + ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x", + ir_var_temporary); + + ir_variable *extracted_biased_exp = + new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); + ir_variable *resulting_biased_exp = + new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); + + ir_variable *is_not_zero_or_underflow = + new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary); + + ir_instruction &i = *base_ir; + + /* Copy <x> and <exp> arguments. */ + i.insert_before(x); + i.insert_before(assign(x, ir->operands[0])); + i.insert_before(exp); + i.insert_before(assign(exp, ir->operands[1])); + + /* Extract the biased exponent from <x>. */ + i.insert_before(extracted_biased_exp); + i.insert_before(assign(extracted_biased_exp, + rshift(bitcast_f2i(abs(x)), exp_shift))); + + i.insert_before(resulting_biased_exp); + i.insert_before(assign(resulting_biased_exp, + add(extracted_biased_exp, exp))); + + /* Test if result is ą0.0, subnormal, or underflow by checking if the + * resulting biased exponent would be less than 0x1. If so, the result is + * 0.0 with the sign of x. (Actually, invert the conditions so that + * immediate values are the second arguments, which is better for i965) + */ + i.insert_before(zero_sign_x); + i.insert_before(assign(zero_sign_x, + bitcast_u2f(bit_or(bit_and(bitcast_f2u(x), sign_mask), + bitcast_f2u(zerof))))); </blockquote><div> </div><div>Is it guaranteed that future optimization passes will constant fold bitcast_f2u(zerof) down to the appropriate value? </div><div>Other than that the patch is: Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>> </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + + i.insert_before(is_not_zero_or_underflow); + i.insert_before(assign(is_not_zero_or_underflow, + gequal(resulting_biased_exp, + new(ir) ir_constant(0x1, vec_elem)))); + i.insert_before(assign(x, cond_sel(is_not_zero_or_underflow, + x, zero_sign_x))); + i.insert_before(assign(resulting_biased_exp, + cond_sel(is_not_zero_or_underflow, + resulting_biased_exp, zeroi))); + + /* We could test for overflows by checking if the resulting biased exponent + * would be greater than 0xFE. Turns out we don't need to because the GLSL + * spec says: + * + * "If this product is too large to be represented in the + * floating-point type, the result is undefined." + */ + + ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL); + ir->operation = ir_unop_bitcast_u2f; + ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask), + lshift(i2u(resulting_biased_exp), exp_shift_clone)); + ir->operands[1] = NULL; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -378,6 +501,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) bitfield_insert_to_bfm_bfi(ir); break; + case ir_binop_ldexp: + if (lowering(LDEXP_TO_ARITH)) + ldexp_to_arith(ir); + break; + default: return visit_continue; } -- 1.8.3.2 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> </blockquote></div> </div></div>