[Mesa-dev] [PATCH v2 27/28] glsl: lower double optional passes (v2)

Sun Feb 8 00:09:34 PST 2015

On Fri, Feb 6, 2015 at 4:46 AM, Ian Romanick <idr at freedesktop.org> wrote:
> On 02/06/2015 06:56 AM, Ilia Mirkin wrote:
>> From: Dave Airlie <airlied at gmail.com>
>>
>> These lowering passes are optional for the backend to request. Currently
>> the TGSI softpipe backend, and most likely the r600g backend, would want to
>> use these passes as is. They aim to hit the gallium opcodes from the standard
>> rounding/truncation functions.
>>
>> v2: also lower floor in mod_to_floor
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>> ---
>>  src/glsl/ir_optimization.h      |   1 +
>>  src/glsl/lower_instructions.cpp | 212 ++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 213 insertions(+)
>>
>> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
>> index 912d910..9f91e2f 100644
>> --- a/src/glsl/ir_optimization.h
>> +++ b/src/glsl/ir_optimization.h
>> @@ -41,6 +41,7 @@
>>  #define CARRY_TO_ARITH     0x200
>>  #define BORROW_TO_ARITH    0x400
>>  #define SAT_TO_CLAMP       0x800
>> +#define DOPS_TO_DFRAC      0x1000
>>
>>  /**
>>   * \see class lower_packing_builtins_visitor
>> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
>> index 140b6d4..bf45c95 100644
>> --- a/src/glsl/lower_instructions.cpp
>> +++ b/src/glsl/lower_instructions.cpp
>> @@ -42,6 +42,7 @@
>>   * - CARRY_TO_ARITH
>>   * - BORROW_TO_ARITH
>>   * - SAT_TO_CLAMP
>> + * - DOPS_TO_DFRAC
>>   *
>>   * SUB_TO_ADD_NEG:
>>   * ---------------
>> @@ -112,6 +113,9 @@
>>   * -------------
>>   * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
>>   *
>> + * DOPS_TO_DFRAC:
>> + * --------------
>> + * Converts double trunc, ceil, floor, round to fract
>>   */
>>
>>  #include "main/core.h" /* for M_LOG2E */
>> @@ -151,6 +155,11 @@ private:
>>     void sat_to_clamp(ir_expression *);
>>     void double_dot_to_fma(ir_expression *);
>>     void double_lrp(ir_expression *);
>> +   void dceil_to_dfrac(ir_expression *);
>> +   void dfloor_to_dfrac(ir_expression *);
>> +   void dround_even_to_dfrac(ir_expression *);
>> +   void dtrunc_to_dfrac(ir_expression *);
>> +   void dsign_to_csel(ir_expression *);
>>  };
>>
>>  } /* anonymous namespace */
>> @@ -315,6 +324,9 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
>>     ir_expression *const floor_expr =
>>        new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
>>
>> +   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +      dfloor_to_dfrac(floor_expr);
>> +
>>     ir_expression *const mul_expr =
>>        new(ir) ir_expression(ir_binop_mul,
>>                              new(ir) ir_dereference_variable(y),
>> @@ -596,6 +608,182 @@ lower_instructions_visitor::double_lrp(ir_expression *ir)
>>     this->progress = true;
>>  }
>>
>> +void
>> +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
>> +{
>> +   /*
>> +    * frtemp = frac(x);
>> +    * temp = sub(x, frtemp);
>> +    * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
>> +    */
>> +   ir_instruction &i = *base_ir;
>> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
>> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
>> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
>> +                                             ir_var_temporary);
>> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
>> +                                           ir_var_temporary);
>> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
>> +                                         ir_var_temporary);
>
> For all these functions, I think you really want to use an ir_factory.
> Then you can just use ir_factory::make_temp and ir_factory::constant.
> That would reduce a lot of the clone calls... I think it would make this
> code a lot easier to read.

ir_factory::constant won't do vectors. There are only 2 instances of
ir_factory usage in the tree, and I don't understand how to use it.
It'll require a better understanding of the GLSL IR than I have right
now to operate it. TBH I don't really see what it adds beyond the
complexity of having a separate exec list that must then be
reintegrated. How will it help get rid of clones?

I'd really prefer to leave it as is (but obviously remove all the
unnecessary vars).

>
>> +
>> +   i.insert_before(frtemp);
>> +   i.insert_before(assign(frtemp, fract(ir->operands[0])));
>> +
>> +   i.insert_before(temp);
>> +   i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
>> +
>> +   i.insert_before(t2);
>> +   i.insert_before(assign(t2, csel(nequal(frtemp, zero), one, zero->clone(ir, NULL))));
>> +   ir->operation = ir_binop_add;
>> +   ir->operands[0] = new(ir) ir_dereference_variable(temp);
>> +   ir->operands[1] = new(ir) ir_dereference_variable(t2);
>> +}
>> +
>> +void
>> +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
>> +{
>> +   /*
>> +    * frtemp = frac(x);
>> +    * result = sub(x, frtemp);
>> +    */
>> +   ir_instruction &i = *base_ir;
>> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
>> +                                             ir_var_temporary);
>> +
>> +   i.insert_before(frtemp);
>> +   i.insert_before(assign(frtemp, fract(ir->operands[0]->clone(ir, NULL))));
>> +
>> +   ir->operation = ir_binop_sub;
>> +   ir->operands[1] = new(ir) ir_dereference_variable(frtemp);
>> +}
>> +void
>> +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
>> +{
>> +   /*
>> +    * insane but works
>> +    * temp = x + 0.5;
>> +    * frtemp = frac(temp);
>> +    * t2 = sub(temp, frtemp);
>> +    * if (frac(x) == 0.5)
>> +    *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
>> +    *  else
>> +    *     result = t2;
>> +
>> +    */
>> +   const unsigned vec_elem = ir->type->vector_elements;
>> +   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
>> +   ir_instruction &i = *base_ir;
>> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
>> +                                             ir_var_temporary);
>> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
>> +                                           ir_var_temporary);
>> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
>> +                                           ir_var_temporary);
>> +   ir_variable *t3 = new(ir) ir_variable(bvec, "t3",
>> +                                           ir_var_temporary);
>> +   ir_variable *t4 = new(ir) ir_variable(bvec, "t4",
>> +                                           ir_var_temporary);
>> +   ir_variable *t5 = new(ir) ir_variable(ir->operands[0]->type, "t5",
>> +                                           ir_var_temporary);
>> +   ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
>> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
>> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
>> +
>> +   i.insert_before(temp);
>> +   i.insert_before(assign(temp, add(ir->operands[0], p5)));
>> +
>> +   i.insert_before(frtemp);
>> +   i.insert_before(assign(frtemp, fract(temp)));
>> +
>> +   i.insert_before(t2);
>> +   i.insert_before(assign(t2, sub(temp, frtemp)));
>> +
>> +   i.insert_before(t3);
>> +   i.insert_before(assign(t3, equal(fract(ir->operands[0]->clone(ir, NULL)), p5->clone(ir, NULL))));
>> +
>> +   i.insert_before(t4);
>> +   i.insert_before(assign(t4, equal(fract(mul(t2, p5->clone(ir, NULL))), zero)));
>> +
>> +   i.insert_before(t5);
>> +   i.insert_before(assign(t5, csel(t4, t2, sub(t2, one))));
>> +
>> +   ir->operation = ir_triop_csel;
>> +   ir->operands[0] = new(ir) ir_dereference_variable(t3);
>> +   ir->operands[1] = new(ir) ir_dereference_variable(t5);
>> +   ir->operands[2] = new(ir) ir_dereference_variable(t2);
>> +}
>> +
>> +void
>> +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
>> +{
>> +   /*
>> +    * frtemp = frac(x);
>> +    * temp = sub(x, frtemp);
>> +    * if (x >= 0)
>> +    *     result = temp;
>> +    * else
>> +    *     result = temp + (frtemp == 0.0) ? 0 : 1;
>
> Isn't this
>
>     result = temp + (x < 0 && frtemp == 0.0) ? 0 : 1;
>
> or
>
>     result = temp + csel(x < 0 && frtemp == 0.0, 0, 1);

I suppose so... done.

>
>> +    */
>> +   const unsigned vec_elem = ir->type->vector_elements;
>> +   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
>> +   ir_instruction &i = *base_ir;
>> +
>> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
>> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
>> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
>> +                                             ir_var_temporary);
>> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
>> +                                           ir_var_temporary);
>> +   ir_variable *t2 = new(ir) ir_variable(bvec, "t2",
>> +                                         ir_var_temporary);
>> +   ir_variable *t3 = new(ir) ir_variable(ir->operands[0]->type, "t3",
>> +                                         ir_var_temporary);
>> +
>> +   i.insert_before(frtemp);
>> +   i.insert_before(assign(frtemp, fract(ir->operands[0])));
>> +   i.insert_before(temp);
>> +   i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
>> +
>> +   i.insert_before(t2);
>> +   i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero)));
>> +
>> +   i.insert_before(t3);
>> +   i.insert_before(assign(t3, add(temp, csel(equal(frtemp, zero->clone(ir, NULL)), zero->clone(ir, NULL), one))));
>> +
>> +   ir->operation = ir_triop_csel;
>> +   ir->operands[0] = new(ir) ir_dereference_variable(t2);
>> +   ir->operands[1] = new(ir) ir_dereference_variable(t3);
>> +   ir->operands[2] = new(ir) ir_dereference_variable(temp);
>> +
>> +   this->progress = true;
>> +}
>> +
>> +void
>> +lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
>> +{
>> +   /*
>> +    * temp = x > 0.0 ? 1.0 : 0.0;
>> +    * result = x < 0.0 ? -1.0 : temp;
>> +    */
>> +   ir_instruction &i = *base_ir;
>> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
>> +                                           ir_var_temporary);
>> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
>> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
>> +   ir_constant *negone = new(ir) ir_constant(-1.0, ir->operands[0]->type->vector_elements);
>
>                    neg_one... I kept reading it as "ne gone". :)

OK.

>
>> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
>> +                                           ir_var_temporary);
>> +   i.insert_before(temp);
>> +   i.insert_before(assign(temp, csel(greater(ir->operands[0], zero), one, zero->clone(ir, NULL))));
>> +
>> +   i.insert_before(t2);
>> +   i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero->clone(ir, NULL))));
>> +   ir->operation = ir_triop_csel;
>> +   ir->operands[0] = new(ir) ir_dereference_variable(t2);
>> +   ir->operands[1] = negone;
>> +   ir->operands[2] = new(ir) ir_dereference_variable(temp);
>
> You can skip creating one or both of these temporaries... at least t2.
>
>    ir->operands[0] = less(ir->operands[0]->clone(ir, NULL),
> zero->clone(ir, NULL));

Indeed. Cleaned up.

>
> The other lowering functions should get the same treatment.
>
>> +}
>> +
>>  ir_visitor_status
>>  lower_instructions_visitor::visit_leave(ir_expression *ir)
>>  {
>> @@ -665,6 +853,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>>           sat_to_clamp(ir);
>>        break;
>>
>> +   case ir_unop_trunc:
>> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +         dtrunc_to_dfrac(ir);
>> +      break;
>> +
>> +   case ir_unop_ceil:
>> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +         dceil_to_dfrac(ir);
>> +      break;
>> +
>> +   case ir_unop_floor:
>> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +         dfloor_to_dfrac(ir);
>> +      break;
>> +
>> +   case ir_unop_round_even:
>> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +         dround_even_to_dfrac(ir);
>> +      break;
>> +
>> +   case ir_unop_sign:
>> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
>> +         dsign_to_csel(ir);
>> +      break;
>>     default:
>>        return visit_continue;
>>     }
>>
>