[Mesa-dev] [PATCH v2 27/28] glsl: lower double optional passes (v2)

Fri Feb 6 01:46:00 PST 2015

On 02/06/2015 06:56 AM, Ilia Mirkin wrote:
> From: Dave Airlie <airlied at gmail.com>
> 
> These lowering passes are optional for the backend to request. Currently
> the TGSI softpipe backend and, most likely, the r600g backend would want to
> use these passes as is. They aim to hit the gallium opcodes from the standard
> rounding/truncation functions.
> 
> v2: also lower floor in mod_to_floor
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/glsl/ir_optimization.h      |   1 +
>  src/glsl/lower_instructions.cpp | 212 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 213 insertions(+)
> 
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 912d910..9f91e2f 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -41,6 +41,7 @@
>  #define CARRY_TO_ARITH     0x200
>  #define BORROW_TO_ARITH    0x400
>  #define SAT_TO_CLAMP       0x800
> +#define DOPS_TO_DFRAC      0x1000
>  
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
> index 140b6d4..bf45c95 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -42,6 +42,7 @@
>   * - CARRY_TO_ARITH
>   * - BORROW_TO_ARITH
>   * - SAT_TO_CLAMP
> + * - DOPS_TO_DFRAC
>   *
>   * SUB_TO_ADD_NEG:
>   * ---------------
> @@ -112,6 +113,9 @@
>   * -------------
>   * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
>   *
> + * DOPS_TO_DFRAC:
> + * --------------
> + * Converts double trunc, ceil, floor, round to fract
>   */
>  
>  #include "main/core.h" /* for M_LOG2E */
> @@ -151,6 +155,11 @@ private:
>     void sat_to_clamp(ir_expression *);
>     void double_dot_to_fma(ir_expression *);
>     void double_lrp(ir_expression *);
> +   void dceil_to_dfrac(ir_expression *);
> +   void dfloor_to_dfrac(ir_expression *);
> +   void dround_even_to_dfrac(ir_expression *);
> +   void dtrunc_to_dfrac(ir_expression *);
> +   void dsign_to_csel(ir_expression *);
>  };
>  
>  } /* anonymous namespace */
> @@ -315,6 +324,9 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
>     ir_expression *const floor_expr =
>        new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
>  
> +   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +      dfloor_to_dfrac(floor_expr);
> +
>     ir_expression *const mul_expr =
>        new(ir) ir_expression(ir_binop_mul,
>                              new(ir) ir_dereference_variable(y),
> @@ -596,6 +608,182 @@ lower_instructions_visitor::double_lrp(ir_expression *ir)
>     this->progress = true;
>  }
>  
> +void
> +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
> +{
> +   /*
> +    * frtemp = frac(x);
> +    * temp = sub(x, frtemp);
> +    * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
> +    */
> +   ir_instruction &i = *base_ir;
> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> +                                             ir_var_temporary);
> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> +                                           ir_var_temporary);
> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> +                                         ir_var_temporary);

For all these functions, I think you really want to use an ir_factory.
Then you can just use ir_factory::make_temp and ir_factory::constant.
That would reduce a lot of the clone calls... I think it would make this
code a lot easier to read.

> +
> +   i.insert_before(frtemp);
> +   i.insert_before(assign(frtemp, fract(ir->operands[0])));
> +
> +   i.insert_before(temp);
> +   i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
> +
> +   i.insert_before(t2);
> +   i.insert_before(assign(t2, csel(nequal(frtemp, zero), one, zero->clone(ir, NULL))));
> +   ir->operation = ir_binop_add;
> +   ir->operands[0] = new(ir) ir_dereference_variable(temp);
> +   ir->operands[1] = new(ir) ir_dereference_variable(t2);
> +}
> +
> +void
> +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
> +{
> +   /*
> +    * frtemp = frac(x);
> +    * result = sub(x, frtemp);
> +    */
> +   ir_instruction &i = *base_ir;
> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> +                                             ir_var_temporary);
> +
> +   i.insert_before(frtemp);
> +   i.insert_before(assign(frtemp, fract(ir->operands[0]->clone(ir, NULL))));
> +
> +   ir->operation = ir_binop_sub;
> +   ir->operands[1] = new(ir) ir_dereference_variable(frtemp);
> +}
> +void
> +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
> +{
> +   /*
> +    * insane but works
> +    * temp = x + 0.5;
> +    * frtemp = frac(temp);
> +    * t2 = sub(temp, frtemp);
> +    * if (frac(x) == 0.5)
> +    *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
> +    *  else
> +    *     result = t2;
> +
> +    */
> +   const unsigned vec_elem = ir->type->vector_elements;
> +   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
> +   ir_instruction &i = *base_ir;
> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> +                                             ir_var_temporary);
> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> +                                           ir_var_temporary);
> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> +                                           ir_var_temporary);
> +   ir_variable *t3 = new(ir) ir_variable(bvec, "t3",
> +                                           ir_var_temporary);
> +   ir_variable *t4 = new(ir) ir_variable(bvec, "t4",
> +                                           ir_var_temporary);
> +   ir_variable *t5 = new(ir) ir_variable(ir->operands[0]->type, "t5",
> +                                           ir_var_temporary);
> +   ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> +
> +   i.insert_before(temp);
> +   i.insert_before(assign(temp, add(ir->operands[0], p5)));
> +
> +   i.insert_before(frtemp);
> +   i.insert_before(assign(frtemp, fract(temp)));
> +
> +   i.insert_before(t2);
> +   i.insert_before(assign(t2, sub(temp, frtemp)));
> +
> +   i.insert_before(t3);
> +   i.insert_before(assign(t3, equal(fract(ir->operands[0]->clone(ir, NULL)), p5->clone(ir, NULL))));
> +
> +   i.insert_before(t4);
> +   i.insert_before(assign(t4, equal(fract(mul(t2, p5->clone(ir, NULL))), zero)));
> +
> +   i.insert_before(t5);
> +   i.insert_before(assign(t5, csel(t4, t2, sub(t2, one))));
> +
> +   ir->operation = ir_triop_csel;
> +   ir->operands[0] = new(ir) ir_dereference_variable(t3);
> +   ir->operands[1] = new(ir) ir_dereference_variable(t5);
> +   ir->operands[2] = new(ir) ir_dereference_variable(t2);
> +}
> +
> +void
> +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
> +{
> +   /*
> +    * frtemp = frac(x);
> +    * temp = sub(x, frtemp);
> +    * if (x >= 0)
> +    *     result = temp;
> +    * else
> +    *     result = temp + (frtemp == 0.0) ? 0 : 1;

Isn't this

    result = temp + (x < 0 && frtemp == 0.0) ? 0 : 1;

or

    result = temp + csel(x < 0 && frtemp == 0.0, 0, 1);

> +    */
> +   const unsigned vec_elem = ir->type->vector_elements;
> +   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
> +   ir_instruction &i = *base_ir;
> +
> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> +   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> +                                             ir_var_temporary);
> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> +                                           ir_var_temporary);
> +   ir_variable *t2 = new(ir) ir_variable(bvec, "t2",
> +                                         ir_var_temporary);
> +   ir_variable *t3 = new(ir) ir_variable(ir->operands[0]->type, "t3",
> +                                         ir_var_temporary);
> +
> +   i.insert_before(frtemp);
> +   i.insert_before(assign(frtemp, fract(ir->operands[0])));
> +   i.insert_before(temp);
> +   i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
> +
> +   i.insert_before(t2);
> +   i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero)));
> +
> +   i.insert_before(t3);
> +   i.insert_before(assign(t3, add(temp, csel(equal(frtemp, zero->clone(ir, NULL)), zero->clone(ir, NULL), one))));
> +
> +   ir->operation = ir_triop_csel;
> +   ir->operands[0] = new(ir) ir_dereference_variable(t2);
> +   ir->operands[1] = new(ir) ir_dereference_variable(t3);
> +   ir->operands[2] = new(ir) ir_dereference_variable(temp);
> +
> +   this->progress = true;
> +}
> +
> +void
> +lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
> +{
> +   /*
> +    * temp = x > 0.0 ? 1.0 : 0.0;
> +    * result = x < 0.0 ? -1.0 : temp;
> +    */
> +   ir_instruction &i = *base_ir;
> +   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> +                                           ir_var_temporary);
> +   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> +   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> +   ir_constant *negone = new(ir) ir_constant(-1.0, ir->operands[0]->type->vector_elements);

                   neg_one... I kept reading it as "ne gone". :)

> +   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> +                                           ir_var_temporary);
> +   i.insert_before(temp);
> +   i.insert_before(assign(temp, csel(greater(ir->operands[0], zero), one, zero->clone(ir, NULL))));
> +
> +   i.insert_before(t2);
> +   i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero->clone(ir, NULL))));
> +   ir->operation = ir_triop_csel;
> +   ir->operands[0] = new(ir) ir_dereference_variable(t2);
> +   ir->operands[1] = negone;
> +   ir->operands[2] = new(ir) ir_dereference_variable(temp);

You can skip creating one or both of these temporaries... at least t2.

   ir->operands[0] = less(ir->operands[0]->clone(ir, NULL),
                          zero->clone(ir, NULL));

The other lowering functions should get the same treatment.

> +}
> +
>  ir_visitor_status
>  lower_instructions_visitor::visit_leave(ir_expression *ir)
>  {
> @@ -665,6 +853,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>           sat_to_clamp(ir);
>        break;
>  
> +   case ir_unop_trunc:
> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +         dtrunc_to_dfrac(ir);
> +      break;
> +
> +   case ir_unop_ceil:
> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +         dceil_to_dfrac(ir);
> +      break;
> +
> +   case ir_unop_floor:
> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +         dfloor_to_dfrac(ir);
> +      break;
> +
> +   case ir_unop_round_even:
> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +         dround_even_to_dfrac(ir);
> +      break;
> +
> +   case ir_unop_sign:
> +      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> +         dsign_to_csel(ir);
> +      break;
>     default:
>        return visit_continue;
>     }
>