[Mesa-dev] [PATCH v2 27/28] glsl: lower double optional passes (v2)
Ian Romanick
idr at freedesktop.org
Fri Feb 6 01:46:00 PST 2015
On 02/06/2015 06:56 AM, Ilia Mirkin wrote:
> From: Dave Airlie <airlied at gmail.com>
>
> These lowering passes are optional for the backend to request. Currently
> the TGSI softpipe backend, and most likely the r600g backend, would want to
> use these passes as is. They aim to hit the gallium opcodes from the standard
> rounding/truncation functions.
>
> v2: also lower floor in mod_to_floor
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/glsl/ir_optimization.h | 1 +
> src/glsl/lower_instructions.cpp | 212 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 213 insertions(+)
>
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 912d910..9f91e2f 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -41,6 +41,7 @@
> #define CARRY_TO_ARITH 0x200
> #define BORROW_TO_ARITH 0x400
> #define SAT_TO_CLAMP 0x800
> +#define DOPS_TO_DFRAC 0x1000
>
> /**
> * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
> index 140b6d4..bf45c95 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -42,6 +42,7 @@
> * - CARRY_TO_ARITH
> * - BORROW_TO_ARITH
> * - SAT_TO_CLAMP
> + * - DOPS_TO_DFRAC
> *
> * SUB_TO_ADD_NEG:
> * ---------------
> @@ -112,6 +113,9 @@
> * -------------
> * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
> *
> + * DOPS_TO_DFRAC:
> + * --------------
> + * Converts double trunc, ceil, floor, round to fract
> */
>
> #include "main/core.h" /* for M_LOG2E */
> @@ -151,6 +155,11 @@ private:
> void sat_to_clamp(ir_expression *);
> void double_dot_to_fma(ir_expression *);
> void double_lrp(ir_expression *);
> + void dceil_to_dfrac(ir_expression *);
> + void dfloor_to_dfrac(ir_expression *);
> + void dround_even_to_dfrac(ir_expression *);
> + void dtrunc_to_dfrac(ir_expression *);
> + void dsign_to_csel(ir_expression *);
> };
>
> } /* anonymous namespace */
> @@ -315,6 +324,9 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
> ir_expression *const floor_expr =
> new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
>
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dfloor_to_dfrac(floor_expr);
> +
> ir_expression *const mul_expr =
> new(ir) ir_expression(ir_binop_mul,
> new(ir) ir_dereference_variable(y),
> @@ -596,6 +608,182 @@ lower_instructions_visitor::double_lrp(ir_expression *ir)
> this->progress = true;
> }
>
> +void
> +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
> +{
> + /*
> + * frtemp = frac(x);
> + * temp = sub(x, frtemp);
> + * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
> + */
> + ir_instruction &i = *base_ir;
> + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> + ir_var_temporary);
> + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> + ir_var_temporary);
> + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> + ir_var_temporary);
For all these functions, I think you really want to use an ir_factory.
Then you can just use ir_factory::make_temp and ir_factory::constant.
That would reduce a lot of the clone calls... I think it would make this
code a lot easier to read.
> +
> + i.insert_before(frtemp);
> + i.insert_before(assign(frtemp, fract(ir->operands[0])));
> +
> + i.insert_before(temp);
> + i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
> +
> + i.insert_before(t2);
> + i.insert_before(assign(t2, csel(nequal(frtemp, zero), one, zero->clone(ir, NULL))));
> + ir->operation = ir_binop_add;
> + ir->operands[0] = new(ir) ir_dereference_variable(temp);
> + ir->operands[1] = new(ir) ir_dereference_variable(t2);
> +}
> +
> +void
> +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
> +{
> + /*
> + * frtemp = frac(x);
> + * result = sub(x, frtemp);
> + */
> + ir_instruction &i = *base_ir;
> + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> + ir_var_temporary);
> +
> + i.insert_before(frtemp);
> + i.insert_before(assign(frtemp, fract(ir->operands[0]->clone(ir, NULL))));
> +
> + ir->operation = ir_binop_sub;
> + ir->operands[1] = new(ir) ir_dereference_variable(frtemp);
> +}
> +void
> +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
> +{
> + /*
> + * insane but works
> + * temp = x + 0.5;
> + * frtemp = frac(temp);
> + * t2 = sub(temp, frtemp);
> + * if (frac(x) == 0.5)
> + * result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
> + * else
> + * result = t2;
> +
> + */
> + const unsigned vec_elem = ir->type->vector_elements;
> + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
> + ir_instruction &i = *base_ir;
> + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> + ir_var_temporary);
> + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> + ir_var_temporary);
> + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> + ir_var_temporary);
> + ir_variable *t3 = new(ir) ir_variable(bvec, "t3",
> + ir_var_temporary);
> + ir_variable *t4 = new(ir) ir_variable(bvec, "t4",
> + ir_var_temporary);
> + ir_variable *t5 = new(ir) ir_variable(ir->operands[0]->type, "t5",
> + ir_var_temporary);
> + ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
> + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> +
> + i.insert_before(temp);
> + i.insert_before(assign(temp, add(ir->operands[0], p5)));
> +
> + i.insert_before(frtemp);
> + i.insert_before(assign(frtemp, fract(temp)));
> +
> + i.insert_before(t2);
> + i.insert_before(assign(t2, sub(temp, frtemp)));
> +
> + i.insert_before(t3);
> + i.insert_before(assign(t3, equal(fract(ir->operands[0]->clone(ir, NULL)), p5->clone(ir, NULL))));
> +
> + i.insert_before(t4);
> + i.insert_before(assign(t4, equal(fract(mul(t2, p5->clone(ir, NULL))), zero)));
> +
> + i.insert_before(t5);
> + i.insert_before(assign(t5, csel(t4, t2, sub(t2, one))));
> +
> + ir->operation = ir_triop_csel;
> + ir->operands[0] = new(ir) ir_dereference_variable(t3);
> + ir->operands[1] = new(ir) ir_dereference_variable(t5);
> + ir->operands[2] = new(ir) ir_dereference_variable(t2);
> +}
> +
> +void
> +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
> +{
> + /*
> + * frtemp = frac(x);
> + * temp = sub(x, frtemp);
> + * if (x >= 0)
> + * result = temp;
> + * else
> + * result = temp + (frtemp == 0.0) ? 0 : 1;
Isn't this
result = temp + ((x < 0 && frtemp != 0.0) ? 1 : 0);
or
result = temp + csel(x < 0 && frtemp != 0.0, 1, 0);
> + */
> + const unsigned vec_elem = ir->type->vector_elements;
> + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
> + ir_instruction &i = *base_ir;
> +
> + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
> + ir_var_temporary);
> + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> + ir_var_temporary);
> + ir_variable *t2 = new(ir) ir_variable(bvec, "t2",
> + ir_var_temporary);
> + ir_variable *t3 = new(ir) ir_variable(ir->operands[0]->type, "t3",
> + ir_var_temporary);
> +
> + i.insert_before(frtemp);
> + i.insert_before(assign(frtemp, fract(ir->operands[0])));
> + i.insert_before(temp);
> + i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), frtemp)));
> +
> + i.insert_before(t2);
> + i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero)));
> +
> + i.insert_before(t3);
> + i.insert_before(assign(t3, add(temp, csel(equal(frtemp, zero->clone(ir, NULL)), zero->clone(ir, NULL), one))));
> +
> + ir->operation = ir_triop_csel;
> + ir->operands[0] = new(ir) ir_dereference_variable(t2);
> + ir->operands[1] = new(ir) ir_dereference_variable(t3);
> + ir->operands[2] = new(ir) ir_dereference_variable(temp);
> +
> + this->progress = true;
> +}
> +
> +void
> +lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
> +{
> + /*
> + * temp = x > 0.0 ? 1.0 : 0.0;
> + * result = x < 0.0 ? -1.0 : temp;
> + */
> + ir_instruction &i = *base_ir;
> + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
> + ir_var_temporary);
> + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
> + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
> + ir_constant *negone = new(ir) ir_constant(-1.0, ir->operands[0]->type->vector_elements);
neg_one... I kept reading it as "ne gone". :)
> + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
> + ir_var_temporary);
> + i.insert_before(temp);
> + i.insert_before(assign(temp, csel(greater(ir->operands[0], zero), one, zero->clone(ir, NULL))));
> +
> + i.insert_before(t2);
> + i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero->clone(ir, NULL))));
> + ir->operation = ir_triop_csel;
> + ir->operands[0] = new(ir) ir_dereference_variable(t2);
> + ir->operands[1] = negone;
> + ir->operands[2] = new(ir) ir_dereference_variable(temp);
You can skip creating one or both of these temporaries... at least t2.
ir->operands[0] = less(ir->operands[0]->clone(ir, NULL),
zero->clone(ir, NULL));
The other lowering functions should get the same treatment.
> +}
> +
> ir_visitor_status
> lower_instructions_visitor::visit_leave(ir_expression *ir)
> {
> @@ -665,6 +853,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
> sat_to_clamp(ir);
> break;
>
> + case ir_unop_trunc:
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dtrunc_to_dfrac(ir);
> + break;
> +
> + case ir_unop_ceil:
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dceil_to_dfrac(ir);
> + break;
> +
> + case ir_unop_floor:
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dfloor_to_dfrac(ir);
> + break;
> +
> + case ir_unop_round_even:
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dround_even_to_dfrac(ir);
> + break;
> +
> + case ir_unop_sign:
> + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
> + dsign_to_csel(ir);
> + break;
> default:
> return visit_continue;
> }
>
More information about the mesa-dev
mailing list