[Mesa-dev] [PATCH 3/9] glsl: Convert mix() to use a new ir_triop_lrp opcode.

Tue Feb 19 18:38:33 PST 2013

Not much to say about the code (the theory sounds sane) but I was
wondering about the comment.
Why did glsl implement this really as x * (1 - a) + y * a?
The usual way for lerp would be (y - x) * a + x, i.e. two ops for most
gpus (sub+mad, or sub+mul+add). But I'm wondering if that sacrifices
precision or gets Infs wrong or something (this is the way the gallivm
code implements TGSI_OPCODE_LRP). I guess strict IEEE conformance would
really forbid that optimization though...

Roland

Am 20.02.2013 02:03, schrieb Matt Turner:
> From: Kenneth Graunke <kenneth at whitecape.org>
> 
> Many GPUs have an instruction to do linear interpolation which is more
> efficient than simply performing the algebra necessary (two multiplies,
> an add, and a subtract).
> 
> Pattern matching or peepholing this is more desirable, but can be
> tricky.  By using an opcode, we can at least make shaders which use the
> mix() built-in get the more efficient behavior.
> 
> Currently, all consumers lower ir_triop_lrp.  Subsequent patches will
> actually generate different code.
> 
> v2 [mattst88]:
>    - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a
>      subsequent patch and ir_triop_lrp translated directly.
> 
> Reviewed-by: Matt Turner <mattst88 at gmail.com>
> ---
>  src/glsl/builtins/ir/mix.ir                |   14 +++++-----
>  src/glsl/ir.cpp                            |    4 +++
>  src/glsl/ir.h                              |    7 +++++
>  src/glsl/ir_constant_expression.cpp        |   13 ++++++++++
>  src/glsl/ir_optimization.h                 |    1 +
>  src/glsl/ir_validate.cpp                   |    6 ++++
>  src/glsl/lower_instructions.cpp            |   35 ++++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_shader.cpp   |    3 +-
>  src/mesa/program/ir_to_mesa.cpp            |    6 ++++-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |    1 +
>  10 files changed, 81 insertions(+), 9 deletions(-)
> 
> diff --git a/src/glsl/builtins/ir/mix.ir b/src/glsl/builtins/ir/mix.ir
> index 70ae13c..e666532 100644
> --- a/src/glsl/builtins/ir/mix.ir
> +++ b/src/glsl/builtins/ir/mix.ir
> @@ -4,49 +4,49 @@
>         (declare (in) float arg0)
>         (declare (in) float arg1)
>         (declare (in) float arg2))
> -     ((return (expression float + (expression float * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression float * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression float lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec2
>       (parameters
>         (declare (in) vec2 arg0)
>         (declare (in) vec2 arg1)
>         (declare (in) vec2 arg2))
> -     ((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression vec2 - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec3
>       (parameters
>         (declare (in) vec3 arg0)
>         (declare (in) vec3 arg1)
>         (declare (in) vec3 arg2))
> -     ((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression vec3 - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec4
>       (parameters
>         (declare (in) vec4 arg0)
>         (declare (in) vec4 arg1)
>         (declare (in) vec4 arg2))
> -     ((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression vec4 - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec2
>       (parameters
>         (declare (in) vec2 arg0)
>         (declare (in) vec2 arg1)
>         (declare (in) float arg2))
> -     ((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec3
>       (parameters
>         (declare (in) vec3 arg0)
>         (declare (in) vec3 arg1)
>         (declare (in) float arg2))
> -     ((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature vec4
>       (parameters
>         (declare (in) vec4 arg0)
>         (declare (in) vec4 arg1)
>         (declare (in) float arg2))
> -     ((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2))))))
> +     ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
>  
>     (signature float
>       (parameters
> diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
> index 4ccdc42..717d6f6 100644
> --- a/src/glsl/ir.cpp
> +++ b/src/glsl/ir.cpp
> @@ -416,6 +416,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
>     if (op <= ir_last_binop)
>        return 2;
>  
> +   if (op <= ir_last_triop)
> +      return 3;
> +
>     if (op == ir_quadop_vector)
>        return 4;
>  
> @@ -502,6 +505,7 @@ static const char *const operator_strs[] = {
>     "pow",
>     "packHalf2x16_split",
>     "ubo_load",
> +   "lrp",
>     "vector",
>  };
>  
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index d878bd8..d63dac1 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -1118,6 +1118,13 @@ enum ir_expression_operation {
>      */
>     ir_last_binop = ir_binop_ubo_load,
>  
> +   ir_triop_lrp,
> +
> +   /**
> +    * A sentinel marking the last of the ternary operations.
> +    */
> +   ir_last_triop = ir_triop_lrp,
> +
>     ir_quadop_vector,
>  
>     /**
> diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
> index 86b863f..c2d0dc4 100644
> --- a/src/glsl/ir_constant_expression.cpp
> +++ b/src/glsl/ir_constant_expression.cpp
> @@ -1248,6 +1248,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
>        }
>        break;
>  
> +   case ir_triop_lrp: {
> +      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
> +      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
> +      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
> +
> +      unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
> +      for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
> +         data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
> +                     (op[1]->value.f[c] * op[2]->value.f[c2]);
> +      }
> +      break;
> +   }
> +
>     case ir_quadop_vector:
>        for (unsigned c = 0; c < this->type->vector_elements; c++) {
>  	 switch (this->type->base_type) {
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 8f33018..2454bbe 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -36,6 +36,7 @@
>  #define LOG_TO_LOG2        0x10
>  #define MOD_TO_FRACT       0x20
>  #define INT_DIV_TO_MUL_RCP 0x40
> +#define LRP_TO_ARITH       0x80
>  
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
> index d8cafd5..24ea506 100644
> --- a/src/glsl/ir_validate.cpp
> +++ b/src/glsl/ir_validate.cpp
> @@ -468,6 +468,12 @@ ir_validate::visit_leave(ir_expression *ir)
>        assert(ir->operands[1]->type == glsl_type::uint_type);
>        break;
>  
> +   case ir_triop_lrp:
> +      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
> +      assert(ir->operands[0]->type == ir->operands[1]->type);
> +      assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
> +      break;
> +
>     case ir_quadop_vector:
>        /* The vector operator collects some number of scalars and generates a
>         * vector from them.
> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
> index a8ef765..1ce7b7c 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -37,6 +37,7 @@
>   * - POW_TO_EXP2
>   * - LOG_TO_LOG2
>   * - MOD_TO_FRACT
> + * - LRP_TO_ARITH
>   *
>   * SUB_TO_ADD_NEG:
>   * ---------------
> @@ -79,13 +80,20 @@
>   * Many GPUs don't have a MOD instruction (945 and 965 included), and
>   * if we have to break it down like this anyway, it gives an
>   * opportunity to do things like constant fold the (1.0 / op1) easily.
> + *
> + * LRP_TO_ARITH:
> + * -------------
> + * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
>   */
>  
>  #include "main/core.h" /* for M_LOG2E */
>  #include "glsl_types.h"
>  #include "ir.h"
> +#include "ir_builder.h"
>  #include "ir_optimization.h"
>  
> +using namespace ir_builder;
> +
>  class lower_instructions_visitor : public ir_hierarchical_visitor {
>  public:
>     lower_instructions_visitor(unsigned lower)
> @@ -105,6 +113,7 @@ private:
>     void exp_to_exp2(ir_expression *);
>     void pow_to_exp2(ir_expression *);
>     void log_to_log2(ir_expression *);
> +   void lrp_to_arith(ir_expression *);
>  };
>  
>  /**
> @@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
>     this->progress = true;
>  }
>  
> +void
> +lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
> +{
> +   /* (lrp x y a) -> x*(1-a) + y*a */
> +
> +   /* Save op2 */
> +   ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
> +					   ir_var_temporary);
> +   this->base_ir->insert_before(temp);
> +   this->base_ir->insert_before(assign(temp, ir->operands[2]));
> +
> +   ir_constant *one = new(ir) ir_constant(1.0f);
> +
> +   ir->operation = ir_binop_add;
> +   ir->operands[0] = mul(ir->operands[0], sub(one, temp));
> +   ir->operands[1] = mul(ir->operands[1], temp);
> +   ir->operands[2] = NULL;
> +
> +   this->progress = true;
> +}
> +
>  ir_visitor_status
>  lower_instructions_visitor::visit_leave(ir_expression *ir)
>  {
> @@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>  	 pow_to_exp2(ir);
>        break;
>  
> +   case ir_triop_lrp:
> +      if (lowering(LRP_TO_ARITH))
> +	 lrp_to_arith(ir);
> +      break;
> +
>     default:
>        return visit_continue;
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index c71715e..9ab18cc 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -155,7 +155,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
>  			 DIV_TO_MUL_RCP |
>  			 SUB_TO_ADD_NEG |
>  			 EXP_TO_EXP2 |
> -			 LOG_TO_LOG2);
> +			 LOG_TO_LOG2 |
> +			 LRP_TO_ARITH);
>  
>        /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
>         * if-statements need to be flattened.
> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
> index ce409ec..30305d2 100644
> --- a/src/mesa/program/ir_to_mesa.cpp
> +++ b/src/mesa/program/ir_to_mesa.cpp
> @@ -1478,6 +1478,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
>        assert(!"not supported");
>        break;
>  
> +   case ir_triop_lrp:
> +      assert(!"ir_triop_lrp should have been lowered.");
> +      break;
> +
>     case ir_quadop_vector:
>        /* This operation should have already been handled.
>         */
> @@ -2993,7 +2997,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>  	 /* Lowering */
>  	 do_mat_op_to_vec(ir);
>  	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
> -				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
> +				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | LRP_TO_ARITH
>  				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
>  
>  	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 63b7428..757bd71 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -5189,6 +5189,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>                           DIV_TO_MUL_RCP |
>                           EXP_TO_EXP2 |
>                           LOG_TO_LOG2 |
> +                         LRP_TO_ARITH |
>                           (options->EmitNoPow ? POW_TO_EXP2 : 0) |
>                           (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
>  
>