[Mesa-dev] [PATCH 14/23] glsl: Distribute unary operations over ir_triop_csel

Fri Mar 20 13:58:14 PDT 2015

From: Ian Romanick <ian.d.romanick at intel.com>

If both result operands of the ir_triop_csel are constants,
distributing the unary operation allows constant folding to eliminate
it.

This change fixes many of the shaders most hurt by "glsl: Optimize
certain if-statements to ir_triop_csel".

Shader-db results:

GM45 (0x2A42):
total instructions in shared programs: 3545712 -> 3545351 (-0.01%)
instructions in affected programs:     95123 -> 94762 (-0.38%)
helped:                                301

Iron Lake (0x0046):
total instructions in shared programs: 4976067 -> 4975577 (-0.01%)
instructions in affected programs:     127925 -> 127435 (-0.38%)
helped:                                430

Sandy Bridge (0x0116):
total instructions in shared programs: 6803288 -> 6802556 (-0.01%)
instructions in affected programs:     160949 -> 160217 (-0.45%)
helped:                                606

Sandy Bridge (0x0116) NIR:
total instructions in shared programs: 6817171 -> 6815965 (-0.02%)
instructions in affected programs:     176431 -> 175225 (-0.68%)
helped:                                658
HURT:                                  5

Ivy Bridge (0x0166):
total instructions in shared programs: 6278318 -> 6277594 (-0.01%)
instructions in affected programs:     148462 -> 147738 (-0.49%)
helped:                                596

Ivy Bridge (0x0166) NIR:
total instructions in shared programs: 6324604 -> 6323362 (-0.02%)
instructions in affected programs:     164896 -> 163654 (-0.75%)
helped:                                652
HURT:                                  5

Haswell (0x0426):
total instructions in shared programs: 5763779 -> 5763055 (-0.01%)
instructions in affected programs:     131140 -> 130416 (-0.55%)
helped:                                596

Haswell (0x0426) NIR:
total instructions in shared programs: 5800334 -> 5799128 (-0.02%)
instructions in affected programs:     146551 -> 145345 (-0.82%)
helped:                                652
HURT:                                  8

Broadwell (0x162E):
total instructions in shared programs: 6811805 -> 6811079 (-0.01%)
instructions in affected programs:     132390 -> 131664 (-0.55%)
helped:                                600

Broadwell (0x162E) NIR:
total instructions in shared programs: 7015106 -> 7013948 (-0.02%)
instructions in affected programs:     146168 -> 145010 (-0.79%)
helped:                                652
HURT:                                  8

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/glsl/opt_algebraic.cpp | 68 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 8 deletions(-)

diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index deedc9c..b1f0fa9 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -283,25 +283,77 @@ ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
       return operand;
 }
 
-ir_rvalue *
-ir_algebraic_visitor::handle_expression(ir_expression *ir)
+static bool
+preprocess_operands(ir_expression *ir, ir_constant *op_const[4],
+                    ir_expression *op_expr[4])
 {
-   ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
-   ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
-   unsigned int i;
-
    assert(ir->get_num_operands() <= 4);
-   for (i = 0; i < ir->get_num_operands(); i++) {
+   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
       if (ir->operands[i]->type->is_matrix())
-	 return ir;
+         return false;
 
       op_const[i] = ir->operands[i]->constant_expression_value();
       op_expr[i] = ir->operands[i]->as_expression();
    }
 
+   return true;
+}
+
+ir_rvalue *
+ir_algebraic_visitor::handle_expression(ir_expression *ir)
+{
+   ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
+   ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
+   unsigned int i;
+
+   if (!preprocess_operands(ir, op_const, op_expr))
+       return ir;
+
    if (this->mem_ctx == NULL)
       this->mem_ctx = ralloc_parent(ir);
 
+
+   /* If the expression is a unary operation over an ir_triop_csel with
+    * constant results, distribute the unary operation over the ir_triop_csel
+    * results.  Constant folding will do the rest.
+    *
+    * Example: -mix(0, 57, condition) becomes mix(0, -57, condition).
+    */
+   if (ir->get_num_operands() == 1 &&
+       op_expr[0] != NULL &&
+       op_expr[0]->operation == ir_triop_csel) {
+      if (op_expr[0]->operands[1]->as_constant() &&
+          op_expr[0]->operands[2]->as_constant()) {
+         ir_expression *const tmp_expr[2] = {
+            new(mem_ctx) ir_expression(ir->operation, op_expr[0]->operands[1]),
+            new(mem_ctx) ir_expression(ir->operation, op_expr[0]->operands[2])
+         };
+
+         /* Constant-fold the expressions now to (possibly) save an
+          * iteration through the optimization loop.
+          */
+         op_expr[0]->operands[1] = tmp_expr[0]->constant_expression_value();
+         op_expr[0]->operands[2] = tmp_expr[1]->constant_expression_value();
+         assert(op_expr[0]->operands[1] != NULL);
+         assert(op_expr[0]->operands[2] != NULL);
+
+         /* The type of the ir_triop_csel is now whatever the type of the
+          * distributed expression was.
+          */
+         assert(ir->type == op_expr[0]->operands[1]->type);
+         assert(ir->type == op_expr[0]->operands[2]->type);
+         op_expr[0]->type = ir->type;
+
+         /* Now let the rest of the algebraic optimization operate on the
+          * ir_triop_csel expression that replaces the original unary
+          * expression.
+          */
+         ir = op_expr[0];
+         if (!preprocess_operands(ir, op_const, op_expr))
+             return ir;
+      }
+   }
+
    switch (ir->operation) {
    case ir_unop_bit_not:
       if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
-- 
2.1.0