[Mesa-dev] [PATCH 4/7] glsl/lower_if: conditionally lower if-branches based on their size

Marek Olšák maraeo at gmail.com
Fri Oct 28 23:13:39 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

---
 src/compiler/glsl/ir_optimization.h           |  2 +-
 src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 ++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index c033f6b..a662a6d 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -102,21 +102,21 @@ bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(exec_list *instructions);
 bool do_dead_functions(exec_list *instructions);
 bool opt_flip_matrices(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
 bool do_lower_texture_projection(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
 bool opt_flatten_nested_if_blocks(exec_list *instructions);
 bool do_discard_simplification(exec_list *instructions);
 bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
-                             unsigned max_depth = 0);
+                             unsigned max_depth = 0, unsigned min_branch_cost = 0);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_minmax_prune(exec_list *instructions);
 bool do_noop_swizzle(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
 bool do_vectorize(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
 bool lower_discard(exec_list *instructions);
diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
index a413306..7b59c00 100644
--- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
@@ -17,22 +17,28 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
 
 /**
  * \file lower_if_to_cond_assign.cpp
  *
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ *   (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ *   (controlled by min_branch_cost, the minimum cost a branch must have to
+ *    be preserved)
  *
  * It can't handle other control flow being inside of its block, such
  * as calls or loops.  Hopefully loop unrolling and inlining will take
  * care of those.
  *
  * Drivers for GPUs with no control flow support should simply call
  *
  *    lower_if_to_cond_assign(instructions)
  *
  * to attempt to flatten all if-statements.
@@ -42,65 +48,73 @@
  *
  *    lower_if_to_cond_assign(instructions, N)
  *
  * to attempt to flatten any if-statements appearing at depth > N.
  */
 
 #include "compiler/glsl_types.h"
 #include "ir.h"
 #include "util/set.h"
 #include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
 
 namespace {
 
 class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
 public:
    ir_if_to_cond_assign_visitor(gl_shader_stage stage,
-                                unsigned max_depth)
+                                unsigned max_depth,
+                                unsigned min_branch_cost)
    {
       this->progress = false;
       this->stage = stage;
       this->max_depth = max_depth;
+      this->min_branch_cost = min_branch_cost;
       this->depth = 0;
 
       this->condition_variables =
             _mesa_set_create(NULL, _mesa_hash_pointer,
                                     _mesa_key_pointer_equal);
    }
 
    ~ir_if_to_cond_assign_visitor()
    {
       _mesa_set_destroy(this->condition_variables, NULL);
    }
 
    ir_visitor_status visit_enter(ir_if *);
    ir_visitor_status visit_leave(ir_if *);
 
    bool found_unsupported_op;
+   bool found_expensive_op;
+   bool is_then;
    bool progress;
    gl_shader_stage stage;
+   unsigned then_cost;
+   unsigned else_cost;
+   unsigned min_branch_cost;
    unsigned max_depth;
    unsigned depth;
 
    struct set *condition_variables;
 };
 
 } /* anonymous namespace */
 
 bool
 lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
-                        unsigned max_depth)
+                        unsigned max_depth, unsigned min_branch_cost)
 {
    if (max_depth == UINT_MAX)
       return false;
 
-   ir_if_to_cond_assign_visitor v(stage, max_depth);
+   ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
 
    visit_list_elements(&v, instructions);
 
    return v.progress;
 }
 
 void
 check_ir_node(ir_instruction *ir, void *data)
 {
    ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
@@ -122,20 +136,34 @@ check_ir_node(ir_instruction *ir, void *data)
 
       /* Tess control shader outputs are like shared memory with complex
        * side effects, so treat it that way.
        */
       if (v->stage == MESA_SHADER_TESS_CTRL &&
           var->data.mode == ir_var_shader_out)
          v->found_unsupported_op = true;
       break;
    }
 
+   /* SSBO, images, atomic counters are handled by ir_type_call */
+   case ir_type_texture:
+      v->found_expensive_op = true;
+      break;
+
+   case ir_type_expression:
+   case ir_type_dereference_array:
+   case ir_type_dereference_record:
+      if (v->is_then)
+         v->then_cost++;
+      else
+         v->else_cost++;
+      break;
+
    default:
       break;
    }
 }
 
 void
 move_block_to_cond_assign(void *mem_ctx,
 			  ir_if *if_ir, ir_rvalue *cond_expr,
 			  exec_list *instructions,
 			  struct set *set)
@@ -186,38 +214,53 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
 {
    (void) ir;
    this->depth++;
 
    return visit_continue;
 }
 
 ir_visitor_status
 ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
 {
+   bool must_lower = this->depth-- > this->max_depth;
+
    /* Only flatten when beyond the GPU's maximum supported nesting depth. */
-   if (this->depth-- <= this->max_depth)
+   if (!must_lower && this->min_branch_cost == 0)
       return visit_continue;
 
    this->found_unsupported_op = false;
+   this->found_expensive_op = false;
+   this->then_cost = 0;
+   this->else_cost = 0;
 
    ir_assignment *assign;
 
    /* Check that both blocks don't contain anything we can't support. */
+   this->is_then = true;
    foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
       visit_tree(then_ir, check_ir_node, this);
    }
+
+   this->is_then = false;
    foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
       visit_tree(else_ir, check_ir_node, this);
    }
+
    if (this->found_unsupported_op)
       return visit_continue; /* can't handle inner unsupported opcodes */
 
+   /* Keep the branch if its cost is high enough or it has an expensive op. */
+   if (!must_lower &&
+       (this->found_expensive_op ||
+        MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+      return visit_continue;
+
    void *mem_ctx = ralloc_parent(ir);
 
    /* Store the condition to a variable.  Move all of the instructions from
     * the then-clause of the if-statement.  Use the condition variable as a
     * condition for all assignments.
     */
    ir_variable *const then_var =
       new(mem_ctx) ir_variable(glsl_type::bool_type,
 			       "if_to_cond_assign_then",
 			       ir_var_temporary);
-- 
2.7.4



More information about the mesa-dev mailing list