[Mesa-dev] [PATCH 4/7] glsl/lower_if: conditionally lower if-branches based on their size

Ian Romanick idr at freedesktop.org
Thu Nov 3 19:39:44 UTC 2016


On 10/28/2016 04:13 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>  src/compiler/glsl/ir_optimization.h           |  2 +-
>  src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 ++++++++++++++++++++++++---
>  2 files changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index c033f6b..a662a6d 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -102,21 +102,21 @@ bool do_dead_code_local(exec_list *instructions);
>  bool do_dead_code_unlinked(exec_list *instructions);
>  bool do_dead_functions(exec_list *instructions);
>  bool opt_flip_matrices(exec_list *instructions);
>  bool do_function_inlining(exec_list *instructions);
>  bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
>  bool do_lower_texture_projection(exec_list *instructions);
>  bool do_if_simplification(exec_list *instructions);
>  bool opt_flatten_nested_if_blocks(exec_list *instructions);
>  bool do_discard_simplification(exec_list *instructions);
>  bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
> -                             unsigned max_depth = 0);
> +                             unsigned max_depth = 0, unsigned min_branch_cost = 0);
>  bool do_mat_op_to_vec(exec_list *instructions);
>  bool do_minmax_prune(exec_list *instructions);
>  bool do_noop_swizzle(exec_list *instructions);
>  bool do_structure_splitting(exec_list *instructions);
>  bool do_swizzle_swizzle(exec_list *instructions);
>  bool do_vectorize(exec_list *instructions);
>  bool do_tree_grafting(exec_list *instructions);
>  bool do_vec_index_to_cond_assign(exec_list *instructions);
>  bool do_vec_index_to_swizzle(exec_list *instructions);
>  bool lower_discard(exec_list *instructions);
> diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
> index a413306..7b59c00 100644
> --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
> +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
> @@ -17,22 +17,28 @@
>   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
>   */
>  
>  /**
>   * \file lower_if_to_cond_assign.cpp
>   *
> - * This attempts to flatten if-statements to conditional assignments for
> - * GPUs with limited or no flow control support.
> + * This flattens if-statements to conditional assignments if:
> + *
> + * - the GPU has limited or no flow control support
> + *   (controlled by max_depth)
> + *
> + * - small conditional branches are more expensive than conditional assignments
> + *   (controlled by min_branch_cost, that's the cost for a branch to be
> + *    preserved)
>   *
>   * It can't handle other control flow being inside of its block, such
>   * as calls or loops.  Hopefully loop unrolling and inlining will take
>   * care of those.
>   *
>   * Drivers for GPUs with no control flow support should simply call
>   *
>   *    lower_if_to_cond_assign(instructions)
>   *
>   * to attempt to flatten all if-statements.
> @@ -42,65 +48,73 @@
>   *
>   *    lower_if_to_cond_assign(instructions, N)
>   *
>   * to attempt to flatten any if-statements appearing at depth > N.
>   */
>  
>  #include "compiler/glsl_types.h"
>  #include "ir.h"
>  #include "util/set.h"
>  #include "util/hash_table.h" /* Needed for the hashing functions */
> +#include "main/macros.h" /* for MAX2 */
>  
>  namespace {
>  
>  class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
>  public:
>     ir_if_to_cond_assign_visitor(gl_shader_stage stage,
> -                                unsigned max_depth)
> +                                unsigned max_depth,
> +                                unsigned min_branch_cost)
>     {
>        this->progress = false;
>        this->stage = stage;
>        this->max_depth = max_depth;
> +      this->min_branch_cost = min_branch_cost;
>        this->depth = 0;
>  
>        this->condition_variables =
>              _mesa_set_create(NULL, _mesa_hash_pointer,
>                                      _mesa_key_pointer_equal);
>     }
>  
>     ~ir_if_to_cond_assign_visitor()
>     {
>        _mesa_set_destroy(this->condition_variables, NULL);
>     }
>  
>     ir_visitor_status visit_enter(ir_if *);
>     ir_visitor_status visit_leave(ir_if *);
>  
>     bool found_unsupported_op;
> +   bool found_expensive_op;
> +   bool is_then;

I wonder if it would be clearer to have an 'unsigned *cost' member instead
of the 'is_then' flag, pointing at either then_cost or else_cost.  I could
see arguments either way.

>     bool progress;
>     gl_shader_stage stage;
> +   unsigned then_cost;
> +   unsigned else_cost;
> +   unsigned min_branch_cost;
>     unsigned max_depth;
>     unsigned depth;
>  
>     struct set *condition_variables;
>  };
>  
>  } /* anonymous namespace */
>  
>  bool
>  lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
> -                        unsigned max_depth)
> +                        unsigned max_depth, unsigned min_branch_cost)
>  {
>     if (max_depth == UINT_MAX)
>        return false;
>  
> -   ir_if_to_cond_assign_visitor v(stage, max_depth);
> +   ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
>  
>     visit_list_elements(&v, instructions);
>  
>     return v.progress;
>  }
>  
>  void
>  check_ir_node(ir_instruction *ir, void *data)
>  {
>     ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
> @@ -122,20 +136,34 @@ check_ir_node(ir_instruction *ir, void *data)
>  
>        /* Tess control shader outputs are like shared memory with complex
>         * side effects, so treat it that way.
>         */
>        if (v->stage == MESA_SHADER_TESS_CTRL &&
>            var->data.mode == ir_var_shader_out)
>           v->found_unsupported_op = true;
>        break;
>     }
>  
> +   /* SSBO, images, atomic counters are handled by ir_type_call */
> +   case ir_type_texture:
> +      v->found_expensive_op = true;
> +      break;
> +
> +   case ir_type_expression:
> +   case ir_type_dereference_array:
> +   case ir_type_dereference_record:
> +      if (v->is_then)
> +         v->then_cost++;
> +      else
> +         v->else_cost++;
> +      break;
> +
>     default:
>        break;
>     }
>  }
>  
>  void
>  move_block_to_cond_assign(void *mem_ctx,
>  			  ir_if *if_ir, ir_rvalue *cond_expr,
>  			  exec_list *instructions,
>  			  struct set *set)
> @@ -186,38 +214,53 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
>  {
>     (void) ir;
>     this->depth++;
>  
>     return visit_continue;
>  }
>  
>  ir_visitor_status
>  ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
>  {
> +   bool must_lower = this->depth-- > this->max_depth;
> +
>     /* Only flatten when beyond the GPU's maximum supported nesting depth. */
> -   if (this->depth-- <= this->max_depth)
> +   if (!must_lower && this->min_branch_cost == 0)
>        return visit_continue;
>  
>     this->found_unsupported_op = false;
> +   this->found_expensive_op = false;
> +   this->then_cost = 0;
> +   this->else_cost = 0;
>  
>     ir_assignment *assign;
>  
>     /* Check that both blocks don't contain anything we can't support. */
> +   this->is_then = true;
>     foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
>        visit_tree(then_ir, check_ir_node, this);
>     }
> +
> +   this->is_then = false;
>     foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
>        visit_tree(else_ir, check_ir_node, this);
>     }
> +
>     if (this->found_unsupported_op)
>        return visit_continue; /* can't handle inner unsupported opcodes */
>  
> +   /* Skip if the branch cost is high enough or if there's an expensive op. */
> +   if (!must_lower &&
> +       (this->found_expensive_op ||
> +        MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
> +      return visit_continue;
> +
>     void *mem_ctx = ralloc_parent(ir);
>  
>     /* Store the condition to a variable.  Move all of the instructions from
>      * the then-clause of the if-statement.  Use the condition variable as a
>      * condition for all assignments.
>      */
>     ir_variable *const then_var =
>        new(mem_ctx) ir_variable(glsl_type::bool_type,
>  			       "if_to_cond_assign_then",
>  			       ir_var_temporary);
> 



More information about the mesa-dev mailing list