[Mesa-dev] [PATCH 4/7] glsl/lower_if: conditionally lower if-branches based on their size
Ian Romanick
idr at freedesktop.org
Thu Nov 3 19:39:44 UTC 2016
On 10/28/2016 04:13 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/compiler/glsl/ir_optimization.h | 2 +-
> src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 ++++++++++++++++++++++++---
> 2 files changed, 50 insertions(+), 7 deletions(-)
>
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index c033f6b..a662a6d 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -102,21 +102,21 @@ bool do_dead_code_local(exec_list *instructions);
> bool do_dead_code_unlinked(exec_list *instructions);
> bool do_dead_functions(exec_list *instructions);
> bool opt_flip_matrices(exec_list *instructions);
> bool do_function_inlining(exec_list *instructions);
> bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
> bool do_lower_texture_projection(exec_list *instructions);
> bool do_if_simplification(exec_list *instructions);
> bool opt_flatten_nested_if_blocks(exec_list *instructions);
> bool do_discard_simplification(exec_list *instructions);
> bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
> - unsigned max_depth = 0);
> + unsigned max_depth = 0, unsigned min_branch_cost = 0);
> bool do_mat_op_to_vec(exec_list *instructions);
> bool do_minmax_prune(exec_list *instructions);
> bool do_noop_swizzle(exec_list *instructions);
> bool do_structure_splitting(exec_list *instructions);
> bool do_swizzle_swizzle(exec_list *instructions);
> bool do_vectorize(exec_list *instructions);
> bool do_tree_grafting(exec_list *instructions);
> bool do_vec_index_to_cond_assign(exec_list *instructions);
> bool do_vec_index_to_swizzle(exec_list *instructions);
> bool lower_discard(exec_list *instructions);
> diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
> index a413306..7b59c00 100644
> --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
> +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
> @@ -17,22 +17,28 @@
> * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> * DEALINGS IN THE SOFTWARE.
> */
>
> /**
> * \file lower_if_to_cond_assign.cpp
> *
> - * This attempts to flatten if-statements to conditional assignments for
> - * GPUs with limited or no flow control support.
> + * This flattens if-statements to conditional assignments if:
> + *
> + * - the GPU has limited or no flow control support
> + * (controlled by max_depth)
> + *
> + * - small conditional branches are more expensive than conditional assignments
> + * (controlled by min_branch_cost, that's the cost for a branch to be
> + * preserved)
> *
> * It can't handle other control flow being inside of its block, such
> * as calls or loops. Hopefully loop unrolling and inlining will take
> * care of those.
> *
> * Drivers for GPUs with no control flow support should simply call
> *
> * lower_if_to_cond_assign(instructions)
> *
> * to attempt to flatten all if-statements.
> @@ -42,65 +48,73 @@
> *
> * lower_if_to_cond_assign(instructions, N)
> *
> * to attempt to flatten any if-statements appearing at depth > N.
> */
>
> #include "compiler/glsl_types.h"
> #include "ir.h"
> #include "util/set.h"
> #include "util/hash_table.h" /* Needed for the hashing functions */
> +#include "main/macros.h" /* for MAX2 */
>
> namespace {
>
> class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
> public:
> ir_if_to_cond_assign_visitor(gl_shader_stage stage,
> - unsigned max_depth)
> + unsigned max_depth,
> + unsigned min_branch_cost)
> {
> this->progress = false;
> this->stage = stage;
> this->max_depth = max_depth;
> + this->min_branch_cost = min_branch_cost;
> this->depth = 0;
>
> this->condition_variables =
> _mesa_set_create(NULL, _mesa_hash_pointer,
> _mesa_key_pointer_equal);
> }
>
> ~ir_if_to_cond_assign_visitor()
> {
> _mesa_set_destroy(this->condition_variables, NULL);
> }
>
> ir_visitor_status visit_enter(ir_if *);
> ir_visitor_status visit_leave(ir_if *);
>
> bool found_unsupported_op;
> + bool found_expensive_op;
> + bool is_then;
I wonder if it would be clearer to have an 'unsigned *cost' member
instead of 'is_then', pointing at either then_cost or else_cost. I
could see arguments either way.
> bool progress;
> gl_shader_stage stage;
> + unsigned then_cost;
> + unsigned else_cost;
> + unsigned min_branch_cost;
> unsigned max_depth;
> unsigned depth;
>
> struct set *condition_variables;
> };
>
> } /* anonymous namespace */
>
> bool
> lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
> - unsigned max_depth)
> + unsigned max_depth, unsigned min_branch_cost)
> {
> if (max_depth == UINT_MAX)
> return false;
>
> - ir_if_to_cond_assign_visitor v(stage, max_depth);
> + ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
>
> visit_list_elements(&v, instructions);
>
> return v.progress;
> }
>
> void
> check_ir_node(ir_instruction *ir, void *data)
> {
> ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
> @@ -122,20 +136,34 @@ check_ir_node(ir_instruction *ir, void *data)
>
> /* Tess control shader outputs are like shared memory with complex
> * side effects, so treat it that way.
> */
> if (v->stage == MESA_SHADER_TESS_CTRL &&
> var->data.mode == ir_var_shader_out)
> v->found_unsupported_op = true;
> break;
> }
>
> + /* SSBO, images, atomic counters are handled by ir_type_call */
> + case ir_type_texture:
> + v->found_expensive_op = true;
> + break;
> +
> + case ir_type_expression:
> + case ir_type_dereference_array:
> + case ir_type_dereference_record:
> + if (v->is_then)
> + v->then_cost++;
> + else
> + v->else_cost++;
> + break;
> +
> default:
> break;
> }
> }
>
> void
> move_block_to_cond_assign(void *mem_ctx,
> ir_if *if_ir, ir_rvalue *cond_expr,
> exec_list *instructions,
> struct set *set)
> @@ -186,38 +214,53 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
> {
> (void) ir;
> this->depth++;
>
> return visit_continue;
> }
>
> ir_visitor_status
> ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
> {
> + bool must_lower = this->depth-- > this->max_depth;
> +
> /* Only flatten when beyond the GPU's maximum supported nesting depth. */
> - if (this->depth-- <= this->max_depth)
> + if (!must_lower && this->min_branch_cost == 0)
> return visit_continue;
>
> this->found_unsupported_op = false;
> + this->found_expensive_op = false;
> + this->then_cost = 0;
> + this->else_cost = 0;
>
> ir_assignment *assign;
>
> /* Check that both blocks don't contain anything we can't support. */
> + this->is_then = true;
> foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
> visit_tree(then_ir, check_ir_node, this);
> }
> +
> + this->is_then = false;
> foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
> visit_tree(else_ir, check_ir_node, this);
> }
> +
> if (this->found_unsupported_op)
> return visit_continue; /* can't handle inner unsupported opcodes */
>
> + /* Skip if the branch cost is high enough or if there's an expensive op. */
> + if (!must_lower &&
> + (this->found_expensive_op ||
> + MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
> + return visit_continue;
> +
> void *mem_ctx = ralloc_parent(ir);
>
> /* Store the condition to a variable. Move all of the instructions from
> * the then-clause of the if-statement. Use the condition variable as a
> * condition for all assignments.
> */
> ir_variable *const then_var =
> new(mem_ctx) ir_variable(glsl_type::bool_type,
> "if_to_cond_assign_then",
> ir_var_temporary);
>
More information about the mesa-dev mailing list