[Mesa-dev] [PATCH 4/7] glsl/lower_if: conditionally lower if-branches based on their size
Marek Olšák
maraeo at gmail.com
Fri Oct 28 23:13:39 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
---
src/compiler/glsl/ir_optimization.h | 2 +-
src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 ++++++++++++++++++++++++---
2 files changed, 50 insertions(+), 7 deletions(-)
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index c033f6b..a662a6d 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -102,21 +102,21 @@ bool do_dead_code_local(exec_list *instructions);
bool do_dead_code_unlinked(exec_list *instructions);
bool do_dead_functions(exec_list *instructions);
bool opt_flip_matrices(exec_list *instructions);
bool do_function_inlining(exec_list *instructions);
bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
bool do_lower_texture_projection(exec_list *instructions);
bool do_if_simplification(exec_list *instructions);
bool opt_flatten_nested_if_blocks(exec_list *instructions);
bool do_discard_simplification(exec_list *instructions);
bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
- unsigned max_depth = 0);
+ unsigned max_depth = 0, unsigned min_branch_cost = 0);
bool do_mat_op_to_vec(exec_list *instructions);
bool do_minmax_prune(exec_list *instructions);
bool do_noop_swizzle(exec_list *instructions);
bool do_structure_splitting(exec_list *instructions);
bool do_swizzle_swizzle(exec_list *instructions);
bool do_vectorize(exec_list *instructions);
bool do_tree_grafting(exec_list *instructions);
bool do_vec_index_to_cond_assign(exec_list *instructions);
bool do_vec_index_to_swizzle(exec_list *instructions);
bool lower_discard(exec_list *instructions);
diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
index a413306..7b59c00 100644
--- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
@@ -17,22 +17,28 @@
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_if_to_cond_assign.cpp
*
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ * (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ * (controlled by min_branch_cost: the minimum cost a branch must have to
+ * be preserved as real flow control)
*
* It can't handle other control flow being inside of its block, such
* as calls or loops. Hopefully loop unrolling and inlining will take
* care of those.
*
* Drivers for GPUs with no control flow support should simply call
*
* lower_if_to_cond_assign(instructions)
*
* to attempt to flatten all if-statements.
@@ -42,65 +48,73 @@
*
* lower_if_to_cond_assign(instructions, N)
*
* to attempt to flatten any if-statements appearing at depth > N.
*/
#include "compiler/glsl_types.h"
#include "ir.h"
#include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
namespace {
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
ir_if_to_cond_assign_visitor(gl_shader_stage stage,
- unsigned max_depth)
+ unsigned max_depth,
+ unsigned min_branch_cost)
{
this->progress = false;
this->stage = stage;
this->max_depth = max_depth;
+ this->min_branch_cost = min_branch_cost;
this->depth = 0;
this->condition_variables =
_mesa_set_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
}
~ir_if_to_cond_assign_visitor()
{
_mesa_set_destroy(this->condition_variables, NULL);
}
ir_visitor_status visit_enter(ir_if *);
ir_visitor_status visit_leave(ir_if *);
bool found_unsupported_op;
+ bool found_expensive_op;
+ bool is_then;
bool progress;
gl_shader_stage stage;
+ unsigned then_cost;
+ unsigned else_cost;
+ unsigned min_branch_cost;
unsigned max_depth;
unsigned depth;
struct set *condition_variables;
};
} /* anonymous namespace */
bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
- unsigned max_depth)
+ unsigned max_depth, unsigned min_branch_cost)
{
if (max_depth == UINT_MAX)
return false;
- ir_if_to_cond_assign_visitor v(stage, max_depth);
+ ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
visit_list_elements(&v, instructions);
return v.progress;
}
void
check_ir_node(ir_instruction *ir, void *data)
{
ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
@@ -122,20 +136,34 @@ check_ir_node(ir_instruction *ir, void *data)
/* Tess control shader outputs are like shared memory with complex
* side effects, so treat it that way.
*/
if (v->stage == MESA_SHADER_TESS_CTRL &&
var->data.mode == ir_var_shader_out)
v->found_unsupported_op = true;
break;
}
+ /* SSBOs, images, and atomic counters are handled by ir_type_call */
+ case ir_type_texture:
+ v->found_expensive_op = true;
+ break;
+
+ case ir_type_expression:
+ case ir_type_dereference_array:
+ case ir_type_dereference_record:
+ if (v->is_then)
+ v->then_cost++;
+ else
+ v->else_cost++;
+ break;
+
default:
break;
}
}
void
move_block_to_cond_assign(void *mem_ctx,
ir_if *if_ir, ir_rvalue *cond_expr,
exec_list *instructions,
struct set *set)
@@ -186,38 +214,53 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
{
(void) ir;
this->depth++;
return visit_continue;
}
ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{
+ bool must_lower = this->depth-- > this->max_depth;
+
/* Only flatten when beyond the GPU's maximum supported nesting depth. */
- if (this->depth-- <= this->max_depth)
+ if (!must_lower && this->min_branch_cost == 0)
return visit_continue;
this->found_unsupported_op = false;
+ this->found_expensive_op = false;
+ this->then_cost = 0;
+ this->else_cost = 0;
ir_assignment *assign;
/* Check that both blocks don't contain anything we can't support. */
+ this->is_then = true;
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
visit_tree(then_ir, check_ir_node, this);
}
+
+ this->is_then = false;
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
visit_tree(else_ir, check_ir_node, this);
}
+
if (this->found_unsupported_op)
return visit_continue; /* can't handle inner unsupported opcodes */
+ /* Keep the branch if its cost is high enough or it has an expensive op. */
+ if (!must_lower &&
+ (this->found_expensive_op ||
+ MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+ return visit_continue;
+
void *mem_ctx = ralloc_parent(ir);
/* Store the condition to a variable. Move all of the instructions from
* the then-clause of the if-statement. Use the condition variable as a
* condition for all assignments.
*/
ir_variable *const then_var =
new(mem_ctx) ir_variable(glsl_type::bool_type,
"if_to_cond_assign_then",
ir_var_temporary);
--
2.7.4
More information about the mesa-dev
mailing list