[Mesa-dev] [RFC PATCH 01/10] glsl/loops: consolidate bounded loop handling into a lowering pass.
Paul Berry
stereotype441 at gmail.com
Sat Nov 30 08:38:10 PST 2013
Previously, all of the back-ends (ir_to_mesa, st_glsl_to_tgsi, and the
i965 fs and vec4 visitors) had nearly identical logic for handling
bounded loops. This replaces the duplicate logic with an equivalent
lowering pass that is used by all the back-ends.
Note: on i965, there is a slight increase in instruction count. For
example, a loop like this:
for (int i = 0; i < 100; i++) {
total += i;
}
would previously compile down to this (vec4) native code:
mov(8) g4<1>.xD 0D
mov(8) g8<1>.xD 0D
loop:
cmp.ge.f0(8) null g8<4;4,1>.xD 100D
(+f0) break(8)
add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD
add(8) g8<1>.xD g8<4;4,1>.xD 1D
add(8) g4<1>.xD g4<4;4,1>.xD 1D
while(8) loop
After this patch, the "(+f0) break(8)" turns into:
(+f0) if(8)
break(8)
endif(8)
because the back-end isn't smart enough to recognize that "if
(condition) break;" can be done using a conditional break instruction.
However, it should be relatively easy for a future peephole
optimization to properly optimize this.
---
src/glsl/Makefile.sources | 1 +
src/glsl/ir_optimization.h | 1 +
src/glsl/lower_bounded_loops.cpp | 117 +++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 33 +------
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 34 +------
src/mesa/program/ir_to_mesa.cpp | 42 +--------
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +---------
8 files changed, 133 insertions(+), 143 deletions(-)
create mode 100644 src/glsl/lower_bounded_loops.cpp
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 2e81ded..724c7b4 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -59,6 +59,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/loop_analysis.cpp \
$(GLSL_SRCDIR)/loop_controls.cpp \
$(GLSL_SRCDIR)/loop_unroll.cpp \
+ $(GLSL_SRCDIR)/lower_bounded_loops.cpp \
$(GLSL_SRCDIR)/lower_clip_distance.cpp \
$(GLSL_SRCDIR)/lower_discard.cpp \
$(GLSL_SRCDIR)/lower_discard_flow.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 3ca9f57..87c2c82 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -101,6 +101,7 @@ bool do_swizzle_swizzle(exec_list *instructions);
bool do_tree_grafting(exec_list *instructions);
bool do_vec_index_to_cond_assign(exec_list *instructions);
bool do_vec_index_to_swizzle(exec_list *instructions);
+bool lower_bounded_loops(exec_list *instructions);
bool lower_discard(exec_list *instructions);
void lower_discard_flow(exec_list *instructions);
bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
diff --git a/src/glsl/lower_bounded_loops.cpp b/src/glsl/lower_bounded_loops.cpp
new file mode 100644
index 0000000..10f272f
--- /dev/null
+++ b/src/glsl/lower_bounded_loops.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/** \file
+ *
+ * This pass converts bounded loops (those whose ir_loop contains non-null
+ * values for \c from, \c to, \c increment, and \c counter) into unbounded
+ * loops.
+ *
+ * For instance:
+ *
+ * (loop (declare () uint i) (constant uint 0) (constant uint 4)
+ * (constant uint 1)
+ * ...loop body...)
+ *
+ * Is transformed into:
+ *
+ * (declare () uint i)
+ * (assign (x) (var_ref i) (constant uint 0))
+ * (loop
+ * (if (expression bool >= (var_ref i) (constant uint 4))
+ * (break)
+ * ())
+ * ...loop body...
+ * (assign (x) (var_ref i)
+ * (expression uint + (var_ref i) (constant uint 1))))
+ */
+
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class lower_bounded_loops_visitor : public ir_hierarchical_visitor {
+public:
+ lower_bounded_loops_visitor()
+ : progress(false)
+ {
+ }
+
+ virtual ir_visitor_status visit_leave(ir_loop *ir);
+
+ bool progress;
+};
+
+} /* anonymous namespace */
+
+
+ir_visitor_status
+lower_bounded_loops_visitor::visit_leave(ir_loop *ir)
+{
+ if (ir->counter == NULL)
+ return visit_continue;
+
+ exec_list new_instructions;
+ ir_factory f(&new_instructions, ralloc_parent(ir));
+
+ /* Before the loop, declare the counter and initialize it to "from". */
+ f.emit(ir->counter);
+ f.emit(assign(ir->counter, ir->from));
+ ir->insert_before(&new_instructions);
+
+ /* At the top of the loop, compare the counter to "to", and break if the
+ * comparison succeeds.
+ */
+ ir_loop_jump *brk = new(f.mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ ir_expression_operation cmp = (ir_expression_operation) ir->cmp;
+ ir->body_instructions.push_head(if_tree(expr(cmp, ir->counter, ir->to),
+ brk));
+
+ /* At the bottom of the loop, increment the counter. */
+ ir->body_instructions.push_tail(assign(ir->counter,
+ add(ir->counter, ir->increment)));
+
+ /* NULL out the counter, from, to, and increment variables. */
+ ir->counter = NULL;
+ ir->from = NULL;
+ ir->to = NULL;
+ ir->increment = NULL;
+
+ this->progress = true;
+ return visit_continue;
+}
+
+
+bool
+lower_bounded_loops(exec_list *instructions)
+{
+ lower_bounded_loops_visitor v;
+
+ visit_list_elements(&v, instructions);
+
+ return v.progress;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9eb9a9d..39341f3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2135,39 +2135,16 @@ fs_visitor::visit(ir_if *ir)
void
fs_visitor::visit(ir_loop *ir)
{
- fs_reg counter = reg_undef;
+ /* Any bounded loops should have been lowered by lower_bounded_loops(). */
+ assert(ir->counter == NULL);
if (brw->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
- if (ir->counter) {
- this->base_ir = ir->counter;
- ir->counter->accept(this);
- counter = *(variable_storage(ir->counter));
-
- if (ir->from) {
- this->base_ir = ir->from;
- ir->from->accept(this);
-
- emit(MOV(counter, this->result));
- }
- }
-
this->base_ir = NULL;
emit(BRW_OPCODE_DO);
- if (ir->to) {
- this->base_ir = ir->to;
- ir->to->accept(this);
-
- emit(CMP(reg_null_d, counter, this->result,
- brw_conditional_for_comparison(ir->cmp)));
-
- fs_inst *inst = emit(BRW_OPCODE_BREAK);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
foreach_list(node, &ir->body_instructions) {
ir_instruction *ir = (ir_instruction *)node;
@@ -2175,12 +2152,6 @@ fs_visitor::visit(ir_loop *ir)
ir->accept(this);
}
- if (ir->increment) {
- this->base_ir = ir->increment;
- ir->increment->accept(this);
- emit(ADD(counter, counter, this->result));
- }
-
this->base_ir = NULL;
emit(BRW_OPCODE_WHILE);
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index ddb4524..1027910 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -213,6 +213,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|| progress;
} while (progress);
+ lower_bounded_loops(shader->ir);
+
/* Make a pass over the IR to add state references for any built-in
* uniforms that are used. This has to be done now (during linking).
* Code generation doesn't happen until the first time this shader is
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a13eafb..90fca8a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1007,48 +1007,18 @@ vec4_visitor::visit(ir_variable *ir)
void
vec4_visitor::visit(ir_loop *ir)
{
- dst_reg counter;
+ /* Any bounded loops should have been lowered by lower_bounded_loops(). */
+ assert(ir->counter == NULL);
/* We don't want debugging output to print the whole body of the
* loop as the annotation.
*/
this->base_ir = NULL;
- if (ir->counter != NULL) {
- this->base_ir = ir->counter;
- ir->counter->accept(this);
- counter = *(variable_storage(ir->counter));
-
- if (ir->from != NULL) {
- this->base_ir = ir->from;
- ir->from->accept(this);
-
- emit(MOV(counter, this->result));
- }
- }
-
emit(BRW_OPCODE_DO);
- if (ir->to) {
- this->base_ir = ir->to;
- ir->to->accept(this);
-
- emit(CMP(dst_null_d(), src_reg(counter), this->result,
- brw_conditional_for_comparison(ir->cmp)));
-
- vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
visit_instructions(&ir->body_instructions);
-
- if (ir->increment) {
- this->base_ir = ir->increment;
- ir->increment->accept(this);
- emit(ADD(counter, src_reg(counter), this->result));
- }
-
emit(BRW_OPCODE_WHILE);
}
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index c833a12..9fd10d1 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -759,49 +759,13 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
void
ir_to_mesa_visitor::visit(ir_loop *ir)
{
- ir_dereference_variable *counter = NULL;
-
- if (ir->counter != NULL)
- counter = new(mem_ctx) ir_dereference_variable(ir->counter);
-
- if (ir->from != NULL) {
- assert(ir->counter != NULL);
-
- ir_assignment *a =
- new(mem_ctx) ir_assignment(counter, ir->from, NULL);
-
- a->accept(this);
- }
+ /* Any bounded loops should have been lowered by lower_bounded_loops(). */
+ assert(ir->counter == NULL);
emit(NULL, OPCODE_BGNLOOP);
- if (ir->to) {
- ir_expression *e =
- new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
- counter, ir->to);
- ir_if *if_stmt = new(mem_ctx) ir_if(e);
-
- ir_loop_jump *brk =
- new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
-
- if_stmt->then_instructions.push_tail(brk);
-
- if_stmt->accept(this);
- }
-
visit_exec_list(&ir->body_instructions, this);
- if (ir->increment) {
- ir_expression *e =
- new(mem_ctx) ir_expression(ir_binop_add, counter->type,
- counter, ir->increment);
-
- ir_assignment *a =
- new(mem_ctx) ir_assignment(counter, e, NULL);
-
- a->accept(this);
- }
-
emit(NULL, OPCODE_ENDLOOP);
}
@@ -3091,6 +3055,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = lower_vector_insert(ir, true) || progress;
} while (progress);
+ lower_bounded_loops(ir);
+
validate_ir_tree(ir);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ac95968..f748ed8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1137,53 +1137,13 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
- ir_dereference_variable *counter = NULL;
-
- if (ir->counter != NULL)
- counter = new(ir) ir_dereference_variable(ir->counter);
-
- if (ir->from != NULL) {
- assert(ir->counter != NULL);
-
- ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
-
- a->accept(this);
- delete a;
- }
+ /* Any bounded loops should have been lowered by lower_bounded_loops(). */
+ assert(ir->counter == NULL);
emit(NULL, TGSI_OPCODE_BGNLOOP);
- if (ir->to) {
- ir_expression *e =
- new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
- counter, ir->to);
- ir_if *if_stmt = new(ir) ir_if(e);
-
- ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
-
- if_stmt->then_instructions.push_tail(brk);
-
- if_stmt->accept(this);
-
- delete if_stmt;
- delete e;
- delete brk;
- }
-
visit_exec_list(&ir->body_instructions, this);
- if (ir->increment) {
- ir_expression *e =
- new(ir) ir_expression(ir_binop_add, counter->type,
- counter, ir->increment);
-
- ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
-
- a->accept(this);
- delete a;
- delete e;
- }
-
emit(NULL, TGSI_OPCODE_ENDLOOP);
}
@@ -5345,6 +5305,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
} while (progress);
+ lower_bounded_loops(ir);
+
validate_ir_tree(ir);
}
--
1.8.4.2
More information about the mesa-dev
mailing list