[Mesa-dev] [PATCH 1/2] glsl: Add a lowering pass to remove reads of shader output variables.
Vincent Lejeune
vljn at ovi.com
Mon Jan 2 11:17:37 PST 2012
This is similar to Gallium's existing glsl_to_tgsi::remove_output_read
lowering pass, but done entirely inside the GLSL compiler.
Signed-off-by: Vincent Lejeune <vljn at ovi.com>
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
v2 [Kayden]:
- Don't reallocate the array for every shader output.
- Move the class into the .cpp file and create a lower_output_reads() wrapper
- Simplify the logic in visit(ir_dereference_variable *)
- Fold add_replacement_pair into the only caller.
- Use visit_leave(ir_return *) instead of enter (for paranoia, in case the
return value references shader outputs)
- Visit signatures rather than functions, to avoid pattern matching to find
the actual void main() signature.
- Add some comments
- Whitespace fixes
v3 [Vincent]:
- Fix tab indent
---
src/glsl/Makefile.sources | 1 +
src/glsl/ir_optimization.h | 1 +
src/glsl/lower_output_reads.cpp | 152 +++++++++++++++++++++++++++++++++++++++
3 files changed, 154 insertions(+), 0 deletions(-)
create mode 100644 src/glsl/lower_output_reads.cpp
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index c65bfe4..5e80af2 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -60,6 +60,7 @@ LIBGLSL_CXX_SOURCES := \
lower_vec_index_to_cond_assign.cpp \
lower_vec_index_to_swizzle.cpp \
lower_vector.cpp \
+ lower_output_reads.cpp \
opt_algebraic.cpp \
opt_constant_folding.cpp \
opt_constant_propagation.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 7b32e84..085b969 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -72,6 +72,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
bool lower_clip_distance(exec_list *instructions);
+void lower_output_reads(exec_list *instructions);
bool optimize_redundant_jumps(exec_list *instructions);
ir_rvalue *
diff --git a/src/glsl/lower_output_reads.cpp b/src/glsl/lower_output_reads.cpp
new file mode 100644
index 0000000..4b3f91c
--- /dev/null
+++ b/src/glsl/lower_output_reads.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2012 Vincent Lejeune
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+#include "program/hash_table.h"
+
+/**
+ * \file lower_output_reads.cpp
+ *
+ * In GLSL, shader output variables (such as varyings) can be both read and
+ * written. However, on some hardware, reading an output register causes
+ * trouble.
+ *
+ * This pass creates temporary shadow copies of every (used) shader output,
+ * and replaces all accesses to use those instead. It also adds code to the
+ * main() function to copy the final values to the actual shader outputs.
+ */
+
+/**
+ * Hierarchical visitor that swaps dereferences of shader outputs for
+ * dereferences of per-output temporaries, and emits assignments from the
+ * temporaries to the outputs at return statements and at the end of main().
+ */
+class output_read_remover : public ir_hierarchical_visitor {
+protected:
+   /** A shader output paired with the temporary that stands in for it. */
+   struct replacement_pair {
+      ir_variable *output;
+      ir_variable *temp;
+   };
+
+   /**
+    * Lookup table keyed by the original shader output ir_variable
+    * (ir_var_out mode); the stored value is its replacement temporary.
+    */
+   hash_table *replacements;
+
+   /**
+    * The same (output, temporary) pairs kept in a flat array, since the
+    * hash table cannot be iterated.  replacements_count entries are in use
+    * out of replacements_array_size allocated.
+    */
+   replacement_pair *replacements_array;
+   unsigned replacements_count;
+   unsigned replacements_array_size;
+
+   /** ralloc context that owns replacements_array. */
+   void *mem_ctx;
+
+public:
+   output_read_remover();
+   ~output_read_remover();
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+   virtual ir_visitor_status visit_leave(ir_return *);
+   virtual ir_visitor_status visit_leave(ir_function_signature *);
+};
+
+/**
+ * Set up an empty output-to-temporary table and a one-entry, zero-filled
+ * pair array allocated out of a fresh ralloc context.
+ */
+output_read_remover::output_read_remover()
+{
+   replacements_count = 0;
+   replacements_array_size = 1;
+
+   /* The context must exist before the array is allocated out of it. */
+   mem_ctx = ralloc_context(NULL);
+   replacements_array = rzalloc_array(mem_ctx, struct replacement_pair,
+                                      replacements_array_size);
+
+   replacements = hash_table_ctor(0, hash_table_pointer_hash,
+                                  hash_table_pointer_compare);
+}
+
+/**
+ * Release the pair array (through its ralloc context) and the hash table.
+ */
+output_read_remover::~output_read_remover()
+{
+   /* replacements_array was allocated out of mem_ctx. */
+   ralloc_free(mem_ctx);
+   hash_table_dtor(replacements);
+}
+
+/**
+ * Redirect a dereference of a shader output to its shadow temporary.
+ *
+ * Dereferences of variables whose mode is not ir_var_out are left untouched.
+ * The first time a given output is seen, a temporary with the same type and
+ * name is created, declared right after the output's own declaration, and
+ * recorded in both the hash table and the pair array.
+ *
+ * \return visit_continue in all cases.
+ */
+ir_visitor_status
+output_read_remover::visit(ir_dereference_variable *ir)
+{
+   ir_variable *const output = ir->var;
+
+   if (output->mode != ir_var_out)
+      return visit_continue;
+
+   ir_variable *temp = (ir_variable *) hash_table_find(replacements, output);
+
+   /* If we don't have an existing temporary, create one. */
+   if (temp == NULL) {
+      void *var_ctx = ralloc_parent(output);
+      temp = new(var_ctx) ir_variable(output->type, output->name,
+                                      ir_var_temporary);
+
+      /* The output is the key and the temporary the stored value, matching
+       * the lookup above.
+       */
+      hash_table_insert(replacements, temp, output);
+
+      /* Emit the temporary's declaration into the IR.  Without this the
+       * rewritten dereferences would refer to a variable that appears in no
+       * instruction list.
+       */
+      output->insert_after(temp);
+
+      /* Grow the pair array geometrically when it is full. */
+      if (replacements_array_size <= replacements_count) {
+         replacements_array_size *= 2;
+         replacements_array = reralloc(mem_ctx, replacements_array,
+                                       struct replacement_pair,
+                                       replacements_array_size);
+      }
+
+      replacements_array[replacements_count].output = output;
+      replacements_array[replacements_count].temp = temp;
+      replacements_count++;
+   }
+
+   /* Update the dereference to use the temporary */
+   ir->var = temp;
+
+   return visit_continue;
+}
+
+/**
+ * Before a return statement, emit "output = temporary" for every pair
+ * recorded so far, so the real outputs hold their values when the function
+ * exits.  This is done for every ir_return visited; the enclosing function
+ * is not checked.
+ *
+ * \return visit_continue in all cases.
+ */
+ir_visitor_status
+output_read_remover::visit_leave(ir_return *ir)
+{
+   const replacement_pair *const end =
+      replacements_array + replacements_count;
+
+   for (const replacement_pair *pair = replacements_array;
+        pair != end; pair++) {
+      /* The new nodes are allocated with the return instruction as their
+       * ralloc parent.
+       */
+      ir_dereference_variable *dst =
+         new(ir) ir_dereference_variable(pair->output);
+      ir_dereference_variable *src =
+         new(ir) ir_dereference_variable(pair->temp);
+
+      ir->insert_before(new(ir) ir_assignment(dst, src));
+   }
+
+   return visit_continue;
+}
+
+/**
+ * On leaving a signature named "main", append "output = temporary" for
+ * every recorded pair to the end of its body.  Signatures with any other
+ * name are left unchanged.
+ *
+ * \return visit_continue in all cases.
+ */
+ir_visitor_status
+output_read_remover::visit_leave(ir_function_signature *sig)
+{
+   const bool is_main = strcmp(sig->function_name(), "main") == 0;
+   if (!is_main)
+      return visit_continue;
+
+   const replacement_pair *const end =
+      replacements_array + replacements_count;
+
+   for (const replacement_pair *pair = replacements_array;
+        pair != end; pair++) {
+      /* The new nodes are allocated with the signature as their ralloc
+       * parent.
+       */
+      ir_dereference_variable *dst =
+         new(sig) ir_dereference_variable(pair->output);
+      ir_dereference_variable *src =
+         new(sig) ir_dereference_variable(pair->temp);
+
+      sig->body.push_tail(new(sig) ir_assignment(dst, src));
+   }
+
+   return visit_continue;
+}
+
+/**
+ * Run the output-read lowering pass over an instruction list.
+ *
+ * \param instructions  IR list to rewrite in place.
+ */
+void
+lower_output_reads(exec_list *instructions)
+{
+   output_read_remover remover;
+
+   visit_list_elements(&remover, instructions);
+}
--
1.7.7
More information about the mesa-dev
mailing list