[Mesa-dev] [PATCH 1/2] glsl: Add a lowering pass to remove reads of shader output variables.

Ian Romanick idr at freedesktop.org
Mon Jan 2 20:07:12 PST 2012


On 01/02/2012 11:17 AM, Vincent Lejeune wrote:
> This is similar to Gallium's existing glsl_to_tgsi::remove_output_read
> lowering pass, but done entirely inside the GLSL compiler.
>
> Signed-off-by: Vincent Lejeune <vljn at ovi.com>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>
> v2 [Kayden]:
>   - Don't reallocate the array for every shader output.
>   - Move the class into the .cpp file and create a lower_output_reads() wrapper
>   - Simplify the logic in visit(ir_dereference_variable *)
>   - Fold add_replacement_pair into the only caller.
>   - Use visit_leave(ir_return *) instead of enter (for paranoia, in case the
>     return value references shader outputs)
>   - Visit signatures rather than functions, to avoid pattern matching to find
>     the actual void main() signature.
>   - Add some comments
>   - Whitespace fixes
>
> v3 [Vincent]:
>   - Fix tab indent
> ---
>   src/glsl/Makefile.sources       |    1 +
>   src/glsl/ir_optimization.h      |    1 +
>   src/glsl/lower_output_reads.cpp |  152 +++++++++++++++++++++++++++++++++++++++
>   3 files changed, 154 insertions(+), 0 deletions(-)
>   create mode 100644 src/glsl/lower_output_reads.cpp
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index c65bfe4..5e80af2 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -60,6 +60,7 @@ LIBGLSL_CXX_SOURCES := \
>   	lower_vec_index_to_cond_assign.cpp \
>   	lower_vec_index_to_swizzle.cpp \
>   	lower_vector.cpp \
> +	lower_output_reads.cpp \
>   	opt_algebraic.cpp \
>   	opt_constant_folding.cpp \
>   	opt_constant_propagation.cpp \
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 7b32e84..085b969 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -72,6 +72,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
>       bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
>   bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
>   bool lower_clip_distance(exec_list *instructions);
> +void lower_output_reads(exec_list *instructions);
>   bool optimize_redundant_jumps(exec_list *instructions);
>
>   ir_rvalue *
> diff --git a/src/glsl/lower_output_reads.cpp b/src/glsl/lower_output_reads.cpp
> new file mode 100644
> index 0000000..4b3f91c
> --- /dev/null
> +++ b/src/glsl/lower_output_reads.cpp
> @@ -0,0 +1,152 @@
> +/*
> + * Copyright © 2012 Vincent Lejeune
> + * Copyright © 2012 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include "ir.h"
> +#include "program/hash_table.h"
> +
> +/**
> + * \file lower_output_reads.cpp
> + *
> + * In GLSL, shader output variables (such as varyings) can be both read and
> + * written.  However, on some hardware, reading an output register causes
> + * trouble.
> + *
> + * This pass creates temporary shadow copies of every (used) shader output,
> + * and replaces all accesses to use those instead.  It also adds code to the
> + * main() function to copy the final values to the actual shader outputs.
> + */
> +
> +/**
> + * IR visitor that rewrites every dereference of an ir_var_out variable to
> + * refer to a temporary instead, and emits temporary-to-output copies before
> + * each return statement and at the end of main().
> + */
> +class output_read_remover : public ir_hierarchical_visitor {
> +protected:
> +   /** One shader output paired with the temporary that stands in for it. */
> +   struct replacement_pair {
> +      ir_variable *output;
> +      ir_variable *temp;
> +   };
> +
> +   /**
> +    * A hash table mapping from the original ir_variable shader outputs
> +    * (ir_var_out mode) to the new temporaries to be used instead.
> +    */
> +   hash_table *replacements;
> +
> +   /**
> +    * An array of tuples containing both the output and temporary variables.
> +    * This is necessary because we can't iterate over the hash table.
> +    */

I believe Eric added hash_table_call_foreach for just this sort of 
situation.

> +   struct replacement_pair *replacements_array;
> +   /** Number of entries of replacements_array currently filled in. */
> +   unsigned replacements_count;
> +   /** Number of entries replacements_array has room for. */
> +   unsigned replacements_array_size;
> +
> +   /** ralloc context that replacements_array is allocated from. */
> +   void *mem_ctx;
> +public:
> +   output_read_remover();
> +   ~output_read_remover();
> +   /** Redirects a dereference of a shader output to its temporary. */
> +   virtual ir_visitor_status visit(class ir_dereference_variable *);
> +   /** Emits temporary-to-output copies ahead of a return statement. */
> +   virtual ir_visitor_status visit_leave(class ir_return *);
> +   /** Emits temporary-to-output copies at the end of main(). */
> +   virtual ir_visitor_status visit_leave(class ir_function_signature *);
> +};
> +
> +/**
> + * Start with an empty output-to-temporary table and a zeroed, one-entry
> + * replacement array allocated from a fresh ralloc context.
> + */
> +output_read_remover::output_read_remover()
> +{
> +   /* Nothing recorded yet; room for a single pair to begin with. */
> +   replacements_array_size = 1;
> +   replacements_count = 0;
> +
> +   /* The array lives in its own ralloc context. */
> +   mem_ctx = ralloc_context(NULL);
> +   replacements_array =
> +      rzalloc_array(mem_ctx, struct replacement_pair, replacements_array_size);
> +
> +   /* Table keyed by pointer: output variable -> temporary. */
> +   replacements = hash_table_ctor(0, hash_table_pointer_hash,
> +                                  hash_table_pointer_compare);
> +}
> +
> +/** Free the ralloc context and destroy the replacement hash table. */
> +output_read_remover::~output_read_remover()
> +{
> +   ralloc_free(mem_ctx);
> +   hash_table_dtor(replacements);
> +}
> +
> +/**
> + * Redirect a dereference of a shader output to its shadow temporary.
> + *
> + * Dereferences of variables whose mode is not ir_var_out are left untouched.
> + * The first time a given output is seen, a temporary with the same type and
> + * name is created, declared right after the output variable, and recorded in
> + * both the hash table and the replacement array.
> + *
> + * \return visit_continue in all cases.
> + */
> +ir_visitor_status
> +output_read_remover::visit(ir_dereference_variable *ir)
> +{
> +   if (ir->var->mode != ir_var_out)
> +      return visit_continue;
> +
> +   ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
> +
> +   /* If we don't have an existing temporary, create one. */
> +   if (temp == NULL) {
> +      void *var_ctx = ralloc_parent(ir->var);
> +      temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
> +                                      ir_var_temporary);
> +      hash_table_insert(replacements, temp, ir->var);
> +
> +      /* Give the temporary a declaration in the instruction stream, next to
> +       * the output it shadows; otherwise the IR would reference a variable
> +       * that is never declared.
> +       */
> +      ir->var->insert_after(temp);
> +
> +      /* Double the array's capacity once every slot is in use. */
> +      if (replacements_array_size <= replacements_count) {
> +         replacements_array_size *= 2;
> +         replacements_array = reralloc(mem_ctx, replacements_array,
> +                                       struct replacement_pair,
> +                                       replacements_array_size);
> +      }
> +
> +      replacements_array[replacements_count].output = ir->var;
> +      replacements_array[replacements_count].temp = temp;
> +      replacements_count++;
> +   }
> +
> +   /* Update the dereference to use the temporary */
> +   ir->var = temp;
> +
> +   return visit_continue;
> +}
> +
> +/**
> + * On leaving a return statement, copy every temporary recorded so far back
> + * into the shader output it replaces.  The copies are placed immediately
> + * before the return.
> + */
> +ir_visitor_status
> +output_read_remover::visit_leave(ir_return *ir)
> +{
> +   const struct replacement_pair *pair = replacements_array;
> +   const struct replacement_pair *const end = pair + replacements_count;
> +
> +   for (; pair != end; pair++) {
> +      ir_dereference_variable *const output_deref =
> +         new(ir) ir_dereference_variable(pair->output);
> +      ir_dereference_variable *const temp_deref =
> +         new(ir) ir_dereference_variable(pair->temp);
> +
> +      /* output = temp; */
> +      ir->insert_before(new(ir) ir_assignment(output_deref, temp_deref));
> +   }
> +
> +   return visit_continue;
> +}
> +
> +/**
> + * On leaving the signature named "main", append one copy per recorded
> + * (output, temporary) pair to the end of its body.  Every other signature
> + * is left unchanged.
> + */
> +ir_visitor_status
> +output_read_remover::visit_leave(ir_function_signature *sig)
> +{
> +   const bool is_main = strcmp(sig->function_name(), "main") == 0;
> +
> +   if (is_main) {
> +      for (unsigned idx = 0; idx != replacements_count; ++idx) {
> +         const struct replacement_pair &pair = replacements_array[idx];
> +
> +         ir_dereference_variable *const dst =
> +            new(sig) ir_dereference_variable(pair.output);
> +         ir_dereference_variable *const src =
> +            new(sig) ir_dereference_variable(pair.temp);
> +
> +         /* output = temp; */
> +         sig->body.push_tail(new(sig) ir_assignment(dst, src));
> +      }
> +   }
> +
> +   return visit_continue;
> +}
> +
> +/**
> + * Run the output-read lowering visitor over an instruction list.
> + *
> + * \param instructions  IR list to walk; the visitor modifies it in place.
> + */
> +void
> +lower_output_reads(exec_list *instructions)
> +{
> +   output_read_remover remover;
> +
> +   visit_list_elements(&remover, instructions);
> +}



More information about the mesa-dev mailing list