[Mesa-dev] [PATCH v2] glsl: Add a CSE pass.

Paul Berry stereotype441 at gmail.com
Wed Oct 30 19:28:57 CET 2013


On 28 October 2013 14:19, Eric Anholt <eric at anholt.net> wrote:

> This only operates on constant/uniform values for now, because otherwise
> I'd
> have to deal with killing my available CSE entries when assignments happen,
> and getting even this working in the tree ir was painful enough.
>
> As is, it has the following effect in shader-db:
>
> total instructions in shared programs: 1524077 -> 1521964 (-0.14%)
> instructions in affected programs:     50629 -> 48516 (-4.17%)
> GAINED:                                0
> LOST:                                  0
>
> And, for tropics, that accounts for most of the effect, the FPS
> improvement is 11.67% +/- 0.72% (n=3).
>
> v2: Use read_only field of the variable, manually check the lod_info union
>     members, use get_num_operands(), rename cse_operands_visitor to
>     is_cse_candidate_visitor, move all is-a-candidate logic to that
>     function, and call it before checking for CSE on a given rvalue, more
>     comments, use private keyword.
>

Thanks, Eric.

Reviewed-by: Paul Berry <stereotype441 at gmail.com>


> ---
>  src/glsl/Makefile.sources       |   1 +
>  src/glsl/glsl_parser_extras.cpp |   1 +
>  src/glsl/ir.h                   |   6 +
>  src/glsl/ir_optimization.h      |   1 +
>  src/glsl/opt_cse.cpp            | 599
> ++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 608 insertions(+)
>  create mode 100644 src/glsl/opt_cse.cpp
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 2f7bfa1..2aabc05 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -83,6 +83,7 @@ LIBGLSL_FILES = \
>         $(GLSL_SRCDIR)/opt_constant_variable.cpp \
>         $(GLSL_SRCDIR)/opt_copy_propagation.cpp \
>         $(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
> +       $(GLSL_SRCDIR)/opt_cse.cpp \
>         $(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
>         $(GLSL_SRCDIR)/opt_dead_code.cpp \
>         $(GLSL_SRCDIR)/opt_dead_code_local.cpp \
> diff --git a/src/glsl/glsl_parser_extras.cpp
> b/src/glsl/glsl_parser_extras.cpp
> index be17109..e6e71d7 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -1597,6 +1597,7 @@ do_common_optimization(exec_list *ir, bool linked,
>     else
>        progress = do_constant_variable_unlinked(ir) || progress;
>     progress = do_constant_folding(ir) || progress;
> +   progress = do_cse(ir) || progress;
>     progress = do_algebraic(ir) || progress;
>     progress = do_lower_jumps(ir) || progress;
>     progress = do_vec_index_to_swizzle(ir) || progress;
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 8d5bec9..c6662cd 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -133,6 +133,7 @@ public:
>     virtual class ir_return *            as_return()           { return
> NULL; }
>     virtual class ir_if *                as_if()               { return
> NULL; }
>     virtual class ir_swizzle *           as_swizzle()          { return
> NULL; }
> +   virtual class ir_texture *           as_texture()          { return
> NULL; }
>     virtual class ir_constant *          as_constant()         { return
> NULL; }
>     virtual class ir_discard *           as_discard()          { return
> NULL; }
>     virtual class ir_jump *              as_jump()             { return
> NULL; }
> @@ -1717,6 +1718,11 @@ public:
>        v->visit(this);
>     }
>
> +   virtual ir_texture *as_texture()
> +   {
> +      return this;
> +   }
> +
>     virtual ir_visitor_status accept(ir_hierarchical_visitor *);
>
>     /**
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 074686c..3ca9f57 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -77,6 +77,7 @@ bool do_constant_variable_unlinked(exec_list
> *instructions);
>  bool do_copy_propagation(exec_list *instructions);
>  bool do_copy_propagation_elements(exec_list *instructions);
>  bool do_constant_propagation(exec_list *instructions);
> +bool do_cse(exec_list *instructions);
>  void do_dead_builtin_varyings(struct gl_context *ctx,
>                                gl_shader *producer, gl_shader *consumer,
>                                unsigned num_tfeedback_decls,
> diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
> new file mode 100644
> index 0000000..c0fdb23
> --- /dev/null
> +++ b/src/glsl/opt_cse.cpp
> @@ -0,0 +1,599 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file opt_cse.cpp
> + *
> + * constant subexpression elimination at the GLSL IR level.
> + *
> + * Compare to brw_fs_cse.cpp for a more complete CSE implementation.
>  This one
> + * is generic and handles texture operations, but it's rather simple
> currently
> + * and doesn't support modification of variables in the available
> expressions
> + * list, so it can't do variables other than uniforms or shader inputs.
> + */
> +
> +#include "ir.h"
> +#include "ir_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include "ir_basic_block.h"
> +#include "ir_optimization.h"
> +#include "ir_builder.h"
> +#include "glsl_types.h"
> +
> +using namespace ir_builder;
> +
> +static bool debug = false;
> +
> +namespace {
> +
> +/**
> + * This is the record of an available expression for common subexpression
> + * elimination.
> + */
> +class ae_entry : public exec_node
> +{
> +public:
> +   ae_entry(ir_instruction *base_ir, ir_rvalue **val)
> +      : val(val), base_ir(base_ir)
> +   {
> +      assert(val);
> +      assert(*val);
> +      assert(base_ir);
> +
> +      var = NULL;
> +   }
> +
> +   /**
> +    * The pointer to the expression that we might be able to reuse
> +    *
> +    * Note the double pointer -- this is the place in the base_ir
> expression
> +    * tree that we would rewrite to move the expression out to a new
> variable
> +    * assignment.
> +    */
> +   ir_rvalue **val;
> +
> +   /**
> +    * Root instruction in the basic block where the expression appeared.
> +    *
> +    * This is used so that we can insert the new variable declaration
> into the
> +    * instruction stream (since *val is just somewhere in base_ir's
> expression
> +    * tree).
> +    */
> +   ir_instruction *base_ir;
> +
> +   /**
> +    * The variable that the expression has been stored in, if it's been
> CSEd
> +    * once already.
> +    */
> +   ir_variable *var;
> +};
> +
> +class cse_visitor : public ir_rvalue_visitor {
> +public:
> +   cse_visitor(exec_list *validate_instructions)
> +      : validate_instructions(validate_instructions)
> +   {
> +      progress = false;
> +      mem_ctx = ralloc_context(NULL);
> +      this->ae = new(mem_ctx) exec_list;
> +   }
> +   ~cse_visitor()
> +   {
> +      ralloc_free(mem_ctx);
> +   }
> +
> +   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
> +   virtual ir_visitor_status visit_enter(ir_loop *ir);
> +   virtual ir_visitor_status visit_enter(ir_if *ir);
> +   virtual ir_visitor_status visit_enter(ir_call *ir);
> +   virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> +   bool progress;
> +
> +private:
> +   void *mem_ctx;
> +
> +   ir_rvalue *try_cse(ir_rvalue *rvalue);
> +   void add_to_ae(ir_rvalue **rvalue);
> +
> +   /** List of ae_entry: The available expressions to reuse */
> +   exec_list *ae;
> +
> +   /**
> +    * The whole shader, so that we can validate_ir_tree in debug mode.
> +    *
> +    * This proved quite useful when trying to get the tree manipulation
> +    * right.
> +    */
> +   exec_list *validate_instructions;
> +};
> +
> +/**
> + * Visitor to walk an expression tree to check that all variables
> referenced
> + * are constants.
> + */
> +class is_cse_candidate_visitor : public ir_hierarchical_visitor
> +{
> +public:
> +
> +   is_cse_candidate_visitor()
> +      : ok(true)
> +   {
> +   }
> +
> +   virtual ir_visitor_status visit(ir_dereference_variable *ir);
> +
> +   bool ok;
> +};
> +
> +
> +class contains_rvalue_visitor : public ir_rvalue_visitor
> +{
> +public:
> +
> +   contains_rvalue_visitor(ir_rvalue *val)
> +      : val(val)
> +   {
> +      found = false;
> +   }
> +
> +   virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> +   bool found;
> +
> +private:
> +   ir_rvalue *val;
> +};
> +
> +} /* unnamed namespace */
> +
> +static void
> +dump_ae(exec_list *ae)
> +{
> +   int i = 0;
> +
> +   printf("CSE: AE contents:\n");
> +   foreach_list(node, ae) {
> +      ae_entry *entry = (ae_entry *)node;
> +
> +      printf("CSE:   AE %2d (%p): ", i, entry);
> +      (*entry->val)->print();
> +      printf("\n");
> +
> +      if (entry->var)
> +         printf("CSE:     in var %p:\n", entry->var);
> +
> +      i++;
> +   }
> +}
> +
> +ir_visitor_status
> +is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
> +{
> +   /* Currently, since we don't handle kills of the ae based on variables
> +    * getting assigned, we can only handle constant variables.
> +    */
> +   if (ir->var->read_only) {
> +      return visit_continue;
> +   } else {
> +      ok = false;
> +      return visit_stop;
> +   }
> +}
> +
> +void
> +contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> +   if (*rvalue == val)
> +      found = true;
> +}
> +
> +static bool
> +contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
> +{
> +   contains_rvalue_visitor v(needle);
> +   haystack->accept(&v);
> +   return v.found;
> +}
> +
> +static bool
> +is_cse_candidate(ir_rvalue *ir)
> +{
> +   /* Our temporary variable assignment generation isn't ready to handle
> +    * anything bigger than a vector.
> +    */
> +   if (!ir->type->is_vector() && !ir->type->is_scalar())
> +      return false;
> +
> +   /* Only handle expressions and textures currently.  We may want to
> extend
> +    * to variable-index array dereferences at some point.
> +    */
> +   switch (ir->ir_type) {
> +   case ir_type_expression:
> +   case ir_type_texture:
> +      break;
> +   default:
> +      return false;
> +   }
> +
> +   is_cse_candidate_visitor v;
> +
> +   ir->accept(&v);
> +
> +   return v.ok;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b);
> +
> +static bool
> +equals(ir_constant *a, ir_constant *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   for (unsigned i = 0; i < a->type->components(); i++) {
> +      if (a->value.u[i] != b->value.u[i])
> +         return false;
> +   }
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_dereference_variable *a, ir_dereference_variable *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   return a->var == b->var;
> +}
> +
> +static bool
> +equals(ir_dereference_array *a, ir_dereference_array *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (!equals(a->array, b->array))
> +      return false;
> +
> +   if (!equals(a->array_index, b->array_index))
> +      return false;
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_swizzle *a, ir_swizzle *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->mask.x != b->mask.x ||
> +       a->mask.y != b->mask.y ||
> +       a->mask.z != b->mask.z ||
> +       a->mask.w != b->mask.w) {
> +      return false;
> +   }
> +
> +   return equals(a->val, b->val);
> +}
> +
> +static bool
> +equals(ir_texture *a, ir_texture *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->op != b->op)
> +      return false;
> +
> +   if (!equals(a->coordinate, b->coordinate))
> +      return false;
> +
> +   if (!equals(a->projector, b->projector))
> +      return false;
> +
> +   if (!equals(a->shadow_comparitor, b->shadow_comparitor))
> +      return false;
> +
> +   if (!equals(a->offset, b->offset))
> +      return false;
> +
> +   if (!equals(a->sampler, b->sampler))
> +      return false;
> +
> +   switch (a->op) {
> +   case ir_tex:
> +   case ir_lod:
> +   case ir_query_levels:
> +      break;
> +   case ir_txb:
> +      if (!equals(a->lod_info.bias, b->lod_info.bias))
> +         return false;
> +      break;
> +   case ir_txl:
> +   case ir_txf:
> +   case ir_txs:
> +      if (!equals(a->lod_info.lod, b->lod_info.lod))
> +         return false;
> +      break;
> +   case ir_txd:
> +      if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||
> +          !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))
> +         return false;
> +   case ir_txf_ms:
> +      if (!equals(a->lod_info.sample_index, b->lod_info.sample_index))
> +         return false;
> +      break;
> +   case ir_tg4:
> +      if (!equals(a->lod_info.component, b->lod_info.component))
> +         return false;
> +   default:
> +      assert(!"Unrecognized texture op");
> +   }
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_expression *a, ir_expression *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->operation != b->operation)
> +      return false;
> +
> +   for (unsigned i = 0; i < a->get_num_operands(); i++) {
> +      if (!equals(a->operands[i], b->operands[i]))
> +         return false;
> +   }
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b)
> +{
> +   if (!a || !b)
> +      return !a && !b;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   switch (a->ir_type) {
> +   case ir_type_texture:
> +      return equals(a->as_texture(), b->as_texture());
> +
> +   case ir_type_constant:
> +      return equals(a->as_constant(), b->as_constant());
> +
> +   case ir_type_expression:
> +      return equals(a->as_expression(), b->as_expression());
> +
> +   case ir_type_dereference_variable:
> +      return equals(a->as_dereference_variable(),
> b->as_dereference_variable());
> +
> +   case ir_type_dereference_array:
> +      return equals(a->as_dereference_array(), b->as_dereference_array());
> +
> +   case ir_type_swizzle:
> +      return equals(a->as_swizzle(), b->as_swizzle());
> +
> +   default:
> +      return false;
> +   }
> +}
> +
> +/**
> + * Tries to find and return a reference to a previous computation of a
> given
> + * expression.
> + *
> + * Walk the list of available expressions checking if any of them match
> the
> + * rvalue, and if so, move the previous copy of the expression to a
> temporary
> + * and return a reference of the temporary.
> + */
> +ir_rvalue *
> +cse_visitor::try_cse(ir_rvalue *rvalue)
> +{
> +   foreach_list(node, ae) {
> +      ae_entry *entry = (ae_entry *)node;
> +
> +      if (debug) {
> +         printf("Comparing to AE %p: ", entry);
> +         (*entry->val)->print();
> +         printf("\n");
> +      }
> +
> +      if (!equals(rvalue, *entry->val))
> +         continue;
> +
> +      if (debug) {
> +         printf("CSE: Replacing: ");
> +         (*entry->val)->print();
> +         printf("\n");
> +         printf("CSE:      with: ");
> +         rvalue->print();
> +         printf("\n");
> +      }
> +
> +      if (!entry->var) {
> +         ir_instruction *base_ir = entry->base_ir;
> +
> +         ir_variable *var = new(rvalue) ir_variable(rvalue->type,
> +                                                    "cse",
> +                                                    ir_var_auto);
> +
> +         /* Write the previous expression result into a new variable. */
> +         base_ir->insert_before(var);
> +         ir_assignment *assignment = assign(var, *entry->val);
> +         base_ir->insert_before(assignment);
> +
> +         /* Replace the expression in the original tree with a deref of
> the
> +          * variable, but keep tracking the expression for further reuse.
> +          */
> +         *entry->val = new(rvalue) ir_dereference_variable(var);
> +         entry->val = &assignment->rhs;
> +
> +         entry->var = var;
> +
> +         /* Update the base_irs in the AE list.  We have to be sure that
> +          * they're correct -- expressions from our base_ir that weren't
> moved
> +          * need to stay in this base_ir (so that later consumption of
> them
> +          * puts new variables between our new variable and our base_ir),
> but
> +          * expressions from our base_ir that we *did* move need base_ir
> +          * updated so that any further elimination from inside gets its
> new
> +          * assignments put before our new assignment.
> +          */
> +         foreach_list(fixup_node, ae) {
> +            ae_entry *fixup_entry = (ae_entry *)fixup_node;
> +            if (contains_rvalue(assignment->rhs, *fixup_entry->val))
> +               fixup_entry->base_ir = assignment;
> +         }
> +
> +         if (debug)
> +            dump_ae(ae);
> +      }
> +
> +      /* Replace the expression in our current tree with the variable. */
> +      return new(rvalue) ir_dereference_variable(entry->var);
> +   }
> +
> +   return NULL;
> +}
> +
> +/** Add the rvalue to the list of available expressions for CSE. */
> +void
> +cse_visitor::add_to_ae(ir_rvalue **rvalue)
> +{
> +   if (debug) {
> +      printf("CSE: Add to AE: ");
> +      (*rvalue)->print();
> +      printf("\n");
> +   }
> +
> +   ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
> +
> +   if (debug)
> +      dump_ae(ae);
> +}
> +
> +void
> +cse_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> +   if (!*rvalue)
> +      return;
> +
> +   if (debug) {
> +      printf("CSE: handle_rvalue ");
> +      (*rvalue)->print();
> +      printf("\n");
> +   }
> +
> +   if (!is_cse_candidate(*rvalue))
> +      return;
> +
> +   ir_rvalue *new_rvalue = try_cse(*rvalue);
> +   if (new_rvalue) {
> +      *rvalue = new_rvalue;
> +      progress = true;
> +
> +      if (debug)
> +         validate_ir_tree(validate_instructions);
> +   } else {
> +      add_to_ae(rvalue);
> +   }
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_if *ir)
> +{
> +   handle_rvalue(&ir->condition);
> +
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->then_instructions);
> +
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->else_instructions);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_function_signature *ir)
> +{
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->body);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_loop *ir)
> +{
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->body_instructions);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_call *ir)
> +{
> +   /* Because call is an exec_list of ir_rvalues, handle_rvalue gets
> passed a
> +    * pointer to the (ir_rvalue *) on the stack.  Since we save those
> pointers
> +    * in the AE list, we can't let handle_rvalue get called.
> +    */
> +   return visit_continue_with_parent;
> +}
> +
> +/**
> + * Does a (uniform-value) constant subexpression elimination pass on the
> code
> + * present in the instruction stream.
> + */
> +bool
> +do_cse(exec_list *instructions)
> +{
> +   cse_visitor v(instructions);
> +
> +   visit_list_elements(&v, instructions);
> +
> +   return v.progress;
> +}
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131030/38b21d5d/attachment-0001.html>


More information about the mesa-dev mailing list