[Mesa-dev] [PATCH v2] glsl: Add a CSE pass.

Eric Anholt eric at anholt.net
Mon Oct 28 22:19:08 CET 2013


This only operates on constant/uniform values for now, because otherwise I'd
have to deal with killing my available CSE entries when assignments happen,
and getting even this working in the tree ir was painful enough.

As is, it has the following effect in shader-db:

total instructions in shared programs: 1524077 -> 1521964 (-0.14%)
instructions in affected programs:     50629 -> 48516 (-4.17%)
GAINED:                                0
LOST:                                  0

And, for tropics, that accounts for most of the effect, the FPS
improvement is 11.67% +/- 0.72% (n=3).

v2: Use read_only field of the variable, manually check the lod_info union
    members, use get_num_operands(), rename cse_operands_visitor to
    is_cse_candidate_visitor, move all is-a-candidate logic to that
    function, and call it before checking for CSE on a given rvalue, more
    comments, use private keyword.
---
 src/glsl/Makefile.sources       |   1 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/ir.h                   |   6 +
 src/glsl/ir_optimization.h      |   1 +
 src/glsl/opt_cse.cpp            | 599 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 608 insertions(+)
 create mode 100644 src/glsl/opt_cse.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 2f7bfa1..2aabc05 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -83,6 +83,7 @@ LIBGLSL_FILES = \
 	$(GLSL_SRCDIR)/opt_constant_variable.cpp \
 	$(GLSL_SRCDIR)/opt_copy_propagation.cpp \
 	$(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
+	$(GLSL_SRCDIR)/opt_cse.cpp \
 	$(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
 	$(GLSL_SRCDIR)/opt_dead_code.cpp \
 	$(GLSL_SRCDIR)/opt_dead_code_local.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index be17109..e6e71d7 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1597,6 +1597,7 @@ do_common_optimization(exec_list *ir, bool linked,
    else
       progress = do_constant_variable_unlinked(ir) || progress;
    progress = do_constant_folding(ir) || progress;
+   progress = do_cse(ir) || progress;
    progress = do_algebraic(ir) || progress;
    progress = do_lower_jumps(ir) || progress;
    progress = do_vec_index_to_swizzle(ir) || progress;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 8d5bec9..c6662cd 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -133,6 +133,7 @@ public:
    virtual class ir_return *            as_return()           { return NULL; }
    virtual class ir_if *                as_if()               { return NULL; }
    virtual class ir_swizzle *           as_swizzle()          { return NULL; }
+   virtual class ir_texture *           as_texture()          { return NULL; }
    virtual class ir_constant *          as_constant()         { return NULL; }
    virtual class ir_discard *           as_discard()          { return NULL; }
    virtual class ir_jump *              as_jump()             { return NULL; }
@@ -1717,6 +1718,11 @@ public:
       v->visit(this);
    }
 
+   virtual ir_texture *as_texture()
+   {
+      return this;
+   }
+
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
    /**
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 074686c..3ca9f57 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -77,6 +77,7 @@ bool do_constant_variable_unlinked(exec_list *instructions);
 bool do_copy_propagation(exec_list *instructions);
 bool do_copy_propagation_elements(exec_list *instructions);
 bool do_constant_propagation(exec_list *instructions);
+bool do_cse(exec_list *instructions);
 void do_dead_builtin_varyings(struct gl_context *ctx,
                               gl_shader *producer, gl_shader *consumer,
                               unsigned num_tfeedback_decls,
diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
new file mode 100644
index 0000000..c0fdb23
--- /dev/null
+++ b/src/glsl/opt_cse.cpp
@@ -0,0 +1,599 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_cse.cpp
+ *
+ * constant subexpression elimination at the GLSL IR level.
+ *
+ * Compare to brw_fs_cse.cpp for a more complete CSE implementation.  This one
+ * is generic and handles texture operations, but it's rather simple currently
+ * and doesn't support modification of variables in the available expressions
+ * list, so it can't do variables other than uniforms or shader inputs.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_basic_block.h"
+#include "ir_optimization.h"
+#include "ir_builder.h"
+#include "glsl_types.h"
+
+using namespace ir_builder;
+
+static bool debug = false;
+
+namespace {
+
+/**
+ * This is the record of an available expression for common subexpression
+ * elimination.
+ */
+class ae_entry : public exec_node
+{
+public:
+   ae_entry(ir_instruction *base_ir, ir_rvalue **val)
+      : val(val), base_ir(base_ir)
+   {
+      assert(val);
+      assert(*val);
+      assert(base_ir);
+
+      var = NULL;
+   }
+
+   /**
+    * The pointer to the expression that we might be able to reuse
+    *
+    * Note the double pointer -- this is the place in the base_ir expression
+    * tree that we would rewrite to move the expression out to a new variable
+    * assignment.
+    */
+   ir_rvalue **val;
+
+   /**
+    * Root instruction in the basic block where the expression appeared.
+    *
+    * This is used so that we can insert the new variable declaration into the
+    * instruction stream (since *val is just somewhere in base_ir's expression
+    * tree).
+    */
+   ir_instruction *base_ir;
+
+   /**
+    * The variable that the expression has been stored in, if it's been CSEd
+    * once already.
+    */
+   ir_variable *var;
+};
+
+class cse_visitor : public ir_rvalue_visitor {
+public:
+   cse_visitor(exec_list *validate_instructions)
+      : validate_instructions(validate_instructions)
+   {
+      progress = false;
+      mem_ctx = ralloc_context(NULL);
+      this->ae = new(mem_ctx) exec_list;
+   }
+   ~cse_visitor()
+   {
+      ralloc_free(mem_ctx);
+   }
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
+   virtual ir_visitor_status visit_enter(ir_loop *ir);
+   virtual ir_visitor_status visit_enter(ir_if *ir);
+   virtual ir_visitor_status visit_enter(ir_call *ir);
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+
+   bool progress;
+
+private:
+   void *mem_ctx;
+
+   ir_rvalue *try_cse(ir_rvalue *rvalue);
+   void add_to_ae(ir_rvalue **rvalue);
+
+   /** List of ae_entry: The available expressions to reuse */
+   exec_list *ae;
+
+   /**
+    * The whole shader, so that we can validate_ir_tree in debug mode.
+    *
+    * This proved quite useful when trying to get the tree manipulation
+    * right.
+    */
+   exec_list *validate_instructions;
+};
+
+/**
+ * Visitor to walk an expression tree to check that all variables referenced
+ * are constants.
+ */
+class is_cse_candidate_visitor : public ir_hierarchical_visitor
+{
+public:
+
+   is_cse_candidate_visitor()
+      : ok(true)
+   {
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+   bool ok;
+};
+
+
+class contains_rvalue_visitor : public ir_rvalue_visitor
+{
+public:
+
+   contains_rvalue_visitor(ir_rvalue *val)
+      : val(val)
+   {
+      found = false;
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+
+   bool found;
+
+private:
+   ir_rvalue *val;
+};
+
+} /* unnamed namespace */
+
+static void
+dump_ae(exec_list *ae)
+{
+   int i = 0;
+
+   printf("CSE: AE contents:\n");
+   foreach_list(node, ae) {
+      ae_entry *entry = (ae_entry *)node;
+
+      printf("CSE:   AE %2d (%p): ", i, entry);
+      (*entry->val)->print();
+      printf("\n");
+
+      if (entry->var)
+         printf("CSE:     in var %p:\n", entry->var);
+
+      i++;
+   }
+}
+
+ir_visitor_status
+is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
+{
+   /* Currently, since we don't handle kills of the ae based on variables
+    * getting assigned, we can only handle constant variables.
+    */
+   if (ir->var->read_only) {
+      return visit_continue;
+   } else {
+      ok = false;
+      return visit_stop;
+   }
+}
+
+void
+contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (*rvalue == val)
+      found = true;
+}
+
+static bool
+contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
+{
+   contains_rvalue_visitor v(needle);
+   haystack->accept(&v);
+   return v.found;
+}
+
+static bool
+is_cse_candidate(ir_rvalue *ir)
+{
+   /* Our temporary variable assignment generation isn't ready to handle
+    * anything bigger than a vector.
+    */
+   if (!ir->type->is_vector() && !ir->type->is_scalar())
+      return false;
+
+   /* Only handle expressions and textures currently.  We may want to extend
+    * to variable-index array dereferences at some point.
+    */
+   switch (ir->ir_type) {
+   case ir_type_expression:
+   case ir_type_texture:
+      break;
+   default:
+      return false;
+   }
+
+   is_cse_candidate_visitor v;
+
+   ir->accept(&v);
+
+   return v.ok;
+}
+
+static bool
+equals(ir_rvalue *a, ir_rvalue *b);
+
+static bool
+equals(ir_constant *a, ir_constant *b)
+{
+   if (!a || !b)
+      return false;
+
+   if (a->type != b->type)
+      return false;
+
+   for (unsigned i = 0; i < a->type->components(); i++) {
+      if (a->value.u[i] != b->value.u[i])
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+equals(ir_dereference_variable *a, ir_dereference_variable *b)
+{
+   if (!a || !b)
+      return false;
+
+   return a->var == b->var;
+}
+
+static bool
+equals(ir_dereference_array *a, ir_dereference_array *b)
+{
+   if (!a || !b)
+      return false;
+
+   if (!equals(a->array, b->array))
+      return false;
+
+   if (!equals(a->array_index, b->array_index))
+      return false;
+
+   return true;
+}
+
+static bool
+equals(ir_swizzle *a, ir_swizzle *b)
+{
+   if (!a || !b)
+      return false;
+
+   if (a->type != b->type)
+      return false;
+
+   if (a->mask.x != b->mask.x ||
+       a->mask.y != b->mask.y ||
+       a->mask.z != b->mask.z ||
+       a->mask.w != b->mask.w) {
+      return false;
+   }
+
+   return equals(a->val, b->val);
+}
+
+static bool
+equals(ir_texture *a, ir_texture *b)
+{
+   if (!a || !b)
+      return false;
+
+   if (a->type != b->type)
+      return false;
+
+   if (a->op != b->op)
+      return false;
+
+   if (!equals(a->coordinate, b->coordinate))
+      return false;
+
+   if (!equals(a->projector, b->projector))
+      return false;
+
+   if (!equals(a->shadow_comparitor, b->shadow_comparitor))
+      return false;
+
+   if (!equals(a->offset, b->offset))
+      return false;
+
+   if (!equals(a->sampler, b->sampler))
+      return false;
+
+   switch (a->op) {
+   case ir_tex:
+   case ir_lod:
+   case ir_query_levels:
+      break;
+   case ir_txb:
+      if (!equals(a->lod_info.bias, b->lod_info.bias))
+         return false;
+      break;
+   case ir_txl:
+   case ir_txf:
+   case ir_txs:
+      if (!equals(a->lod_info.lod, b->lod_info.lod))
+         return false;
+      break;
+   case ir_txd:
+      if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||
+          !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))
+         return false;
+   case ir_txf_ms:
+      if (!equals(a->lod_info.sample_index, b->lod_info.sample_index))
+         return false;
+      break;
+   case ir_tg4:
+      if (!equals(a->lod_info.component, b->lod_info.component))
+         return false;
+   default:
+      assert(!"Unrecognized texture op");
+   }
+
+   return true;
+}
+
+static bool
+equals(ir_expression *a, ir_expression *b)
+{
+   if (!a || !b)
+      return false;
+
+   if (a->type != b->type)
+      return false;
+
+   if (a->operation != b->operation)
+      return false;
+
+   for (unsigned i = 0; i < a->get_num_operands(); i++) {
+      if (!equals(a->operands[i], b->operands[i]))
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+equals(ir_rvalue *a, ir_rvalue *b)
+{
+   if (!a || !b)
+      return !a && !b;
+
+   if (a->type != b->type)
+      return false;
+
+   switch (a->ir_type) {
+   case ir_type_texture:
+      return equals(a->as_texture(), b->as_texture());
+
+   case ir_type_constant:
+      return equals(a->as_constant(), b->as_constant());
+
+   case ir_type_expression:
+      return equals(a->as_expression(), b->as_expression());
+
+   case ir_type_dereference_variable:
+      return equals(a->as_dereference_variable(), b->as_dereference_variable());
+
+   case ir_type_dereference_array:
+      return equals(a->as_dereference_array(), b->as_dereference_array());
+
+   case ir_type_swizzle:
+      return equals(a->as_swizzle(), b->as_swizzle());
+
+   default:
+      return false;
+   }
+}
+
+/**
+ * Tries to find and return a reference to a previous computation of a given
+ * expression.
+ *
+ * Walk the list of available expressions checking if any of them match the
+ * rvalue, and if so, move the previous copy of the expression to a temporary
+ * and return a reference of the temporary.
+ */
+ir_rvalue *
+cse_visitor::try_cse(ir_rvalue *rvalue)
+{
+   foreach_list(node, ae) {
+      ae_entry *entry = (ae_entry *)node;
+
+      if (debug) {
+         printf("Comparing to AE %p: ", entry);
+         (*entry->val)->print();
+         printf("\n");
+      }
+
+      if (!equals(rvalue, *entry->val))
+         continue;
+
+      if (debug) {
+         printf("CSE: Replacing: ");
+         (*entry->val)->print();
+         printf("\n");
+         printf("CSE:      with: ");
+         rvalue->print();
+         printf("\n");
+      }
+
+      if (!entry->var) {
+         ir_instruction *base_ir = entry->base_ir;
+
+         ir_variable *var = new(rvalue) ir_variable(rvalue->type,
+                                                    "cse",
+                                                    ir_var_auto);
+
+         /* Write the previous expression result into a new variable. */
+         base_ir->insert_before(var);
+         ir_assignment *assignment = assign(var, *entry->val);
+         base_ir->insert_before(assignment);
+
+         /* Replace the expression in the original tree with a deref of the
+          * variable, but keep tracking the expression for further reuse.
+          */
+         *entry->val = new(rvalue) ir_dereference_variable(var);
+         entry->val = &assignment->rhs;
+
+         entry->var = var;
+
+         /* Update the base_irs in the AE list.  We have to be sure that
+          * they're correct -- expressions from our base_ir that weren't moved
+          * need to stay in this base_ir (so that later consumption of them
+          * puts new variables between our new variable and our base_ir), but
+          * expressions from our base_ir that we *did* move need base_ir
+          * updated so that any further elimination from inside gets its new
+          * assignments put before our new assignment.
+          */
+         foreach_list(fixup_node, ae) {
+            ae_entry *fixup_entry = (ae_entry *)fixup_node;
+            if (contains_rvalue(assignment->rhs, *fixup_entry->val))
+               fixup_entry->base_ir = assignment;
+         }
+
+         if (debug)
+            dump_ae(ae);
+      }
+
+      /* Replace the expression in our current tree with the variable. */
+      return new(rvalue) ir_dereference_variable(entry->var);
+   }
+
+   return NULL;
+}
+
+/** Add the rvalue to the list of available expressions for CSE. */
+void
+cse_visitor::add_to_ae(ir_rvalue **rvalue)
+{
+   if (debug) {
+      printf("CSE: Add to AE: ");
+      (*rvalue)->print();
+      printf("\n");
+   }
+
+   ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
+
+   if (debug)
+      dump_ae(ae);
+}
+
+void
+cse_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   if (debug) {
+      printf("CSE: handle_rvalue ");
+      (*rvalue)->print();
+      printf("\n");
+   }
+
+   if (!is_cse_candidate(*rvalue))
+      return;
+
+   ir_rvalue *new_rvalue = try_cse(*rvalue);
+   if (new_rvalue) {
+      *rvalue = new_rvalue;
+      progress = true;
+
+      if (debug)
+         validate_ir_tree(validate_instructions);
+   } else {
+      add_to_ae(rvalue);
+   }
+}
+
+ir_visitor_status
+cse_visitor::visit_enter(ir_if *ir)
+{
+   handle_rvalue(&ir->condition);
+
+   ae->make_empty();
+   visit_list_elements(this, &ir->then_instructions);
+
+   ae->make_empty();
+   visit_list_elements(this, &ir->else_instructions);
+
+   ae->make_empty();
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+cse_visitor::visit_enter(ir_function_signature *ir)
+{
+   ae->make_empty();
+   visit_list_elements(this, &ir->body);
+
+   ae->make_empty();
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+cse_visitor::visit_enter(ir_loop *ir)
+{
+   ae->make_empty();
+   visit_list_elements(this, &ir->body_instructions);
+
+   ae->make_empty();
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+cse_visitor::visit_enter(ir_call *ir)
+{
+   /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
+    * pointer to the (ir_rvalue *) on the stack.  Since we save those pointers
+    * in the AE list, we can't let handle_rvalue get called.
+    */
+   return visit_continue_with_parent;
+}
+
+/**
+ * Does a (uniform-value) constant subexpression elimination pass on the code
+ * present in the instruction stream.
+ */
+bool
+do_cse(exec_list *instructions)
+{
+   cse_visitor v(instructions);
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
-- 
1.8.4.rc3



More information about the mesa-dev mailing list