[Mesa-dev] [PATCH] Var packer optimisation.

Vincent Lejeune vljn at ovi.com
Tue Aug 9 14:19:05 PDT 2011


From: vlj <vljn at ovi.com>

This optimisation pass will look for and pack together vector (up to vec3) and scalar in function body. It only concerns local variable, not temporary which should however be packed as a side effect. It should reduce register pressure.
---
 src/glsl/Makefile               |    1 +
 src/glsl/glsl_parser_extras.cpp |    1 +
 src/glsl/ir_optimization.h      |    1 +
 src/glsl/opt_var_packer.cpp     |  309 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+), 0 deletions(-)
 create mode 100644 src/glsl/opt_var_packer.cpp

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 4100414..fff0dc7 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -83,6 +83,7 @@ CXX_SOURCES = \
 	opt_structure_splitting.cpp \
 	opt_swizzle_swizzle.cpp \
 	opt_tree_grafting.cpp \
+	opt_var_packer.cpp \
 	s_expression.cpp
 
 LIBS = \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index d9aa300..3b0e84a 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -776,6 +776,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration
 {
    GLboolean progress = GL_FALSE;
 
+   progress = do_var_packing(ir) || progress;
    progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
 
    if (linked) {
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index dd26567..83d124f 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -71,3 +71,4 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool optimize_redundant_jumps(exec_list *instructions);
+bool do_var_packing(exec_list *instructions);
diff --git a/src/glsl/opt_var_packer.cpp b/src/glsl/opt_var_packer.cpp
new file mode 100644
index 0000000..e7d0638
--- /dev/null
+++ b/src/glsl/opt_var_packer.cpp
@@ -0,0 +1,309 @@
+/*
+ * Copyright © 2011 Vincent Lejeune
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include <cstring>
+
+class box : public exec_node
+{
+public:
+   void* content;
+   box(void* c):content(c)
+   {
+
+   }
+};
+
+
+
+class boxed_exec_list : public exec_list
+{
+public:
+   void push_tail(void *n)
+   {
+      box* b = new (this) box(n);
+      exec_list::push_tail(b);
+   }
+
+   void push_head(void *n)
+   {
+      box* b = new (this) box(n);
+      exec_list::push_head(b);
+   }
+
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   bool has(const void* pointer) const
+   {
+      foreach_list_const(tmp,this)
+      {
+         box* tmpb = reinterpret_cast<box*>(const_cast<exec_node*>(tmp));
+         if(tmpb->content == pointer)
+            return true;
+      }
+      return false;
+   }
+
+   int size()
+   {
+      int result=0;
+      foreach_list_const(tmp,this)
+      {
+         result++;
+      }
+      return result;
+   }
+};
+
+#define list_item(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content)
+#define list_item_const(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content)
+
+
+#include <iostream>
+using namespace std;
+
+class ir_variable_lister : public ir_hierarchical_visitor
+{
+   friend class ir_packer;
+protected:
+   boxed_exec_list* available_vec3;
+   boxed_exec_list* available_vec2;
+   boxed_exec_list* available_float;
+
+   void store_variable(ir_variable* var)
+   {
+      if(strcmp(var->name,"_ret_val") == 0)
+         return ;
+      switch(var->type->gl_type)
+      {
+      case GL_FLOAT_VEC3:
+         if(available_vec3->has(var)) break;
+         available_vec3->push_tail(var);
+         break;
+      case GL_FLOAT_VEC2:
+         if(available_vec2->has(var)) break;
+         available_vec2->push_tail(var);
+         break;
+      case GL_FLOAT:
+         if(available_float->has(var)) break;
+         available_float->push_tail(var);
+         break;
+      default:
+         break;
+      }
+   }
+
+   bool find_candidates(ir_variable*& var1,ir_swizzle_mask& mask1, ir_variable*& var2, ir_swizzle_mask& mask2)
+   {
+      if(available_vec3->size() > 1 && available_float->size() >1)
+      {
+         var1 = list_item(ir_variable*,available_vec3->pop_head());
+         var2 = list_item(ir_variable*,available_float->pop_head());
+         mask1.x = 0;mask1.y = 1;mask1.z = 2;mask1.num_components = 3;mask1.has_duplicates=false;
+         mask2.x = 3;mask2.num_components = 1;mask1.has_duplicates=false;
+         return true;
+      }
+      if(available_vec2->size() > 2)
+      {
+         var1 = list_item_const(ir_variable*,available_vec2->pop_head());
+         var2 = list_item_const(ir_variable*,available_vec2->pop_head());
+         mask1.x = 0;mask1.y = 1;mask1.num_components = 2;
+         mask2.x = 2; mask2.y = 3;mask2.num_components = 2;
+         return true;
+      }
+      if(available_vec2->size() > 1 && available_float->size() > 1)
+      {
+         var1 = list_item_const(ir_variable*,available_vec2->pop_head());
+         var2 = list_item_const(ir_variable*,available_float->pop_head());
+         mask1.x = 0;mask1.y = 1;mask1.num_components = 2;
+         mask2.x = 2; mask2.num_components = 1;
+         return true;
+      }
+      if(available_float->size() > 2)
+      {
+         var1 = list_item_const(ir_variable*,available_float->pop_head());
+         var2 = list_item_const(ir_variable*,available_float->pop_head());
+         mask1.x = 0;mask1.num_components = 1;
+         mask2.x = 1;mask2.num_components = 1;
+         return true;
+      }
+      return false;
+   }
+
+public:
+   ir_visitor_status visit_enter(ir_assignment * assign)
+   {
+      if(ir_dereference_variable* dref = assign->lhs->as_dereference_variable())
+      {
+         if(dref->var->mode != ir_var_auto)
+            return visit_continue;
+         store_variable(dref->var);
+      }
+      return visit_continue;
+   }
+
+   ir_variable_lister(void* ctx)
+   {
+      available_float = new (ctx) boxed_exec_list();
+      available_vec2 = new (ctx) boxed_exec_list();
+      available_vec3 = new (ctx) boxed_exec_list();
+   }
+
+};
+
+
+class ir_variable_replacer : public ir_rvalue_visitor
+{
+protected:
+   ir_variable* var1;
+   ir_swizzle_mask mask_for_var1;
+   ir_variable* var2;
+   ir_swizzle_mask mask_for_var2;
+   ir_variable* packed_var;
+
+   inline
+   unsigned cyclic_right_shift(unsigned mask,unsigned step) const
+   {
+      unsigned result = 0;
+      result |= mask << step;
+      result |= mask >> (4 - step);
+      return result;
+   }
+
+
+public:
+   void handle_rvalue(ir_rvalue **rvalue)
+   {
+      if(!*rvalue)
+         return;
+      ir_rvalue* tmp_rvalue = *rvalue;
+      if(ir_dereference_variable* dref = tmp_rvalue->as_dereference_variable())
+      {
+         if(dref->var == var1)
+         {
+            ir_dereference_variable* ndref = new (packed_var) ir_dereference_variable(packed_var);
+            ir_swizzle* swz = new (dref) ir_swizzle(ndref,mask_for_var1);
+            *rvalue = swz;
+         }
+         if(dref->var == var2)
+         {
+            ir_dereference_variable* ndref = new (packed_var) ir_dereference_variable(packed_var);
+            ir_swizzle* swz = new (dref) ir_swizzle(ndref,mask_for_var2);
+            *rvalue = swz;
+         }
+      }
+   }
+
+   ir_visitor_status visit_enter(ir_assignment * assign)
+   {
+      assign->rhs->accept(this);
+      if(ir_dereference_variable* dref = assign->lhs->as_dereference_variable())
+      {
+         if(dref->var == var1)
+         {
+            dref->var = packed_var;
+         }
+         if(dref->var == var2)
+         {
+            dref->var = packed_var;
+            assign->write_mask = cyclic_right_shift(assign->write_mask,mask_for_var1.num_components);
+         }
+      }
+      return visit_continue;
+   }
+
+   ir_variable_replacer(ir_variable* v1,ir_swizzle_mask mask1, ir_variable* v2, ir_swizzle_mask mask2, ir_variable* newvar):var1(v1),mask_for_var1(mask1), var2(v2),mask_for_var2(mask2), packed_var(newvar)
+   {
+   }
+};
+
+
+class ir_packer : public ir_hierarchical_visitor
+{
+public:
+   ir_visitor_status visit_enter(ir_function_signature * fonc)
+   {
+      int body_size = 0;
+      foreach_list_const(tmp,&(fonc->body))
+      {
+         body_size++;
+      }
+      if(!body_size)
+         return visit_continue;
+      //cout << "ENTERING " << fonc->function_name() << ":"<< endl;
+      ir_variable_lister v(fonc);
+      foreach_list(tmp,&(fonc->body))
+      {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         inst->accept(&v);
+      }
+      ir_variable *v1=0,*v2=0;
+      ir_swizzle_mask m1,m2;
+      if(!v.find_candidates(v1,m1,v2,m2))
+         return visit_continue;
+      cout << "PACKING " << v1->name<< " WITH " << v2->name << endl;
+      ir_variable* newvar=0;
+      switch(m1.num_components + m2.num_components)
+      {
+      case 4:
+         newvar = new (fonc) ir_variable(glsl_type::vec4_type,"vec4_tmp",ir_var_temporary);
+         break;
+      case 3:
+         newvar = new (fonc) ir_variable(glsl_type::vec3_type,"vec3_tmp",ir_var_temporary);
+         break;
+      case 2:
+         newvar = new (fonc) ir_variable(glsl_type::vec2_type,"vec2_tmp",ir_var_temporary);
+         break;
+      }
+      fonc->body.push_head(newvar);
+      ir_variable_replacer vis2(v1,m1,v2,m2,newvar);
+      foreach_list(tmp,&(fonc->body))
+      {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         inst->accept(&vis2);
+      }
+      return visit_continue;
+   }
+};
+
+
+bool
+do_var_packing(exec_list *instructions)
+{
+   ir_packer v;
+   for(int i=0;i<5;i++)
+      visit_list_elements(&v, instructions);
+
+
+   return false;
+}
-- 
1.7.3.4



More information about the mesa-dev mailing list