[Mesa-dev] [PATCH] Implementing varying packing

Vincent Lejeune vljn at ovi.com
Mon Aug 15 13:38:55 PDT 2011


This optimisation pass will look for and pack together float,
vec2, vec3 varyings in fragment shaders and transform the vertex
shader accordingly. It might improve performance depending on
the hardware.
---
 src/glsl/Makefile          |    1 +
 src/glsl/SConscript        |    1 +
 src/glsl/linker.cpp        |    3 +
 src/glsl/pack_varyings.cpp |  583 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 588 insertions(+), 0 deletions(-)
 create mode 100644 src/glsl/pack_varyings.cpp

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index e2d29bd..8ce06bd 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -84,6 +84,7 @@ CXX_SOURCES = \
 	opt_structure_splitting.cpp \
 	opt_swizzle_swizzle.cpp \
 	opt_tree_grafting.cpp \
+	pack_varyings.cpp \
 	s_expression.cpp
 
 LIBS = \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index c325583..082aaa9 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -93,6 +93,7 @@ glsl_sources = [
     'opt_structure_splitting.cpp',
     'opt_swizzle_swizzle.cpp',
     'opt_tree_grafting.cpp',
+    'pack_varyings.cpp',
     'ralloc.c',
     's_expression.cpp',
     'strtod.c',
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 255edc6..97b25ff 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1404,6 +1404,7 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
    }
 }
 
+extern void pack_varyings(gl_shader*& vs,gl_shader*& fs);
 
 void
 assign_varying_locations(struct gl_shader_program *prog,
@@ -1413,6 +1414,8 @@ assign_varying_locations(struct gl_shader_program *prog,
    unsigned output_index = VERT_RESULT_VAR0;
    unsigned input_index = FRAG_ATTRIB_VAR0;
 
+   pack_varyings(producer,consumer);
+
    /* Operate in a total of three passes.
     *
     * 1. Assign locations for any matching inputs and outputs.
diff --git a/src/glsl/pack_varyings.cpp b/src/glsl/pack_varyings.cpp
new file mode 100644
index 0000000..261db49
--- /dev/null
+++ b/src/glsl/pack_varyings.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ * Copyright © 2011 Vincent Lejeune
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file pack_varyings.cpp
+ *
+ * Try to find packable varyings and pack them.
+ * Currently only float, vec2 and vec3 varyings are considered.
+ *
+ * The code works in two passes:
+ * - In the first pass, all varyings from the shaders are collected. A packing
+ * strategy is then built by looking only at the number of varyings of each
+ * type (float, vec2, vec3); in particular, the number of occurrences of each
+ * varying is ignored, as swizzling is assumed to be mostly free. This
+ * strategy is stored as a hash_table of packing_remap structures. If a shader
+ * object is reused by several programs, storing this strategy might avoid
+ * unnecessary recomputation; this is not done yet.
+ * - In the second pass, the packing variables are declared in the shaders and
+ * the packed variables are removed. The instructions are then walked and each
+ * occurrence of a packed variable is replaced by the corresponding packing
+ * variable with a swizzle.
+ */
+
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "list.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+}
+
+/*
+ * Boxed list definition.
+ * Such a list holds pointers instead of exec_nodes; in addition, it can contain
+ * items that are already stored in another list (an exec_node can belong to
+ * only one exec_list at a time).
+ * This container is needed to store ir_variable* in the first pass without
+ * removing them from the instruction stream.
+ *
+ */
+
+// BOXED LIST BEGIN
+
+/**
+ * List node that carries an untyped pointer to its payload.
+ */
+class box : public exec_node
+{
+public:
+   /** Payload pointer supplied at construction. */
+   void *content;
+
+   box(void *payload)
+      : content(payload)
+   {
+   }
+};
+
+/**
+ * exec_list whose nodes are \c box wrappers around arbitrary pointers.
+ */
+class boxed_exec_list : public exec_list
+{
+public:
+   /** Allocate the list with ralloc_size(), using \c ctx as context. */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *const mem = ralloc_size(ctx, size);
+      assert(mem != NULL);
+      return mem;
+   }
+
+   /** Wrap \c n in a new box (created with this list as context) and prepend it. */
+   void push_head(void *n)
+   {
+      exec_list::push_head(new (this) box(n));
+   }
+
+   /** Wrap \c n in a new box (created with this list as context) and append it. */
+   void push_tail(void *n)
+   {
+      exec_list::push_tail(new (this) box(n));
+   }
+
+   /** Return true when some box of the list holds \c pointer as content. */
+   bool has(const void *pointer) const
+   {
+      foreach_list_const(node, this) {
+         const box *const entry = static_cast<const box *>(node);
+         if (entry->content == pointer)
+            return true;
+      }
+      return false;
+   }
+};
+
+/* Fetch the payload of a boxed list node (an exec_node* that is really a
+ * box*) and convert it to \c type.  The _const variant accepts a
+ * const exec_node*.
+ */
+#define list_item(type,pointer) \
+   static_cast<type>(static_cast<box*>(pointer)->content)
+#define list_item_const(type,pointer) \
+   static_cast<type>(static_cast<box*>(const_cast<exec_node*>(pointer))->content)
+
+
+// BOXED LIST END
+
+
+/**
+ * From Ian Romanick
+ *
+ * This structure describes how one packed variable is mapped into a
+ * packing variable.
+ *
+ * The packing variable itself is not stored here because it has to be
+ * declared once per shader (vertex shader and fragment shader); only its name
+ * and type are recorded.
+ * The packed variable is not stored here either: its name is the key under
+ * which this structure is inserted in the hash_table that defines the
+ * packing strategy.
+ */
+struct packing_remap {
+   /**
+    * Swizzle used to read the packed variable out of the packing variable.
+    */
+   ir_swizzle_mask read_swiz;
+
+   /**
+    * Shift to apply to the write-mask when writing to the components of
+    * the packed variable.
+    */
+   unsigned write_mask_shift;
+
+   /**
+    * Name of the packing variable.  Declared const because the name may
+    * come from a string literal; this structure never modifies it.
+    */
+   const char* new_variable_name;
+
+   /**
+    * Type of the packing variable.
+    */
+   const glsl_type* gltype;
+
+};
+
+#define DBG_MSG(msg) printf("%s\n",msg);
+
+/**
+ * Collects the packable input varyings of a shader and builds a packing
+ * strategy for them.
+ *
+ * Only varyings whose \c interpolation and \c centroid qualifiers are equal
+ * to the values given at construction are collected, so a packing variable
+ * only ever groups varyings that share these qualifiers.
+ */
+class varying_packer {
+protected:
+   boxed_exec_list *list_float,*list_vec2,*list_vec3;
+   size_t list_float_size,list_vec2_size,list_vec3_size;
+   const exec_list* shader_ir;
+   /* These bit-fields have to be unsigned: a signed 1-bit field cannot hold
+    * the value 1 and a signed 2-bit field cannot hold the value 2, so the
+    * constructor arguments would be altered when stored and the comparisons
+    * in collect_varyings() would be made against the wrong values.
+    */
+   const unsigned interpolation:2;
+   const unsigned centroid:1;
+
+
+   /**
+    * Record in \c result how the variables of \c vars are laid out, one after
+    * the other, inside a single packing variable of type \c tp.
+    *
+    * \param result             table receiving one packing_remap per variable,
+    *                           keyed by the variable name
+    * \param vars               variables packed together
+    * \param size               number of components of each variable
+    * \param number_of_element  number of entries in \c vars and \c size
+    * \param name               prefix of the packing variable name
+    * \param tp                 type of the packing variable
+    *
+    * The name prefix is completed with the name of the first packed variable:
+    * the same prefix is used for every group of a given shape, so without
+    * this suffix two groups would end up sharing one packing variable.
+    *
+    * The packing_remap structures and the generated name are allocated with
+    * ralloc using \c shader_ir as context, like the lists of this class.
+    */
+   void generate_packing(hash_table* result,ir_variable** vars,
+                         const int* size,size_t number_of_element,
+                         const char* name,const glsl_type* tp) const {
+
+      if(number_of_element == 0)
+         return;
+
+      char* unique_name =
+            ralloc_asprintf(shader_ir,"%s_%s",name,vars[0]->name);
+
+      unsigned offset = 0;
+      for(size_t i=0;i<number_of_element;i++)
+      {
+         /* Only the first size[i] selectors are meaningful; the others are
+          * wrapped so that they still fit in the swizzle bit-fields.
+          */
+         ir_swizzle_mask swizzle = {0,0,0,0,0,0};
+         swizzle.x = offset & 3;
+         swizzle.y = (offset + 1) & 3;
+         swizzle.z = (offset + 2) & 3;
+         swizzle.w = (offset + 3) & 3;
+         swizzle.num_components = size[i];
+         swizzle.has_duplicates = false;
+
+         packing_remap* pr = static_cast<packing_remap*>(
+                  ralloc_size(shader_ir,sizeof(packing_remap)));
+         assert(pr != NULL);
+         pr->read_swiz = swizzle;
+         pr->write_mask_shift = offset;
+         pr->new_variable_name = unique_name;
+         pr->gltype = tp;
+
+         hash_table_insert(result,pr,vars[i]->name);
+         offset+= size[i];
+      }
+   }
+
+   /**
+    * Remove the first variable of \c list, decrement \c count and return
+    * the variable.  The caller guarantees that the list is not empty.
+    */
+   static ir_variable* take_head(boxed_exec_list* list,size_t& count) {
+      ir_variable* var = list_item(ir_variable*,list->pop_head());
+      count--;
+      return var;
+   }
+
+
+public:
+   /**
+    * This function walks \c shader_ir and fills \c list_float ,
+    * \c list_vec2 and \c list_vec3 with the matching input variables.
+    */
+   void collect_varyings() {
+
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+      list_float->make_empty();
+      list_vec2->make_empty();
+      list_vec3->make_empty();
+
+      foreach_list_const(tmp,shader_ir) {
+
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+
+         if(!var || var->mode != ir_var_in)
+            continue;
+         if(var->centroid != centroid || var->interpolation != interpolation)
+            continue;
+         /* NOTE(review): variables that already have a location are left
+          * alone; presumably these are built-in varyings bound to a fixed
+          * slot, which must not be merged into a new variable - confirm.
+          */
+         if(var->location != -1)
+            continue;
+
+         switch(var->type->gl_type) {
+         case GL_FLOAT: // varying float case
+            list_float->push_head(var);
+            list_float_size++;
+            break;
+         case GL_FLOAT_VEC2: // varying vec2 case
+            list_vec2->push_head(var);
+            list_vec2_size++;
+            break;
+         case GL_FLOAT_VEC3: // varying vec3 case
+            list_vec3->push_head(var);
+            list_vec3_size++;
+            break;
+         default: // varying vec4 not packable, other cases not handled
+            break;
+         }
+      }
+   }
+
+   /**
+    * This function tries to group the collected varyings and stores the
+    * result in a hash_table of \c packing_remap .
+    *
+    * The keys of the result are variable names (and not pointers) because
+    * varyings of different shaders are matched by name, not by pointer
+    * value.
+    * The data of the result are packing_remap*, one per packed variable; the
+    * entries of one group carry the same packing variable name.
+    *
+    * Groups are formed greedily, in this order of preference:
+    * vec3+float, vec2+vec2, vec2+float+float, 4 floats (all into a vec4),
+    * vec2+float, 3 floats (into a vec3), 2 floats (into a vec2).
+    *
+    * The returned hash_table has to be destroyed by the caller.
+    */
+   hash_table* define_pack_mapping() {
+      hash_table* result = hash_table_ctor(32, hash_table_string_hash,
+                                           hash_table_string_compare);
+
+      for(;;) {
+         ir_variable* vars[4];
+
+         if(list_vec3_size >= 1 && list_float_size >= 1) {
+            /* 1 vec3 and 1 float in a vec4 */
+            vars[0] = take_head(list_vec3,list_vec3_size);
+            vars[1] = take_head(list_float,list_float_size);
+            const int sizes[] = {3,1};
+            generate_packing(result,vars,sizes,2,
+                             "packed_vec3_float_varying",glsl_type::vec4_type);
+         } else if(list_vec2_size >= 2) {
+            /* 2 vec2 in a vec4 */
+            vars[0] = take_head(list_vec2,list_vec2_size);
+            vars[1] = take_head(list_vec2,list_vec2_size);
+            const int sizes[] = {2,2};
+            generate_packing(result,vars,sizes,2,
+                             "packed_vec2_vec2_varying",glsl_type::vec4_type);
+         } else if(list_float_size >= 2 && list_vec2_size >= 1) {
+            /* 1 vec2 and 2 float in a vec4 */
+            vars[0] = take_head(list_vec2,list_vec2_size);
+            vars[1] = take_head(list_float,list_float_size);
+            vars[2] = take_head(list_float,list_float_size);
+            const int sizes[] = {2,1,1};
+            generate_packing(result,vars,sizes,3,
+                             "packed_vec2_float_float",glsl_type::vec4_type);
+         } else if(list_float_size >= 4) {
+            /* 4 float in a vec4 */
+            vars[0] = take_head(list_float,list_float_size);
+            vars[1] = take_head(list_float,list_float_size);
+            vars[2] = take_head(list_float,list_float_size);
+            vars[3] = take_head(list_float,list_float_size);
+            const int sizes[] = {1,1,1,1};
+            generate_packing(result,vars,sizes,4,
+                             "packed_4_float",glsl_type::vec4_type);
+         } else if(list_float_size >= 1 && list_vec2_size >= 1) {
+            /* 1 vec2 and 1 float in a vec3 */
+            vars[0] = take_head(list_vec2,list_vec2_size);
+            vars[1] = take_head(list_float,list_float_size);
+            const int sizes[] = {2,1};
+            generate_packing(result,vars,sizes,2,
+                             "packed_vec2_float",glsl_type::vec3_type);
+         } else if(list_float_size >= 3) {
+            /* 3 float in a vec3 */
+            vars[0] = take_head(list_float,list_float_size);
+            vars[1] = take_head(list_float,list_float_size);
+            vars[2] = take_head(list_float,list_float_size);
+            const int sizes[] = {1,1,1};
+            generate_packing(result,vars,sizes,3,
+                             "packed_3_float",glsl_type::vec3_type);
+         } else if(list_float_size >= 2) {
+            /* 2 float in a vec2 */
+            vars[0] = take_head(list_float,list_float_size);
+            vars[1] = take_head(list_float,list_float_size);
+            const int sizes[] = {1,1};
+            generate_packing(result,vars,sizes,2,
+                             "packed_2_float",glsl_type::vec2_type);
+         } else {
+            /* Nothing left that can be grouped. */
+            break;
+         }
+      }
+
+      return result;
+   }
+
+   /**
+    * \param instructions  instruction list to scan; also used as the ralloc
+    *                      context of the internal lists
+    * \param i             interpolation qualifier of the varyings to collect
+    * \param c             centroid qualifier of the varyings to collect
+    */
+   varying_packer(exec_list* instructions,unsigned i,unsigned c):
+      shader_ir(instructions),interpolation(i),centroid(c) {
+      list_float = new (instructions) boxed_exec_list();
+      list_vec2 = new (instructions) boxed_exec_list();
+      list_vec3 = new (instructions) boxed_exec_list();
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+   }
+
+   ~varying_packer() {
+      delete list_float;
+      delete list_vec2;
+      delete list_vec3;
+   }
+
+};
+
+/**
+ * This utility function rotates the 4-bit write mask \c mask by \c step
+ * positions towards the higher components (bit 0 moves to bit \c step ,
+ * bits leaving bit 3 re-enter at bit 0).
+ *
+ * It is used for instance when a packed variable pv, stored in a packing
+ * variable pack, is written at position i :
+ * - In the original ir the assignment is pv.i = ...
+ * - In the new ir the assignment is pack.new_i = ...
+ * where new_i is the output of the function, and depends only on \c step
+ *
+ * Only the 4 low bits of \c mask are used and \c step is taken modulo 4, so
+ * the result always fits in 4 bits and no shift count can go out of range.
+ */
+static inline
+unsigned cyclic_right_shift(unsigned mask,unsigned step) {
+   mask &= 0xF;
+   step &= 3;
+   /* For step == 0 the second term is mask >> 4, i.e. 0 for a 4-bit mask. */
+   return ((mask << step) | (mask >> (4 - step))) & 0xF;
+}
+
+
+
+/**
+ * This visitor replaces every occurrence of a packed variable in an rvalue
+ * by the packing variable with the corresponding swizzle.
+ *
+ * \c replacement_table maps a packed variable name to its packing_remap and
+ * \c introduced_variables maps a packing_remap pointer to the packing
+ * variable declared for it in the current shader.
+ */
+class ir_rvalue_substituter : public ir_rvalue_visitor {
+protected:
+   hash_table* replacement_table;
+   hash_table* introduced_variables;
+
+public:
+   /**
+    * Replace \c *rvalue by a swizzle of the packing variable when it is a
+    * dereference of a packed variable; leave it untouched otherwise.
+    */
+   void handle_rvalue(ir_rvalue **rvalue) {
+      if(!*rvalue)
+         return;
+      ir_dereference_variable* dref = (*rvalue)->as_dereference_variable();
+      if(!dref)
+         return;
+      packing_remap* pr = static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+      if(!pr)
+         return;
+      ir_variable* newvar = static_cast<ir_variable*>(
+               hash_table_find(introduced_variables,pr));
+      /* No packing variable was declared for this remap in this shader:
+       * keep the original dereference rather than dereferencing NULL.
+       */
+      if(!newvar)
+         return;
+      /* Allocate both new nodes in the context that owns the node being
+       * replaced.
+       */
+      void* mem_ctx = ralloc_parent(dref);
+      ir_dereference_variable* ndref =
+            new (mem_ctx) ir_dereference_variable(newvar);
+      *rvalue = new (mem_ctx) ir_swizzle(ndref,pr->read_swiz);
+   }
+
+   ir_rvalue_substituter(hash_table* htb1, hash_table* htb2):
+      replacement_table(htb1),introduced_variables(htb2) {
+
+   }
+
+};
+
+
+/**
+ * This visitor replaces every lhs occurrence of a packed variable by the
+ * packing variable with the corresponding write mask, and uses an
+ * \c ir_rvalue_substituter to rewrite the rhs and the condition of
+ * assignments.
+ *
+ * Subclasses provide generate_new_varying(), which creates the packing
+ * variable with the mode suited to the shader stage.
+ */
+
+class ir_variable_substituter : public ir_hierarchical_visitor {
+protected:
+   hash_table* replacement_table;
+   hash_table* introduced_variables;
+   glsl_symbol_table* symbols;
+
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const=0;
+
+public:
+   ir_visitor_status visit_enter(ir_assignment *assign) {
+      ir_rvalue_substituter rs(replacement_table,introduced_variables);
+
+      /* accept() rewrites the operands nested inside the expression; the
+       * explicit handle_rvalue() call covers the case where the expression
+       * itself is a bare dereference of a packed variable.
+       */
+      assign->rhs->accept(&rs);
+      rs.handle_rvalue(&assign->rhs);
+      if(assign->condition) {
+         assign->condition->accept(&rs);
+         rs.handle_rvalue(&assign->condition);
+      }
+
+      ir_dereference_variable* dref = assign->lhs->as_dereference_variable();
+      if(!dref)
+         return visit_continue;
+      packing_remap* pr =  static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+      if(!pr)
+         return visit_continue;
+      ir_variable* newvar = static_cast<ir_variable*>(
+               hash_table_find(introduced_variables,pr));
+      /* Never redirect the lhs to a packing variable that was not declared
+       * in this shader.
+       */
+      if(!newvar)
+         return visit_continue;
+      dref->var = newvar;
+      assign->write_mask =
+            cyclic_right_shift(assign->write_mask,pr->write_mask_shift);
+      return visit_continue;
+   }
+
+   /**
+    * This method generates the ir_variable* packing variables, inserts them
+    * in \c instructions in front of the packed variable they replace, and
+    * removes the declarations of the packed variables.
+    * The new variable pointers are stored in the \c introduced_variables
+    * hash_table, where they can be found when the packed variables are
+    * replaced in the ir.
+    *
+    * A packing variable is created only once per shader: if the symbol table
+    * already knows its name, the existing variable is reused.  A newly
+    * created packing variable takes the interpolation and centroid
+    * qualifiers of the packed variable that triggered its creation.
+    */
+   void add_prelude(exec_list* instructions) {
+      foreach_list_safe(tmp,instructions) {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+         /* Skip anything that is not a declaration; declarations located
+          * after it still have to be processed.
+          */
+         if(!var)
+            continue;
+         packing_remap* pr =  static_cast<packing_remap*>(
+                  hash_table_find(replacement_table,var->name));
+         if(!pr)
+            continue;
+         ir_variable* newvar = symbols->get_variable(pr->new_variable_name);
+         if(!newvar) {
+            newvar = generate_new_varying(var,pr);
+            newvar->interpolation = var->interpolation;
+            newvar->centroid = var->centroid;
+            inst->insert_before(newvar);
+            symbols->add_variable(newvar);
+         }
+         hash_table_insert(introduced_variables,newvar,pr);
+         inst->remove();
+      }
+   }
+
+   ir_variable_substituter(hash_table* htb,glsl_symbol_table* s):
+      ir_hierarchical_visitor(),replacement_table(htb),symbols(s) {
+      introduced_variables = hash_table_ctor(0, hash_table_pointer_hash,
+                                           hash_table_pointer_compare);
+   }
+
+   ~ir_variable_substituter() {
+      /* The table comes from hash_table_ctor(), so it is released with the
+       * matching destructor rather than a bare free().
+       */
+      hash_table_dtor(introduced_variables);
+   }
+};
+
+/**
+ * Substituter whose packing variables are created with mode \c ir_var_in .
+ */
+class ir_variable_substituter_fs : public ir_variable_substituter {
+public:
+   ir_variable_substituter_fs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s)
+   {
+   }
+
+protected:
+   /** Create the packing variable described by \c pr in context \c ctx . */
+   virtual ir_variable*
+   generate_new_varying(void* ctx,packing_remap* pr) const
+   {
+      ir_variable* const packing_var =
+         new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_in);
+      return packing_var;
+   }
+};
+
+/**
+ * Substituter whose packing variables are created with mode \c ir_var_out .
+ */
+class ir_variable_substituter_vs : public ir_variable_substituter {
+public:
+   ir_variable_substituter_vs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s)
+   {
+   }
+
+protected:
+   /** Create the packing variable described by \c pr in context \c ctx . */
+   virtual ir_variable*
+   generate_new_varying(void* ctx,packing_remap* pr) const
+   {
+      ir_variable* const packing_var =
+         new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_out);
+      return packing_var;
+   }
+};
+
+
+/**
+ * Pack the varyings that carry the given \c interpolation and \c centroid
+ * qualifiers.
+ *
+ * The packing strategy is computed from the inputs of \c fs and is then
+ * applied to \c fs and to \c vs , in that order.
+ */
+void pack_varyings_according_interpolation_centroid(
+   unsigned interpolation, unsigned centroid,
+   gl_shader*& vs,gl_shader*& fs) {
+
+   varying_packer vp(fs->ir,interpolation,centroid);
+   vp.collect_varyings();
+   hash_table* htb = vp.define_pack_mapping();
+
+   /* Each substituter lives in its own scope so that it is destroyed before
+    * the mapping table it refers to.
+    */
+   {
+      ir_variable_substituter_fs fsreplacer(htb,fs->symbols);
+      fsreplacer.add_prelude(fs->ir);
+      visit_list_elements(&fsreplacer,fs->ir);
+   }
+
+   {
+      ir_variable_substituter_vs vsreplacer(htb,vs->symbols);
+      vsreplacer.add_prelude(vs->ir);
+      visit_list_elements(&vsreplacer,vs->ir);
+   }
+
+   /* define_pack_mapping() hands ownership of the table to its caller.
+    * NOTE(review): this releases the table only; the packing_remap values
+    * it refers to are not released here.
+    */
+   hash_table_dtor(htb);
+}
+
+
+/**
+ * Entry point of the pass: pack the varyings exchanged between \c vs and
+ * \c fs .
+ *
+ * Varyings are packed separately for every combination of interpolation
+ * qualifier (flat, smooth, noperspective) and centroid qualifier: first the
+ * three interpolation modes without centroid, then the same three with it.
+ */
+void pack_varyings(gl_shader*& vs,gl_shader*& fs){
+   static const unsigned interpolations[] = {
+      ir_var_flat, ir_var_smooth, ir_var_noperspective
+   };
+   const size_t interpolation_count =
+         sizeof(interpolations) / sizeof(interpolations[0]);
+
+   for(unsigned centroid=0;centroid<2;centroid++) {
+      for(size_t i=0;i<interpolation_count;i++) {
+         pack_varyings_according_interpolation_centroid(
+                  interpolations[i],centroid,vs,fs);
+      }
+   }
+}
+
+
-- 
1.7.6



More information about the mesa-dev mailing list