[Mesa-dev] [PATCH] Implementing varying packing

vlj vljn at ovi.com
Tue Aug 16 07:06:07 PDT 2011


From: Vincent Lejeune <vljn at ovi.com>

This optimisation pass will look for and pack together float,
vec2, vec3 varyings in fragment shaders and transform the vertex
shader accordingly. It might improve performance depending on
the hardware.

# Changes from first patch :
# - Scons build file modified
# - More cases defined in the strategy (packing into vec3 and into vec2)
# - Pack with respect to interpolation/centroid
# - Fix missing args in pack_varyings_according_interpolation_centroid
---
 src/glsl/Makefile          |    1 +
 src/glsl/SConscript        |    1 +
 src/glsl/linker.cpp        |    3 +
 src/glsl/pack_varyings.cpp |  583 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 588 insertions(+), 0 deletions(-)
 create mode 100644 src/glsl/pack_varyings.cpp

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index e2d29bd..8ce06bd 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -84,6 +84,7 @@ CXX_SOURCES = \
 	opt_structure_splitting.cpp \
 	opt_swizzle_swizzle.cpp \
 	opt_tree_grafting.cpp \
+	pack_varyings.cpp \
 	s_expression.cpp
 
 LIBS = \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index c325583..082aaa9 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -93,6 +93,7 @@ glsl_sources = [
     'opt_structure_splitting.cpp',
     'opt_swizzle_swizzle.cpp',
     'opt_tree_grafting.cpp',
+    'pack_varyings.cpp',
     'ralloc.c',
     's_expression.cpp',
     'strtod.c',
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 255edc6..97b25ff 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1404,6 +1404,7 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
    }
 }
 
+extern void pack_varyings(gl_shader*& vs,gl_shader*& fs);
 
 void
 assign_varying_locations(struct gl_shader_program *prog,
@@ -1413,6 +1414,8 @@ assign_varying_locations(struct gl_shader_program *prog,
    unsigned output_index = VERT_RESULT_VAR0;
    unsigned input_index = FRAG_ATTRIB_VAR0;
 
+   pack_varyings(producer,consumer);
+
    /* Operate in a total of three passes.
     *
     * 1. Assign locations for any matching inputs and outputs.
diff --git a/src/glsl/pack_varyings.cpp b/src/glsl/pack_varyings.cpp
new file mode 100644
index 0000000..a4103fe
--- /dev/null
+++ b/src/glsl/pack_varyings.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ * Copyright © 2011 Vincent Lejeune
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file pack_varyings.cpp
+ *
+ * Find packable varyings and pack them together.
+ * Currently only float, vec2 and vec3 varyings are considered.
+ *
+ * The code works in two passes:
+ * - In the first pass, the varyings of the fragment shader are collected. A
+ * packing strategy is then built by looking only at how many varyings of each
+ * type (float, vec2, vec3) exist; in particular, how often each varying is used
+ * is ignored, as swizzles are assumed to be essentially free. This
+ * strategy is stored as a hash_table of packing_remap structures. If a shader
+ * object is reused by several programs, caching this strategy might avoid
+ * unnecessary recomputation; this is not done yet.
+ * - In the second pass, the packing variables are declared in the shaders and the
+ * packed variables are removed. The instructions are then walked and each occurrence
+ * of a packed variable is replaced by a swizzle of the corresponding packing variable.
+ */
+
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "list.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+}
+
+/*
+ * Boxed list definition.
+ * Such a list holds pointers rather than exec_nodes; it can therefore reference
+ * items that are already stored in another list (an exec_node can belong to only one exec_list).
+ * This container is needed to record ir_variable pointers in the first pass without
+ * removing the variables from the instruction stream.
+ *
+ */
+
+// BOXED LIST BEGIN
+
+/** List node whose payload is an untyped pointer; the node never frees the pointee. */
+class box : public exec_node {
+public:
+   /** The boxed pointer, stored exactly as it was handed to the constructor. */
+   void *content;
+
+   box(void *payload) : content(payload) { }
+};
+
+/**
+ * exec_list of box nodes: lets callers keep plain pointers in a list without
+ * unlinking the pointees from whatever list they already live in.
+ */
+class boxed_exec_list : public exec_list {
+public:
+   /** Allocates the list object from \c ctx through ralloc_size(). */
+   static void* operator new(size_t size, void *ctx) {
+      void *const mem = ralloc_size(ctx, size);
+      assert(mem != NULL);
+      return mem;
+   }
+
+   /** Boxes \c n (this list is the placement-new context) and prepends the box. */
+   void push_head(void *n) {
+      exec_list::push_head(new (this) box(n));
+   }
+
+   /** Boxes \c n (this list is the placement-new context) and appends the box. */
+   void push_tail(void *n) {
+      exec_list::push_tail(new (this) box(n));
+   }
+
+   /** Linear search: true when some box of the list stores exactly \c pointer. */
+   bool has(const void* pointer) const {
+      foreach_list_const(node, this)  {
+         if (static_cast<const box*>(node)->content == pointer)
+            return true;
+      }
+      return false;
+   }
+};
+/* Unbox a list node: read the pointer stored in the box behind \c pointer and cast it to \c type. */
+#define list_item(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content)
+/* Same as list_item, but accepts a const exec_node pointer (its constness is cast away first). */
+#define list_item_const(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content)
+
+// BOXED LIST END
+
+
+/**
+ * From Ian Romanick
+ *
+ * Describes where one packed variable lives inside its packing variable.
+ *
+ * The packing variable itself is not stored here because it has to be declared
+ * once in each of the two shaders (vertex and fragment); only its name and
+ * type are recorded.
+ * The packed variable is not stored either: its name is the key under which
+ * this record sits in the hash_table that defines the packing strategy.
+ * Keep the field order: records are built by positional aggregate initialization.
+ */
+struct packing_remap {
+   /**
+    * Swizzle that extracts the packed variable from the packing variable.
+    */
+   ir_swizzle_mask read_swiz;
+
+   /**
+    * Number of positions a write mask on the packed variable is shifted by
+    * to address the same components in the packing variable.
+    */
+   unsigned write_mask_shift;
+
+   /**
+    * Name of the packing variable.
+    */
+   char* new_variable_name;
+
+   /**
+    * Type of the packing variable.
+    */
+   const glsl_type* gltype;
+
+};
+/* Debug helper: prints \c msg on its own line. do/while(0) makes it one statement, so callers supply the ';'. */
+#define DBG_MSG(msg) do { printf("%s\n", (msg)); } while (0)
+
+class varying_packer {
+protected:
+   boxed_exec_list *list_float,*list_vec2,*list_vec3;      /* candidates, one list per type */
+   size_t list_float_size,list_vec2_size,list_vec3_size;   /* element counts of those lists */
+   const exec_list* shader_ir;                             /* instructions scanned for varyings */
+   /* Unsigned on purpose: as signed bit-fields these could not hold 2 (2 bits) or
+    * 1 (1 bit), so the equality tests in collect_varyings() would fail for them. */
+   const unsigned interpolation:2;
+   const unsigned centroid:1;
+   /* Maps each of \c vars (at least one entry) to its slice of ONE new packing variable of
+    * type \c tp. \c name is only a prefix: the first packed variable's name is appended so
+    * that two groups of the same shape, or of different interpolation classes, do not end
+    * up sharing (and overwriting) a single packing variable. */
+   void generate_packing(hash_table* result,ir_variable** vars,
+                         int* size,size_t number_of_element,char* name,
+                         const glsl_type* tp) const {
+      char* unique_name = ralloc_asprintf(vars[0], "%s_%s", name, vars[0]->name);
+      unsigned offset = 0;
+      for(size_t i = 0; i < number_of_element; i++) {
+         ir_swizzle_mask swizzle = {offset,offset+1,offset+2,offset+3,size[i],false};
+         packing_remap pr = {swizzle,offset,unique_name,tp};
+         hash_table_insert(result, new packing_remap(pr), vars[i]->name);
+         offset += size[i];
+      }
+   }
+
+public:
+   /**
+    * Scan \c shader_ir and rebuild \c list_float, \c list_vec2 and \c list_vec3
+    * (and their counters) from scratch.
+    *
+    * Only float/vec2/vec3 inputs whose interpolation and centroid qualifiers
+    * match this packer are recorded. Inputs that already have a location
+    * (NOTE(review): expected to be the built-ins, e.g. gl_PointCoord -- confirm)
+    * are skipped: their slot is fixed, so they must not be folded into another variable.
+    */
+   void collect_varyings() {
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+      list_float->make_empty();
+      list_vec2->make_empty();
+      list_vec3->make_empty();
+
+      foreach_list_const(tmp,shader_ir) {
+         ir_variable* var = ((ir_instruction*) tmp)->as_variable();
+         if(!var || var->mode != ir_var_in || var->location != -1)
+            continue;
+         if(var->centroid != centroid || var->interpolation != interpolation)
+            continue;
+
+         switch(var->type->gl_type) {
+         case GL_FLOAT: // varying float case
+            list_float->push_head(var);
+            list_float_size++;
+            break;
+         case GL_FLOAT_VEC2: // varying vec2 case
+            list_vec2->push_head(var);
+            list_vec2_size++;
+            break;
+         case GL_FLOAT_VEC3: // varying vec3 case
+            list_vec3->push_head(var);
+            list_vec3_size++;
+            break;
+         default: // a vec4 is already full; other types are not handled
+            break;
+         }
+      }
+   }
+
+   /**
+    * Greedily group the collected varyings and describe every grouping in a
+    * hash_table of \c packing_remap records. The rules below are tried in
+    * order, restarting from the first one after each successful grouping.
+    *
+    * The keys of the result are variable names (not pointers), because
+    * varyings of different shaders can be matched through their name only.
+    * The values are packing_remap pointers; every key owns a record of its
+    * own, and the records of one group name the same packing variable.
+    *
+    * The result has to be freed by the caller.
+    */
+   hash_table* define_pack_mapping() {
+      hash_table* result = hash_table_ctor(32, hash_table_string_hash,
+                                           hash_table_string_compare);
+
+      while(true) {
+         /**
+          * Rule 1: one vec3 and one float are available;
+          * they share a vec4 packing variable.
+          */
+         if(list_vec3_size >= 1 && list_float_size >= 1) {
+            ir_variable* vec3_var =
+                  list_item(ir_variable*,list_vec3->pop_head());
+            ir_variable* float_var =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_vec3_size--;
+            list_float_size--;
+
+            ir_variable* vars[] = {vec3_var,float_var};
+            int sizes[]= {3,1};
+            generate_packing(result,vars,sizes,2,"packed_vec3_float_varying",glsl_type::vec4_type);
+
+            continue;
+         }
+         /**
+          * Rule 2: two vec2 are available;
+          * they share a vec4 packing variable.
+          */
+         if(list_vec2_size >= 2 ) {
+            ir_variable* var1 =
+                  list_item(ir_variable*,list_vec2->pop_head());
+            ir_variable* var2 =
+                  list_item(ir_variable*,list_vec2->pop_head());
+            list_vec2_size--;
+            list_vec2_size--;
+
+            ir_variable* vars[] = {var1,var2};
+            int sizes[] = {2,2};
+            generate_packing(result,vars,sizes,2,"packed_vec2_vec2_varying",glsl_type::vec4_type);
+
+            continue;
+         }
+         /**
+          * Rule 3: one vec2 and two floats are available;
+          * they share a vec4 packing variable.
+          */
+         if(list_float_size >= 2 && list_vec2_size >=1 ) {
+            ir_variable* vec2 =
+                  list_item(ir_variable*,list_vec2->pop_head());
+            ir_variable* flt1 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt2 =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_float_size--;
+            list_float_size--;
+            list_vec2_size--;
+
+            ir_variable* vars[] = {vec2,flt1,flt2};
+            int sizes[] = {2,1,1};
+            generate_packing(result,vars,sizes,3,"packed_vec2_float_float",glsl_type::vec4_type);
+
+            continue;
+         }
+         /**
+          * Rule 4: four floats are available;
+          * they share a vec4 packing variable.
+          */
+         if(list_float_size >= 4 ) {
+            ir_variable* flt1 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt2 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt3 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt4 =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_float_size--;
+            list_float_size--;
+            list_float_size--;
+            list_float_size--;
+
+            ir_variable* vars[] = {flt1,flt2,flt3,flt4};
+            int sizes[] = {1,1,1,1};
+            generate_packing(result,vars,sizes,4,"packed_4_float",glsl_type::vec4_type);
+
+            continue;
+         }
+         /**
+          * Rule 5: one vec2 and one float are available;
+          * they share a vec3 packing variable.
+          */
+         if(list_float_size >= 1 && list_vec2_size >=1 ) {
+            ir_variable* vec2 =
+                  list_item(ir_variable*,list_vec2->pop_head());
+            ir_variable* flt =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_float_size--;
+            list_vec2_size--;
+
+            ir_variable* vars[] = {vec2,flt};
+            int sizes[] = {2,1};
+            generate_packing(result,vars,sizes,2,"packed_vec2_float",glsl_type::vec3_type);
+
+            continue;
+         }
+         /**
+          * Rule 6: three floats are available;
+          * they share a vec3 packing variable.
+          */
+         if(list_float_size >= 3) {
+            ir_variable* flt1 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt2 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt3 =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_float_size--;
+            list_float_size--;
+            list_float_size--;
+
+            ir_variable* vars[] = {flt1,flt2,flt3};
+            int sizes[] = {1,1,1};
+            generate_packing(result,vars,sizes,3,"packed_3_float",glsl_type::vec3_type);
+
+            continue;
+         }
+         /**
+          * Rule 7: two floats are available;
+          * they share a vec2 packing variable.
+          */
+         if(list_float_size >= 2) {
+            ir_variable* flt1 =
+                  list_item(ir_variable*,list_float->pop_head());
+            ir_variable* flt2 =
+                  list_item(ir_variable*,list_float->pop_head());
+            list_float_size--;
+            list_float_size--;
+
+            ir_variable* vars[] = {flt1,flt2};
+            int sizes[] = {1,1};
+            generate_packing(result,vars,sizes,2,"packed_2_float",glsl_type::vec2_type);
+
+            continue;
+         }
+         break;
+      }
+
+      return result;
+   }
+
+   varying_packer(exec_list* instructions,unsigned i,unsigned c)
+      : shader_ir(instructions), interpolation(i), centroid(c) {
+      list_float_size = list_vec2_size = list_vec3_size = 0;   /* nothing collected yet */
+      list_float = new (instructions) boxed_exec_list();        /* allocated from the IR list */
+      list_vec2 = new (instructions) boxed_exec_list();
+      list_vec3 = new (instructions) boxed_exec_list();
+   }
+
+   ~varying_packer() {   /* drops the three bookkeeping lists; boxed variables are not deleted here */
+      delete list_vec3;
+      delete list_vec2;
+      delete list_float;
+   }
+
+};
+
+/**
+ * Rotate the low four bits of \c mask towards the high end by \c step places.
+ *
+ * Used when a packed variable pv lives at component offset \c step of pack:
+ * - in the original ir the assignment is pv.i = ...
+ * - in the new ir the assignment is pack.new_i = ...
+ * where the write mask selecting new_i is the value returned here.
+ * The result is confined to bits 0-3, and \c step is reduced modulo 4 so that
+ * both shift amounts always stay in range.
+ */
+inline
+unsigned cyclic_right_shift(unsigned mask,unsigned step) {
+   mask &= 0xF;
+   step &= 3;
+   return ((mask << step) | (mask >> (4 - step))) & 0xF;
+}
+
+
+
+/**
+ * Visitor that replaces every dereference of a packed variable inside an
+ * rvalue by a swizzle of the matching packing variable.
+ */
+class ir_rvalue_substituter : public ir_rvalue_visitor {
+protected:
+   hash_table* replacement_table;     /* packed variable name -> packing_remap* */
+   hash_table* introduced_variables;  /* packing_remap* -> packing ir_variable* */
+
+public:
+   /** \c htb1 is the name -> remap table, \c htb2 the remap -> new variable table. */
+   ir_rvalue_substituter(hash_table* htb1, hash_table* htb2):
+      replacement_table(htb1),introduced_variables(htb2) {
+   }
+
+   /** Rewrites \c *rvalue in place when it is a plain dereference of a packed variable. */
+   void handle_rvalue(ir_rvalue **rvalue) {
+      if(!*rvalue)
+         return;
+      ir_dereference_variable* dref = (*rvalue)->as_dereference_variable();
+      if(!dref)
+         return;
+      packing_remap* pr = static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+      if(!pr)
+         return;
+      ir_variable* newvar = static_cast<ir_variable*>(
+               hash_table_find(introduced_variables,pr));
+      if(!newvar)   /* no packing variable was declared for this remap: keep the original */
+         return;
+      ir_dereference_variable* ndref = new (dref->var) ir_dereference_variable(newvar);
+      *rvalue = new (dref) ir_swizzle(ndref,pr->read_swiz);
+   }
+};
+
+
+/**
+ * Visitor that redirects every assignment whose left-hand side is a packed
+ * variable to the matching packing variable (shifting the write mask), and
+ * runs an \c ir_rvalue_substituter over the right-hand side of each assignment.
+ */
+class ir_variable_substituter : public ir_hierarchical_visitor {
+protected:
+   hash_table* replacement_table;     /* packed variable name -> packing_remap* */
+   hash_table* introduced_variables;  /* packing_remap* -> packing ir_variable* */
+   glsl_symbol_table* symbols;
+
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const=0;
+
+public:
+   ir_variable_substituter(hash_table* htb,glsl_symbol_table* s):
+      ir_hierarchical_visitor(),replacement_table(htb),symbols(s) {
+      introduced_variables = hash_table_ctor(0, hash_table_pointer_hash,
+                                           hash_table_pointer_compare);
+   }
+
+   ~ir_variable_substituter() {
+      hash_table_dtor(introduced_variables); /* counterpart of hash_table_ctor; plain free() is not */
+   }
+
+   ir_visitor_status visit_enter(ir_assignment *assign) {
+      ir_rvalue_substituter rs(replacement_table,introduced_variables);
+      assign->rhs->accept(&rs);
+      ir_dereference_variable* dref = assign->lhs->as_dereference_variable();
+      packing_remap* pr = !dref ? NULL : static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+      ir_variable* newvar = !pr ? NULL : static_cast<ir_variable*>(
+               hash_table_find(introduced_variables,pr));
+      if(!newvar)   /* lhs is not a packed variable, or nothing was declared for it */
+         return visit_continue;
+      dref->var = newvar;
+      assign->write_mask = cyclic_right_shift(assign->write_mask,pr->write_mask_shift);
+      return visit_continue;
+   }
+
+   /**
+    * Declares the packing variables in \c instructions and removes the
+    * declarations of the variables they replace.
+    * A packing variable is created when the first member of its group is met:
+    * it is inserted before that declaration and takes over its interpolation
+    * and centroid qualifiers. Every remap record is then mapped to its packing
+    * variable in \c introduced_variables for the rewrite of the ir that follows.
+    */
+   void add_prelude(exec_list* instructions) {
+      foreach_list_safe(tmp,instructions) {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+         if(!var)
+            continue; /* declarations may follow other instructions: keep scanning */
+         packing_remap* pr =  static_cast<packing_remap*>(
+                  hash_table_find(replacement_table,var->name));
+         if(!pr)
+            continue;
+         ir_variable* newvar = symbols->get_variable(pr->new_variable_name);
+         if(!newvar) {
+            newvar = generate_new_varying(var,pr);
+            newvar->interpolation = var->interpolation;
+            newvar->centroid = var->centroid;
+            inst->insert_before(newvar);
+            symbols->add_variable(newvar);
+         }
+         hash_table_insert(introduced_variables,newvar,pr);
+         inst->remove();
+      }
+   }
+};
+
+/** Fragment-shader flavour of the substituter: its packing variables are shader inputs. */
+class ir_variable_substituter_fs : public ir_variable_substituter {
+public:
+   ir_variable_substituter_fs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s) { }
+protected:
+   /** Builds an \c ir_var_in variable from the name and type in \c pr; \c ctx is the placement-new context. */
+   virtual ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const {
+      ir_variable* const packed_in = new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_in);
+      return packed_in;
+   }
+};
+
+/** Vertex-shader flavour of the substituter: its packing variables are shader outputs. */
+class ir_variable_substituter_vs : public ir_variable_substituter {
+public:
+   ir_variable_substituter_vs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s) { }
+protected:
+   /** Builds an \c ir_var_out variable from the name and type in \c pr; \c ctx is the placement-new context. */
+   virtual ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const {
+      ir_variable* const packed_out = new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_out);
+      return packed_out;
+   }
+};
+
+
+/* Packs the fragment inputs of one (interpolation, centroid) class, then rewrites fs and vs to match. */
+void pack_varyings_according_interpolation_centroid(
+   unsigned interpolation, unsigned centroid,
+   gl_shader*& vs,gl_shader*& fs) {
+   varying_packer vp(fs->ir,interpolation,centroid);
+   vp.collect_varyings();
+   hash_table* htb = vp.define_pack_mapping();
+
+   ir_variable_substituter_fs fsreplacer(htb,fs->symbols);
+   fsreplacer.add_prelude(fs->ir);
+   visit_list_elements(&fsreplacer,fs->ir);
+   ir_variable_substituter_vs vsreplacer(htb,vs->symbols);
+   vsreplacer.add_prelude(vs->ir);
+   visit_list_elements(&vsreplacer,vs->ir);
+   hash_table_dtor(htb); /* releases the table only; the packing_remap values it held are still leaked */
+}
+
+
+/* Runs the packer once per (interpolation, centroid) class, the three non-centroid classes first. */
+void pack_varyings(gl_shader*& vs,gl_shader*& fs){
+   const unsigned modes[] = { ir_var_flat, ir_var_smooth, ir_var_noperspective };
+   for(unsigned centroid = 0; centroid < 2; centroid++) {
+      for(unsigned m = 0; m < 3; m++) {
+         pack_varyings_according_interpolation_centroid(modes[m],centroid,vs,fs);
+      }
+   }
+}
+
+
-- 
1.7.6



More information about the mesa-dev mailing list