[Mesa-dev] [PATCH] Implementing varying packing
vlj
vljn at ovi.com
Tue Aug 16 07:06:07 PDT 2011
From: Vincent Lejeune <vljn at ovi.com>
This optimisation pass looks for float, vec2 and vec3 varyings in
fragment shaders, packs them together, and transforms the vertex
shader accordingly. It might improve performance depending on
the hardware.
# Changes from first patch :
# - Scons build file modified
# - More cases defined in the strategy (packing into vec3 and into vec2)
# - Pack with respect to interpolation/centroid
# - Fix missing args in pack_varyings_according_interpolation_centroid
---
src/glsl/Makefile | 1 +
src/glsl/SConscript | 1 +
src/glsl/linker.cpp | 3 +
src/glsl/pack_varyings.cpp | 583 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 588 insertions(+), 0 deletions(-)
create mode 100644 src/glsl/pack_varyings.cpp
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index e2d29bd..8ce06bd 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -84,6 +84,7 @@ CXX_SOURCES = \
opt_structure_splitting.cpp \
opt_swizzle_swizzle.cpp \
opt_tree_grafting.cpp \
+ pack_varyings.cpp \
s_expression.cpp
LIBS = \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index c325583..082aaa9 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -93,6 +93,7 @@ glsl_sources = [
'opt_structure_splitting.cpp',
'opt_swizzle_swizzle.cpp',
'opt_tree_grafting.cpp',
+ 'pack_varyings.cpp',
'ralloc.c',
's_expression.cpp',
'strtod.c',
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 255edc6..97b25ff 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1404,6 +1404,7 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
}
}
+extern void pack_varyings(gl_shader*& vs,gl_shader*& fs);
void
assign_varying_locations(struct gl_shader_program *prog,
@@ -1413,6 +1414,8 @@ assign_varying_locations(struct gl_shader_program *prog,
unsigned output_index = VERT_RESULT_VAR0;
unsigned input_index = FRAG_ATTRIB_VAR0;
+ pack_varyings(producer,consumer);
+
/* Operate in a total of three passes.
*
* 1. Assign locations for any matching inputs and outputs.
diff --git a/src/glsl/pack_varyings.cpp b/src/glsl/pack_varyings.cpp
new file mode 100644
index 0000000..a4103fe
--- /dev/null
+++ b/src/glsl/pack_varyings.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ * Copyright © 2011 Vincent Lejeune
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file pack_varyings.cpp
+ *
+ * Try to find packable varyings and pack them.
+ * Currently the code only handles float, vec2 and vec3 varyings.
+ *
+ * The code works in two passes:
+ * - In the first pass, all varyings from the shaders are collected. A packing
+ * strategy is then built by looking only at the number of varyings of each type
+ * (float, vec2, vec3); in particular, the number of occurrences of each varying
+ * is ignored, as it is assumed that swizzling is mostly free. This
+ * strategy is stored as a hash_table of packing_remap structures. If a shader
+ * object is reused by several programs, storing this strategy might avoid
+ * unnecessary recomputations; this is not done yet.
+ * - In the second pass, the packing variables are declared in the shaders and the
+ * packed variables are removed. Instructions are then walked and each occurrence
+ * of a packed variable is replaced by the corresponding swizzled packing variable.
+ */
+
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "list.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+}
+
+/*
+ * Boxed List definition
+ * Such a list holds pointers instead of exec_nodes; in addition, it can contain
+ * items that are already stored in another list (an exec_node can belong to
+ * only one exec_list).
+ * This container is needed to store ir_variable* in the first pass without
+ * removing them from the instruction flow.
+ *
+ */
+
+// BOXED LIST BEGIN
+
+/**
+ * List node whose only job is to carry an untyped pointer.
+ *
+ * The node is what gets linked into a list; \c content is merely stored and
+ * is never dereferenced or released by this class.
+ */
+class box : public exec_node
+{
+public:
+   /** Pointer carried by this node. */
+   void *content;
+
+   box(void *payload)
+      : content(payload)
+   {
+   }
+};
+
+/**
+ * An exec_list whose nodes are \c box objects wrapping arbitrary pointers.
+ *
+ * Instances are allocated with ralloc (see operator new below); every box
+ * created by push_tail()/push_head() is allocated with the list itself passed
+ * as the placement argument.
+ * NOTE(review): this presumably makes each box a ralloc child of the list
+ * through exec_node's placement new -- confirm against list.h.
+ */
+class boxed_exec_list : public exec_list
+{
+public:
+   /** Wrap \c n in a new box and append it to the list. */
+   void push_tail(void *n) {
+      box *b = new (this) box(n);
+      exec_list::push_tail(b);
+   }
+
+   /** Wrap \c n in a new box and insert it at the front of the list. */
+   void push_head(void *n) {
+      box *b = new (this) box(n);
+      exec_list::push_head(b);
+   }
+
+   /** Allocate the list with ralloc, using \c ctx as the ralloc context. */
+   static void* operator new(size_t size, void *ctx){
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /**
+    * Matching deallocation function.
+    *
+    * Without it, a plain \c delete on a list would fall back to the global
+    * operator delete, which must not be handed memory obtained from
+    * ralloc_size().
+    */
+   static void operator delete(void *node) {
+      ralloc_free(node);
+   }
+
+   /** Return true if some box of the list carries exactly \c pointer. */
+   bool has(const void* pointer) const {
+      foreach_list_const(tmp,this) {
+         /* Every node of this list was created as a box by push_*(). */
+         const box *tmpb = static_cast<const box*>(tmp);
+         if(tmpb->content == pointer)
+            return true;
+      }
+      return false;
+   }
+
+};
+
+/*
+ * list_item(type, pointer): treat \c pointer as a \c box* and yield its
+ * \c content cast to \c type. Nothing checks that \c pointer really points
+ * to a box.
+ * list_item_const(type, pointer): same, but first casts the constness of an
+ * exec_node pointer away.
+ */
+#define list_item(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content)
+#define list_item_const(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content)
+
+
+// BOXED LIST END
+
+
+/**
+ * From Ian Romanick
+ *
+ * This structure describes where one packed variable lives inside a
+ * packing variable.
+ *
+ * The packing variable itself is not stored here because it has to be
+ * declared in 2 shaders (a vertex shader and a fragment shader). Only the
+ * name and the type of that variable are recorded.
+ * The packed variable is not stored here either; its name is the key under
+ * which this structure is inserted in the hash_table that defines the
+ * packing strategy.
+ */
+struct packing_remap {
+ /**
+ * Swizzle to apply to the packing variable to read the packed variable.
+ */
+ ir_swizzle_mask read_swiz;
+
+ /**
+ * Shift to apply to the write-mask of an assignment to the packed
+ * variable so that it addresses the right components of the packing
+ * variable.
+ */
+ unsigned write_mask_shift;
+
+ /**
+ * Name of the packing variable.
+ */
+ char* new_variable_name;
+
+ /**
+ * Type of the packing variable.
+ */
+ const glsl_type* gltype;
+
+};
+
+/*
+ * Debug helper: prints \c msg followed by a newline with printf.
+ * NOTE(review): the expansion already ends with ';', so "DBG_MSG(x);"
+ * expands to two statements; the macro is not used in the visible part of
+ * this file.
+ */
+#define DBG_MSG(msg) printf("%s\n",msg);
+
+/**
+ * Collects the packable input varyings of one shader that share a given
+ * interpolation mode and centroid flag, and decides how to pack them.
+ *
+ * Usage: construct, call collect_varyings(), then define_pack_mapping().
+ *
+ * The three work lists, the packing_remap records and the generated packing
+ * variable names are all ralloc allocations hanging off this object's lists;
+ * they are released by the destructor. The hash_table returned by
+ * define_pack_mapping() therefore must not be used after the packer has been
+ * destroyed.
+ */
+class varying_packer {
+private:
+   /* Owns ralloc memory: copying would free it twice. Declared, not defined. */
+   varying_packer(const varying_packer&);
+   varying_packer& operator=(const varying_packer&);
+
+protected:
+   boxed_exec_list *list_float, *list_vec2, *list_vec3;
+   size_t list_float_size, list_vec2_size, list_vec3_size;
+   const exec_list *shader_ir;
+   /* Unsigned on purpose: a signed 2-bit field cannot hold the value 2 and a
+    * signed 1-bit field cannot hold the value 1, so comparisons against the
+    * qualifiers of a variable would silently never match. */
+   const unsigned interpolation:2;
+   const unsigned centroid:1;
+
+   /**
+    * Pop the first variable of \c list and decrement its element counter.
+    * The caller must have checked that the list is not empty.
+    */
+   static ir_variable *take(boxed_exec_list *list, size_t &remaining) {
+      remaining--;
+      return list_item(ir_variable*, list->pop_head());
+   }
+
+   /**
+    * Record in \c result how \c number_of_element variables are laid out,
+    * one after the other, inside a single packing variable of type \c tp.
+    *
+    * \param result  table receiving one packing_remap per packed variable,
+    *                keyed by the packed variable's name
+    * \param vars    variables to pack, in component order
+    * \param size    number of components of each entry of \c vars
+    * \param name    prefix of the packing variable name
+    * \param tp      type of the packing variable
+    *
+    * The name of the first packed variable is appended to \c name; since a
+    * variable is packed at most once, two groups of the same shape no longer
+    * end up sharing one packing variable.
+    */
+   void generate_packing(hash_table* result,ir_variable** vars,
+                         int* size,size_t number_of_element,const char* name,
+                         const glsl_type* tp) const {
+
+      if (number_of_element == 0)
+         return;
+
+      char *unique_name =
+         ralloc_asprintf(list_float, "%s_%s", name, vars[0]->name);
+      assert(unique_name != NULL);
+
+      unsigned offset = 0;
+      for (size_t i = 0; i < number_of_element; i++) {
+         const unsigned components = size[i];
+         /* Selectors are 2 bits wide; wrap explicitly instead of relying on
+          * bit-field truncation. Only the first \c components selectors are
+          * meaningful. */
+         ir_swizzle_mask swizzle = {
+            offset & 3, (offset + 1) & 3, (offset + 2) & 3, (offset + 3) & 3,
+            components, false
+         };
+
+         packing_remap *pr = static_cast<packing_remap*>(
+            ralloc_size(list_float, sizeof(packing_remap)));
+         assert(pr != NULL);
+         pr->read_swiz = swizzle;
+         pr->write_mask_shift = offset;
+         pr->new_variable_name = unique_name;
+         pr->gltype = tp;
+
+         hash_table_insert(result, pr, vars[i]->name);
+         offset += components;
+      }
+   }
+
+
+public:
+   /**
+    * Walk \c shader_ir and fill \c list_float, \c list_vec2 and
+    * \c list_vec3 with the input variables whose centroid flag and
+    * interpolation mode match the ones given at construction.
+    *
+    * Only plain float, vec2 and vec3 variables are collected. Types are
+    * compared against the glsl_type singletons rather than through gl_type.
+    * NOTE(review): gl_type of an array type is believed to be the one of its
+    * element type, which would let arrays through -- confirm in
+    * glsl_types.cpp.
+    * Variables whose name starts with "gl_" are skipped so that built-in
+    * inputs are never packed.
+    */
+   void collect_varyings() {
+
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+      list_float->make_empty();
+      list_vec2->make_empty();
+      list_vec3->make_empty();
+
+      foreach_list_const(tmp,shader_ir) {
+
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+
+         if (!var || var->mode != ir_var_in)
+            continue;
+         if (var->centroid != centroid || var->interpolation != interpolation)
+            continue;
+
+         /* "gl_" is reserved for built-ins; short-circuit evaluation keeps
+          * the reads inside the string. */
+         const char *n = var->name;
+         if (n && n[0] == 'g' && n[1] == 'l' && n[2] == '_')
+            continue;
+
+         if (var->type == glsl_type::float_type) {        // varying float case
+            list_float->push_head(var);
+            list_float_size++;
+         } else if (var->type == glsl_type::vec2_type) {  // varying vec2 case
+            list_vec2->push_head(var);
+            list_vec2_size++;
+         } else if (var->type == glsl_type::vec3_type) {  // varying vec3 case
+            list_vec3->push_head(var);
+            list_vec3_size++;
+         }
+         // varying vec4 not packable, other cases not handled
+      }
+   }
+
+   /**
+    * Group the collected varyings and describe each group in a hash_table
+    * of \c packing_remap.
+    *
+    * The keys of the result are variable names (not pointers) because
+    * varyings of different shaders are matched by name.
+    * Every key has its own packing_remap; the records of one group share the
+    * same packing variable name and type.
+    *
+    * The table itself has to be destroyed by the caller. The records it
+    * points to belong to this packer.
+    *
+    * Groups are tried in this order until nothing more can be packed:
+    * vec3+float, vec2+vec2, vec2+float+float, 4 floats (all into a vec4),
+    * vec2+float, 3 floats (into a vec3), 2 floats (into a vec2).
+    */
+   hash_table* define_pack_mapping() {
+      hash_table* result = hash_table_ctor(32, hash_table_string_hash,
+                                           hash_table_string_compare);
+      ir_variable* vars[4];
+
+      while(true) {
+         /* 1 vec3 and 1 float into a vec4 */
+         if(list_vec3_size >= 1 && list_float_size >= 1) {
+            vars[0] = take(list_vec3, list_vec3_size);
+            vars[1] = take(list_float, list_float_size);
+            int sizes[] = {3,1};
+            generate_packing(result,vars,sizes,2,"packed_vec3_float_varying",glsl_type::vec4_type);
+            continue;
+         }
+         /* 2 vec2 into a vec4 */
+         if(list_vec2_size >= 2) {
+            vars[0] = take(list_vec2, list_vec2_size);
+            vars[1] = take(list_vec2, list_vec2_size);
+            int sizes[] = {2,2};
+            generate_packing(result,vars,sizes,2,"packed_vec2_vec2_varying",glsl_type::vec4_type);
+            continue;
+         }
+         /* 1 vec2 and 2 float into a vec4 */
+         if(list_float_size >= 2 && list_vec2_size >= 1) {
+            vars[0] = take(list_vec2, list_vec2_size);
+            vars[1] = take(list_float, list_float_size);
+            vars[2] = take(list_float, list_float_size);
+            int sizes[] = {2,1,1};
+            generate_packing(result,vars,sizes,3,"packed_vec2_float_float",glsl_type::vec4_type);
+            continue;
+         }
+         /* 4 float into a vec4 */
+         if(list_float_size >= 4) {
+            vars[0] = take(list_float, list_float_size);
+            vars[1] = take(list_float, list_float_size);
+            vars[2] = take(list_float, list_float_size);
+            vars[3] = take(list_float, list_float_size);
+            int sizes[] = {1,1,1,1};
+            generate_packing(result,vars,sizes,4,"packed_4_float",glsl_type::vec4_type);
+            continue;
+         }
+         /* 1 vec2 and 1 float into a vec3 */
+         if(list_float_size >= 1 && list_vec2_size >= 1) {
+            vars[0] = take(list_vec2, list_vec2_size);
+            vars[1] = take(list_float, list_float_size);
+            int sizes[] = {2,1};
+            generate_packing(result,vars,sizes,2,"packed_vec2_float",glsl_type::vec3_type);
+            continue;
+         }
+         /* 3 float into a vec3 */
+         if(list_float_size >= 3) {
+            vars[0] = take(list_float, list_float_size);
+            vars[1] = take(list_float, list_float_size);
+            vars[2] = take(list_float, list_float_size);
+            int sizes[] = {1,1,1};
+            generate_packing(result,vars,sizes,3,"packed_3_float",glsl_type::vec3_type);
+            continue;
+         }
+         /* 2 float into a vec2 */
+         if(list_float_size >= 2) {
+            vars[0] = take(list_float, list_float_size);
+            vars[1] = take(list_float, list_float_size);
+            int sizes[] = {1,1};
+            generate_packing(result,vars,sizes,2,"packed_2_float",glsl_type::vec2_type);
+            continue;
+         }
+         break;
+      }
+
+      return result;
+   }
+
+   /**
+    * \param instructions  shader IR to scan; also used as the ralloc context
+    *                      of the three work lists
+    * \param i             interpolation mode a varying must have to be
+    *                      collected
+    * \param c             centroid flag a varying must have to be collected
+    */
+   varying_packer(exec_list* instructions,unsigned i,unsigned c):
+      shader_ir(instructions),interpolation(i),centroid(c) {
+      list_float = new (instructions) boxed_exec_list();
+      list_vec2 = new (instructions) boxed_exec_list();
+      list_vec3 = new (instructions) boxed_exec_list();
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+   }
+
+   ~varying_packer() {
+      /* The lists come from ralloc_size(), so they are released with
+       * ralloc_free() rather than with delete. */
+      ralloc_free(list_float);
+      ralloc_free(list_vec2);
+      ralloc_free(list_vec3);
+   }
+
+};
+
+/**
+ * Rotate the 4 low bits of \c mask by \c step positions.
+ *
+ * Despite its name, the rotation moves bits towards the higher positions:
+ * bit i of \c mask ends up at bit (i + step) modulo 4.
+ *
+ * It is used when a packed variable pv stored in a packing variable pack
+ * is written:
+ * - In the original ir the assignment is pv.i = ...
+ * - In the new ir the assignment is pack.new_i = ...
+ * where new_i is the output of the function, and depends only on \c step.
+ *
+ * Only the 4 low bits of \c mask are considered and the result is always
+ * in the range [0, 15]. \c step is taken modulo 4.
+ */
+inline
+unsigned cyclic_right_shift(unsigned mask,unsigned step) {
+   mask &= 0xf;
+   step &= 3;
+   return ((mask << step) | (mask >> (4 - step))) & 0xf;
+}
+
+
+
+/**
+ * This visitor replaces every dereference of a packed variable it is handed
+ * by a swizzle of the corresponding packing variable.
+ *
+ * \c replacement_table maps a packed variable name to its packing_remap;
+ * \c introduced_variables maps a packing_remap to the packing ir_variable
+ * declared in the shader being rewritten. Neither table is owned.
+ */
+class ir_rvalue_substituter : public ir_rvalue_visitor {
+protected:
+   hash_table* replacement_table;
+   hash_table* introduced_variables;
+
+public:
+   /**
+    * If \c *rvalue is a dereference of a packed variable, replace it by
+    * packing_variable.swizzle; otherwise leave it untouched.
+    */
+   void handle_rvalue(ir_rvalue **rvalue) {
+      if(!*rvalue)
+         return;
+
+      ir_dereference_variable* dref = (*rvalue)->as_dereference_variable();
+      if(!dref)
+         return;
+
+      packing_remap* pr = static_cast<packing_remap*>(
+         hash_table_find(replacement_table,dref->var->name));
+      if(!pr)
+         return;
+
+      ir_variable* newvar = static_cast<ir_variable*>(
+         hash_table_find(introduced_variables,pr));
+      /* No packing variable was declared for this remap in this shader:
+       * leave the dereference alone rather than build one on NULL. */
+      if(!newvar)
+         return;
+
+      ir_dereference_variable* ndref =
+         new (dref->var) ir_dereference_variable(newvar);
+      ir_swizzle* swz = new (dref) ir_swizzle(ndref,pr->read_swiz);
+      *rvalue = swz;
+   }
+
+   ir_rvalue_substituter(hash_table* htb1, hash_table* htb2):
+      replacement_table(htb1),introduced_variables(htb2) {
+
+   }
+
+};
+
+
+/**
+ * This visitor rewrites assignments: a packed variable on the lhs is
+ * replaced by its packing variable with a shifted write mask, and an
+ * \c ir_rvalue_substituter is run over the rhs and the condition.
+ *
+ * Subclasses decide how the packing variable is declared (see
+ * generate_new_varying). \c replacement_table and \c symbols are borrowed;
+ * \c introduced_variables is created and destroyed by this class.
+ *
+ * NOTE(review): only assignments are visited here; packed variables read
+ * from other instructions are not rewritten by this class.
+ */
+
+class ir_variable_substituter : public ir_hierarchical_visitor {
+protected:
+   hash_table* replacement_table;
+   hash_table* introduced_variables;
+   glsl_symbol_table* symbols;
+
+   /**
+    * Create the declaration of the packing variable described by \c pr,
+    * allocated with \c ctx as placement argument.
+    */
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const=0;
+
+public:
+   ir_visitor_status visit_enter(ir_assignment *assign) {
+      ir_rvalue_substituter rs(replacement_table,introduced_variables);
+
+      /* accept() only hands the children of the visited node to
+       * handle_rvalue(), so the root of each tree is handled explicitly. */
+      assign->rhs->accept(&rs);
+      rs.handle_rvalue(&assign->rhs);
+      if(assign->condition) {
+         assign->condition->accept(&rs);
+         rs.handle_rvalue(&assign->condition);
+      }
+
+      ir_dereference_variable* dref = assign->lhs->as_dereference_variable();
+      if(!dref)
+         return visit_continue;
+
+      packing_remap* pr = static_cast<packing_remap*>(
+         hash_table_find(replacement_table,dref->var->name));
+      if(!pr)
+         return visit_continue;
+
+      ir_variable* newvar = static_cast<ir_variable*>(
+         hash_table_find(introduced_variables,pr));
+      /* add_prelude() did not declare a packing variable for this remap. */
+      if(!newvar)
+         return visit_continue;
+
+      dref->var = newvar;
+      /* Keep the type of the dereference in sync with its new variable. */
+      dref->type = newvar->type;
+      assign->write_mask =
+         cyclic_right_shift(assign->write_mask,pr->write_mask_shift);
+
+      return visit_continue;
+   }
+
+   /**
+    * This method declares the packing variables in \c instructions and
+    * removes the declarations of the packed variables.
+    *
+    * Each packing variable is inserted right before the first packed
+    * variable that needs it and inherits the interpolation mode and
+    * centroid flag of that variable.
+    * The new variable pointers are stored in \c introduced_variables, where
+    * they are looked up when the uses of packed variables are rewritten.
+    */
+   void add_prelude(exec_list* instructions) {
+      foreach_list_safe(tmp,instructions) {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+         /* Skip anything that is not a declaration instead of stopping:
+          * declarations are not required to come first. */
+         if(!var)
+            continue;
+         packing_remap* pr = static_cast<packing_remap*>(
+            hash_table_find(replacement_table,var->name));
+         if(!pr)
+            continue;
+         ir_variable* newvar = symbols->get_variable(pr->new_variable_name);
+         if(!newvar) {
+            newvar = generate_new_varying(var,pr);
+            /* Packed variables of one group share these qualifiers. */
+            newvar->interpolation = var->interpolation;
+            newvar->centroid = var->centroid;
+            inst->insert_before(newvar);
+            symbols->add_variable(newvar);
+         }
+         hash_table_insert(introduced_variables,newvar,pr);
+         inst->remove();
+      }
+   }
+
+   /**
+    * \param htb  packing strategy: packed variable name -> packing_remap
+    * \param s    symbol table of the shader being rewritten
+    */
+   ir_variable_substituter(hash_table* htb,glsl_symbol_table* s):
+      ir_hierarchical_visitor(),replacement_table(htb),
+      introduced_variables(hash_table_ctor(0, hash_table_pointer_hash,
+                                           hash_table_pointer_compare)),
+      symbols(s) {
+   }
+
+   virtual ~ir_variable_substituter() {
+      /* The table comes from hash_table_ctor(); hash_table_dtor() is its
+       * counterpart and also releases the entries. */
+      hash_table_dtor(introduced_variables);
+   }
+};
+
+/**
+ * Substituter whose packing variables are declared with mode \c ir_var_in.
+ */
+class ir_variable_substituter_fs : public ir_variable_substituter {
+protected:
+   /** Declare the packing variable described by \c pr as an input. */
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const {
+      ir_variable* const packing_input =
+         new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_in);
+      return packing_input;
+   }
+
+public:
+   ir_variable_substituter_fs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s)
+   {
+   }
+};
+
+/**
+ * Substituter whose packing variables are declared with mode \c ir_var_out.
+ */
+class ir_variable_substituter_vs : public ir_variable_substituter {
+protected:
+   /** Declare the packing variable described by \c pr as an output. */
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const {
+      ir_variable* const packing_output =
+         new (ctx) ir_variable(pr->gltype,pr->new_variable_name,ir_var_out);
+      return packing_output;
+   }
+
+public:
+   ir_variable_substituter_vs(hash_table* htb,glsl_symbol_table* s)
+      : ir_variable_substituter(htb,s)
+   {
+   }
+};
+
+
+/**
+ * Pack the varyings that have the given interpolation mode and centroid
+ * flag.
+ *
+ * The packing strategy is computed from the inputs of \c fs only; it is then
+ * applied to \c fs and, by variable name, to \c vs.
+ *
+ * \param interpolation  interpolation mode the fragment shader inputs must
+ *                       have to be considered
+ * \param centroid       centroid flag the fragment shader inputs must have
+ *                       to be considered
+ * \param vs             shader rewritten with ir_variable_substituter_vs
+ * \param fs             shader rewritten with ir_variable_substituter_fs
+ *
+ * Nothing is done when either shader is NULL.
+ */
+void pack_varyings_according_interpolation_centroid(
+   unsigned interpolation, unsigned centroid,
+   gl_shader*& vs,gl_shader*& fs) {
+
+   if (vs == NULL || fs == NULL)
+      return;
+
+   varying_packer vp(fs->ir,interpolation,centroid);
+   vp.collect_varyings();
+   hash_table* htb = vp.define_pack_mapping();
+
+   {
+      ir_variable_substituter_fs fsreplacer(htb,fs->symbols);
+      fsreplacer.add_prelude(fs->ir);
+      visit_list_elements(&fsreplacer,fs->ir);
+   }
+
+   {
+      ir_variable_substituter_vs vsreplacer(htb,vs->symbols);
+      vsreplacer.add_prelude(vs->ir);
+      visit_list_elements(&vsreplacer,vs->ir);
+   }
+
+   /* define_pack_mapping() leaves the destruction of the table to its
+    * caller. Both substituters are already out of scope here, so nothing
+    * refers to the table any more.
+    * NOTE(review): this is expected to release the table and its entries
+    * only, not the records the entries point to -- confirm against
+    * program/hash_table.c. */
+   hash_table_dtor(htb);
+}
+
+
+/**
+ * Entry point of the pass: run the packing once for every combination of
+ * interpolation mode (flat, smooth, noperspective) and centroid flag,
+ * non-centroid combinations first.
+ */
+void pack_varyings(gl_shader*& vs,gl_shader*& fs){
+   const unsigned modes[] = {
+      ir_var_flat, ir_var_smooth, ir_var_noperspective
+   };
+   const unsigned mode_count = sizeof(modes) / sizeof(modes[0]);
+
+   for (unsigned with_centroid = 0; with_centroid < 2; with_centroid++) {
+      for (unsigned m = 0; m < mode_count; m++) {
+         pack_varyings_according_interpolation_centroid(
+            modes[m], with_centroid, vs, fs);
+      }
+   }
+}
+
+
--
1.7.6
More information about the mesa-dev
mailing list