[Mesa-dev] [PATCH] Starting work on var packer
Brian Paul
brianp at vmware.com
Mon Aug 8 07:39:08 PDT 2011
I'll let the GLSL guys review the functionality of this, but a few
high-level comments:
1. What exactly is var packing? Do you mean storing 4 float vars in a
vector register? A better commit message is needed, plus some
comments at the top of the file explaining what's going on.
2. Core Mesa and the GLSL compiler use 3-space indentation. I see a
mix of 4- and 8-space indentation in your code.
3. Please replace "if(expr)" with "if (expr)" and similarly for for-loops.
4. Change this:
if(expr) break;
to read:
if (expr)
break;
That way, someone who is debugging can put a breakpoint on the
break statement.
5. In switch statements where there's case fallthrough, it's good to
put a comment like /* fall-through */ to make it clear to the reader
that fall-through is intentional.
Thanks.
-Brian
On 08/07/2011 01:49 PM, Vincent Lejeune wrote:
> From: vlj<vljn at ovi.com>
>
> ---
> src/glsl/Makefile | 1 +
> src/glsl/glsl_parser_extras.cpp | 7 +-
> src/glsl/ir_optimization.h | 1 +
> src/glsl/opt_var_packer.cpp | 332 +++++++++++++++++++++++++++++++++++++++
> 4 files changed, 338 insertions(+), 3 deletions(-)
> create mode 100644 src/glsl/opt_var_packer.cpp
>
> diff --git a/src/glsl/Makefile b/src/glsl/Makefile
> index 68b98b2..226acc4 100644
> --- a/src/glsl/Makefile
> +++ b/src/glsl/Makefile
> @@ -84,6 +84,7 @@ CXX_SOURCES = \
> opt_swizzle_swizzle.cpp \
> opt_tree_grafting.cpp \
> opt_common_subexpression_elimination.cpp \
> + opt_var_packer.cpp \
> s_expression.cpp
>
> LIBS = \
> diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
> index 0a57386..1aa7d88 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -777,8 +777,9 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration
> GLboolean progress = GL_FALSE;
>
>
> - progress = do_common_subexpression_elimination(ir) || progress;
> - /*progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
> + progress = do_var_packing (ir) || progress;
> + //progress = do_common_subexpression_elimination(ir) || progress;
> + progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
>
> if (linked) {
> progress = do_function_inlining(ir) || progress;
> @@ -815,7 +816,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration
> progress = set_loop_controls(ir, ls) || progress;
> progress = unroll_loops(ir, ls, max_unroll_iterations) || progress;
> }
> - delete ls;*/
> + delete ls;
>
> return progress;
> }
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 3604e4e..5b1de66 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -72,3 +72,4 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
> bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
> bool optimize_redundant_jumps(exec_list *instructions);
> bool do_common_subexpression_elimination(exec_list *instructions);
> +bool do_var_packing(exec_list *instructions);
> diff --git a/src/glsl/opt_var_packer.cpp b/src/glsl/opt_var_packer.cpp
> new file mode 100644
> index 0000000..fe579f9
> --- /dev/null
> +++ b/src/glsl/opt_var_packer.cpp
> @@ -0,0 +1,332 @@
> +/*
> + * Copyright © 2011 Vincent Lejeune
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include "ir.h"
> +#include "ir_hierarchical_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include<cstring>
> +
> +/**
> + * List node that carries an untyped pointer as its payload.
> + *
> + * Lets a list refer to an object without relinking the exec_node embedded
> + * in that object (presumably because the object is already linked into
> + * another list -- confirm against the callers).
> + */
> +class box : public exec_node
> +{
> +public:
> +   /** Pointer stored in this node; the node never frees it. */
> +   void *content;
> +
> +   /* explicit: a void* must never silently convert into a list node. */
> +   explicit box(void *c) : content(c)
> +   {
> +   }
> +};
> +
> +
> +
> +/**
> + * exec_list whose elements are box nodes wrapping arbitrary pointers.
> + *
> + * Every box is allocated with the list itself passed as the placement
> + * argument of operator new (presumably a ralloc context, as for the list's
> + * own operator new below -- confirm against exec_node).
> + */
> +class boxed_exec_list : public exec_list
> +{
> +public:
> +   /** Wrap \c n in a new box and append the box to the list. */
> +   void push_tail(void *n)
> +   {
> +      box *b = new (this) box(n);
> +      exec_list::push_tail(b);
> +   }
> +
> +   /** Wrap \c n in a new box and insert the box at the front of the list. */
> +   void push_head(void *n)
> +   {
> +      box *b = new (this) box(n);
> +      exec_list::push_head(b);
> +   }
> +
> +   /** Allocate the list object itself out of the ralloc context \c ctx. */
> +   static void *operator new(size_t size, void *ctx)
> +   {
> +      void *node = ralloc_size(ctx, size);
> +      assert(node != NULL);
> +
> +      return node;
> +   }
> +
> +   /**
> +    * Linear search for a stored pointer.
> +    *
> +    * \return true if some box in the list holds exactly \c pointer.
> +    */
> +   bool has(const void *pointer) const
> +   {
> +      foreach_list_const(node, this) {
> +         /* Every node pushed through this class is a box, so a plain
> +          * downcast is enough; no constness needs to be cast away.
> +          */
> +         if (static_cast<const box *>(node)->content == pointer)
> +            return true;
> +      }
> +      return false;
> +   }
> +
> +   /** Number of boxes currently in the list (walks the whole list). */
> +   int size() const
> +   {
> +      int count = 0;
> +      foreach_list_const(node, this) {
> +         count++;
> +      }
> +      return count;
> +   }
> +};
> +
> +/* Unbox a list node: treat \c pointer as a box and cast its content to \c type. */
> +#define list_item(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content)
> +/* Same as list_item, but for a const exec_node pointer; its constness is cast away first. */
> +#define list_item_const(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content)
> +
> +
> +#include<iostream>
> +using namespace std;
> +
> +/**
> + * Collects the float, vec2 and vec3 variables of mode ir_var_auto that are
> + * dereferenced in the visited IR, and proposes pairs of them that fit
> + * together in one wider vector.
> + */
> +class ir_variable_lister : public ir_hierarchical_visitor
> +{
> +   friend class ir_packer;
> +protected:
> +   boxed_exec_list *available_vec3;
> +   boxed_exec_list *available_vec2;
> +   boxed_exec_list *available_float;
> +
> +   /** Append \c var to \c list unless the list already holds it. */
> +   static void add_unique(boxed_exec_list *list, ir_variable *var)
> +   {
> +      if (!list->has(var))
> +         list->push_tail(var);
> +   }
> +
> +   /** Remove the first entry of \c list and return the variable it boxes. */
> +   static ir_variable *take_first(boxed_exec_list *list)
> +   {
> +      return list_item(ir_variable*, list->pop_head());
> +   }
> +
> +   /**
> +    * Build a swizzle mask selecting \c count consecutive components starting
> +    * at component \c first.
> +    *
> +    * Every field is initialized (unused components are zero), so the mask
> +    * can be copied and inspected without reading indeterminate bits.
> +    */
> +   static ir_swizzle_mask consecutive_mask(unsigned first, unsigned count)
> +   {
> +      ir_swizzle_mask mask;
> +      memset(&mask, 0, sizeof(mask));
> +
> +      mask.x = first;
> +      if (count > 1)
> +         mask.y = first + 1;
> +      if (count > 2)
> +         mask.z = first + 2;
> +      if (count > 3)
> +         mask.w = first + 3;
> +      mask.num_components = count;
> +      mask.has_duplicates = false;
> +      return mask;
> +   }
> +
> +   /**
> +    * Record \c var in the candidate list matching its type.
> +    *
> +    * Variables named "_ret_val" and variables whose type is not float,
> +    * vec2 or vec3 are ignored.  A variable is recorded at most once.
> +    */
> +   void store_variable(ir_variable *var)
> +   {
> +      if (strcmp(var->name, "_ret_val") == 0)
> +         return;
> +
> +      switch (var->type->gl_type) {
> +      case GL_FLOAT_VEC3:
> +         add_unique(available_vec3, var);
> +         break;
> +      case GL_FLOAT_VEC2:
> +         add_unique(available_vec2, var);
> +         break;
> +      case GL_FLOAT:
> +         add_unique(available_float, var);
> +         break;
> +      default:
> +         break;
> +      }
> +   }
> +
> +   /**
> +    * Pick two recorded variables that fit together in one vector.
> +    *
> +    * Pairings are tried in this order: vec3 + float, vec2 + vec2,
> +    * vec2 + float, float + float.  The chosen variables are removed from
> +    * the candidate lists.
> +    *
> +    * \param var1   receives the variable placed in the low components
> +    * \param mask1  receives the components of the packed vector given to var1
> +    * \param var2   receives the variable placed after var1
> +    * \param mask2  receives the components of the packed vector given to var2
> +    * \return true if a pair was found; the outputs are untouched otherwise.
> +    */
> +   bool find_candidates(ir_variable *&var1, ir_swizzle_mask &mask1,
> +                        ir_variable *&var2, ir_swizzle_mask &mask2)
> +   {
> +      const int vec3_count = available_vec3->size();
> +      const int vec2_count = available_vec2->size();
> +      const int float_count = available_float->size();
> +
> +      /* A pairing needs exactly as many candidates as it removes from each
> +       * list: one per list for mixed pairs, two for same-type pairs.
> +       */
> +      if (vec3_count >= 1 && float_count >= 1) {
> +         var1 = take_first(available_vec3);
> +         var2 = take_first(available_float);
> +         mask1 = consecutive_mask(0, 3);
> +         mask2 = consecutive_mask(3, 1);
> +         return true;
> +      }
> +      if (vec2_count >= 2) {
> +         var1 = take_first(available_vec2);
> +         var2 = take_first(available_vec2);
> +         mask1 = consecutive_mask(0, 2);
> +         mask2 = consecutive_mask(2, 2);
> +         return true;
> +      }
> +      if (vec2_count >= 1 && float_count >= 1) {
> +         var1 = take_first(available_vec2);
> +         var2 = take_first(available_float);
> +         mask1 = consecutive_mask(0, 2);
> +         mask2 = consecutive_mask(2, 1);
> +         return true;
> +      }
> +      if (float_count >= 2) {
> +         var1 = take_first(available_float);
> +         var2 = take_first(available_float);
> +         mask1 = consecutive_mask(0, 1);
> +         mask2 = consecutive_mask(1, 1);
> +         return true;
> +      }
> +      return false;
> +   }
> +
> +public:
> +   /** Record the dereferenced variable if its mode is ir_var_auto. */
> +   ir_visitor_status visit(ir_dereference_variable *dref)
> +   {
> +      if (dref->var->mode == ir_var_auto)
> +         store_variable(dref->var);
> +      return visit_continue;
> +   }
> +
> +   /**
> +    * \param ctx  allocation context passed to the placement new of the three
> +    *             candidate lists.
> +    */
> +   explicit ir_variable_lister(void *ctx)
> +   {
> +      available_float = new (ctx) boxed_exec_list();
> +      available_vec2 = new (ctx) boxed_exec_list();
> +      available_vec3 = new (ctx) boxed_exec_list();
> +   }
> +};
> +
> +/**
> + * Rewrites the uses of two variables so that both live in distinct
> + * components of one packed variable.
> + *
> + * Reads of either variable become swizzles of the packed variable.
> + * Assignments whose LHS is a plain dereference of either variable are
> + * retargeted to the packed variable, with their write mask moved onto the
> + * components that belong to that variable.
> + */
> +class ir_variable_replacer : public ir_rvalue_visitor
> +{
> +protected:
> +   ir_variable *var1;
> +   ir_swizzle_mask mask_for_var1;
> +   ir_variable *var2;
> +   ir_swizzle_mask mask_for_var2;
> +   ir_variable *packed_var;
> +
> +   /** Write-mask bit for component index \c x; indices above 3 map to bit 0. */
> +   unsigned from_component(unsigned x) const
> +   {
> +      return (x <= 3) ? (1u << x) : (1u << 0);
> +   }
> +
> +   /** Write mask with one bit set for each component selected by \c mask. */
> +   unsigned write_mask_from_swizzle(const ir_swizzle_mask &mask) const
> +   {
> +      unsigned result = 0;
> +      switch (mask.num_components) {
> +      case 4:
> +         result |= from_component(mask.w);
> +         /* fall-through */
> +      case 3:
> +         result |= from_component(mask.z);
> +         /* fall-through */
> +      case 2:
> +         result |= from_component(mask.y);
> +         /* fall-through */
> +      case 1:
> +         result |= from_component(mask.x);
> +         break;
> +      default:
> +         break;
> +      }
> +      return result;
> +   }
> +
> +   /** Component of the packed variable selected by entry \c i of \c mask. */
> +   static unsigned component_at(const ir_swizzle_mask &mask, unsigned i)
> +   {
> +      switch (i) {
> +      case 0:
> +         return mask.x;
> +      case 1:
> +         return mask.y;
> +      case 2:
> +         return mask.z;
> +      default:
> +         return mask.w;
> +      }
> +   }
> +
> +   /**
> +    * Translate a write mask expressed on the original variable into the
> +    * equivalent mask on the packed variable.
> +    *
> +    * Bit i of \c old_mask moves to the component that entry i of \c mask
> +    * selects, so an assignment that wrote only some components of the
> +    * original variable still writes only those components after packing.
> +    * If \c old_mask selects none of the variable's components, the result
> +    * falls back to every component owned by the variable.
> +    */
> +   unsigned remap_write_mask(unsigned old_mask,
> +                             const ir_swizzle_mask &mask) const
> +   {
> +      unsigned result = 0;
> +      for (unsigned i = 0; i < mask.num_components && i < 4; i++) {
> +         if (old_mask & (1u << i))
> +            result |= from_component(component_at(mask, i));
> +      }
> +      return (result != 0) ? result : write_mask_from_swizzle(mask);
> +   }
> +
> +   /**
> +    * \return the swizzle assigned to \c var if it is one of the two packed
> +    *         variables, NULL otherwise.
> +    */
> +   const ir_swizzle_mask *mask_for(const ir_variable *var) const
> +   {
> +      /* var2 is checked first so that, as with two consecutive ifs, var2
> +       * would win in the (unexpected) case var1 == var2.
> +       */
> +      if (var == var2)
> +         return &mask_for_var2;
> +      if (var == var1)
> +         return &mask_for_var1;
> +      return NULL;
> +   }
> +
> +public:
> +   /**
> +    * Replace a direct dereference of var1 or var2 by a swizzle of a fresh
> +    * dereference of the packed variable.  Other rvalues are left alone.
> +    */
> +   void handle_rvalue(ir_rvalue **rvalue)
> +   {
> +      if (!*rvalue)
> +         return;
> +
> +      ir_dereference_variable *dref = (*rvalue)->as_dereference_variable();
> +      if (!dref)
> +         return;
> +
> +      const ir_swizzle_mask *mask = mask_for(dref->var);
> +      if (!mask)
> +         return;
> +
> +      ir_dereference_variable *packed_ref =
> +         new (packed_var) ir_dereference_variable(packed_var);
> +      *rvalue = new (dref) ir_swizzle(packed_ref, *mask);
> +   }
> +
> +   /**
> +    * Retarget an assignment to var1 or var2 onto the packed variable.
> +    *
> +    * The RHS is visited first so that reads nested inside it are rewritten
> +    * before the LHS changes.
> +    */
> +   ir_visitor_status visit_enter(ir_assignment *assign)
> +   {
> +      assign->rhs->accept(this);
> +
> +      ir_dereference_variable *dref = assign->lhs->as_dereference_variable();
> +      if (dref) {
> +         const ir_swizzle_mask *mask = mask_for(dref->var);
> +         if (mask) {
> +            /* NOTE(review): only the variable pointer is retargeted here;
> +             * confirm whether the dereference carries other per-variable
> +             * state (e.g. a type) that must be refreshed as well.
> +             */
> +            dref->var = packed_var;
> +            assign->write_mask = remap_write_mask(assign->write_mask, *mask);
> +         }
> +      }
> +      return visit_continue;
> +   }
> +
> +   /**
> +    * \param v1      first variable to replace
> +    * \param mask1   components of \c newvar that hold \c v1
> +    * \param v2      second variable to replace
> +    * \param mask2   components of \c newvar that hold \c v2
> +    * \param newvar  packed variable standing in for both
> +    */
> +   ir_variable_replacer(ir_variable *v1, ir_swizzle_mask mask1,
> +                        ir_variable *v2, ir_swizzle_mask mask2,
> +                        ir_variable *newvar)
> +      : var1(v1), mask_for_var1(mask1), var2(v2), mask_for_var2(mask2),
> +        packed_var(newvar)
> +   {
> +   }
> +};
> +
> +
> +/**
> + * Packs, for each visited function signature that has a body, at most one
> + * pair of small float-typed local variables into a single vector variable.
> + */
> +class ir_packer : public ir_hierarchical_visitor
> +{
> +public:
> +   /** Set to true whenever a visit rewrote a function body. */
> +   bool progress;
> +
> +   ir_packer() : progress(false)
> +   {
> +   }
> +
> +   /**
> +    * List the packable variables of \c fonc, pick one pair, declare the
> +    * packed variable at the head of the body and rewrite every use of the
> +    * pair.  Signatures without a body are skipped.
> +    */
> +   ir_visitor_status visit_enter(ir_function_signature *fonc)
> +   {
> +      if (fonc->body.is_empty())
> +         return visit_continue;
> +
> +      ir_variable_lister lister(fonc);
> +      foreach_list(node, &(fonc->body)) {
> +         ir_instruction *inst = static_cast<ir_instruction *>(node);
> +         inst->accept(&lister);
> +      }
> +
> +      ir_variable *v1 = NULL;
> +      ir_variable *v2 = NULL;
> +      ir_swizzle_mask m1;
> +      ir_swizzle_mask m2;
> +      /* Start from all-zero masks so no field is ever read uninitialized. */
> +      memset(&m1, 0, sizeof(m1));
> +      memset(&m2, 0, sizeof(m2));
> +      if (!lister.find_candidates(v1, m1, v2, m2))
> +         return visit_continue;
> +
> +      cout<< "PACKING "<< v1->name<< " WITH "<< v2->name<< endl;
> +
> +      const glsl_type *packed_type;
> +      const char *packed_name;
> +      switch (m1.num_components + m2.num_components) {
> +      case 4:
> +         packed_type = glsl_type::vec4_type;
> +         packed_name = "vec4_tmp";
> +         break;
> +      case 3:
> +         packed_type = glsl_type::vec3_type;
> +         packed_name = "vec3_tmp";
> +         break;
> +      case 2:
> +         packed_type = glsl_type::vec2_type;
> +         packed_name = "vec2_tmp";
> +         break;
> +      default:
> +         /* No vector type has this many components; leave the body alone
> +          * rather than inserting a NULL declaration.
> +          */
> +         return visit_continue;
> +      }
> +
> +      ir_variable *newvar =
> +         new (fonc) ir_variable(packed_type, packed_name, ir_var_temporary);
> +      fonc->body.push_head(newvar);
> +
> +      ir_variable_replacer replacer(v1, m1, v2, m2, newvar);
> +      foreach_list(node, &(fonc->body)) {
> +         ir_instruction *inst = static_cast<ir_instruction *>(node);
> +         inst->accept(&replacer);
> +      }
> +
> +      progress = true;
> +      return visit_continue;
> +   }
> +};
> +
> +
> +/**
> + * Run the variable packer over \c instructions.
> + *
> + * A pass packs at most one pair per function, so passes are repeated until
> + * one of them changes nothing, up to a fixed number of passes.
> + *
> + * \return true if at least one pair of variables was packed.
> + */
> +bool
> +do_var_packing(exec_list *instructions)
> +{
> +   const int max_passes = 50;
> +   ir_packer v;
> +   bool progress = false;
> +
> +   for (int i = 0; i < max_passes; i++) {
> +      v.progress = false;
> +      visit_list_elements(&v, instructions);
> +
> +      /* A pass that packed nothing leaves the IR untouched, so any further
> +       * pass would find the same (empty) set of candidates.
> +       */
> +      if (!v.progress)
> +         break;
> +      progress = true;
> +   }
> +
> +   return progress;
> +}
More information about the mesa-dev
mailing list