[Mesa-dev] [PATCH v2] glsl: Add a CSE pass.
Paul Berry
stereotype441 at gmail.com
Wed Oct 30 19:28:57 CET 2013
On 28 October 2013 14:19, Eric Anholt <eric at anholt.net> wrote:
> This only operates on constant/uniform values for now, because otherwise
> I'd
> have to deal with killing my available CSE entries when assignments happen,
> and getting even this working in the tree ir was painful enough.
>
> As is, it has the following effect in shader-db:
>
> total instructions in shared programs: 1524077 -> 1521964 (-0.14%)
> instructions in affected programs: 50629 -> 48516 (-4.17%)
> GAINED: 0
> LOST: 0
>
> And, for tropics, that accounts for most of the effect, the FPS
> improvement is 11.67% +/- 0.72% (n=3).
>
> v2: Use read_only field of the variable, manually check the lod_info union
> members, use get_num_operands(), rename cse_operands_visitor to
> is_cse_candidate_visitor, move all is-a-candidate logic to that
> function, and call it before checking for CSE on a given rvalue, more
> comments, use private keyword.
>
Thanks, Eric.
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
> ---
> src/glsl/Makefile.sources | 1 +
> src/glsl/glsl_parser_extras.cpp | 1 +
> src/glsl/ir.h | 6 +
> src/glsl/ir_optimization.h | 1 +
> src/glsl/opt_cse.cpp | 599
> ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 608 insertions(+)
> create mode 100644 src/glsl/opt_cse.cpp
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 2f7bfa1..2aabc05 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -83,6 +83,7 @@ LIBGLSL_FILES = \
> $(GLSL_SRCDIR)/opt_constant_variable.cpp \
> $(GLSL_SRCDIR)/opt_copy_propagation.cpp \
> $(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
> + $(GLSL_SRCDIR)/opt_cse.cpp \
> $(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
> $(GLSL_SRCDIR)/opt_dead_code.cpp \
> $(GLSL_SRCDIR)/opt_dead_code_local.cpp \
> diff --git a/src/glsl/glsl_parser_extras.cpp
> b/src/glsl/glsl_parser_extras.cpp
> index be17109..e6e71d7 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -1597,6 +1597,7 @@ do_common_optimization(exec_list *ir, bool linked,
> else
> progress = do_constant_variable_unlinked(ir) || progress;
> progress = do_constant_folding(ir) || progress;
> + progress = do_cse(ir) || progress;
> progress = do_algebraic(ir) || progress;
> progress = do_lower_jumps(ir) || progress;
> progress = do_vec_index_to_swizzle(ir) || progress;
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 8d5bec9..c6662cd 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -133,6 +133,7 @@ public:
> virtual class ir_return * as_return() { return
> NULL; }
> virtual class ir_if * as_if() { return
> NULL; }
> virtual class ir_swizzle * as_swizzle() { return
> NULL; }
> + virtual class ir_texture * as_texture() { return
> NULL; }
> virtual class ir_constant * as_constant() { return
> NULL; }
> virtual class ir_discard * as_discard() { return
> NULL; }
> virtual class ir_jump * as_jump() { return
> NULL; }
> @@ -1717,6 +1718,11 @@ public:
> v->visit(this);
> }
>
> + virtual ir_texture *as_texture()
> + {
> + return this;
> + }
> +
> virtual ir_visitor_status accept(ir_hierarchical_visitor *);
>
> /**
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 074686c..3ca9f57 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -77,6 +77,7 @@ bool do_constant_variable_unlinked(exec_list
> *instructions);
> bool do_copy_propagation(exec_list *instructions);
> bool do_copy_propagation_elements(exec_list *instructions);
> bool do_constant_propagation(exec_list *instructions);
> +bool do_cse(exec_list *instructions);
> void do_dead_builtin_varyings(struct gl_context *ctx,
> gl_shader *producer, gl_shader *consumer,
> unsigned num_tfeedback_decls,
> diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
> new file mode 100644
> index 0000000..c0fdb23
> --- /dev/null
> +++ b/src/glsl/opt_cse.cpp
> @@ -0,0 +1,599 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file opt_cse.cpp
> + *
> + * constant subexpression elimination at the GLSL IR level.
> + *
> + * Compare to brw_fs_cse.cpp for a more complete CSE implementation.
> This one
> + * is generic and handles texture operations, but it's rather simple
> currently
> + * and doesn't support modification of variables in the available
> expressions
> + * list, so it can't do variables other than uniforms or shader inputs.
> + */
> +
> +#include "ir.h"
> +#include "ir_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include "ir_basic_block.h"
> +#include "ir_optimization.h"
> +#include "ir_builder.h"
> +#include "glsl_types.h"
> +
> +using namespace ir_builder;
> +
> +static bool debug = false;
> +
> +namespace {
> +
> +/**
> + * This is the record of an available expression for common subexpression
> + * elimination.
> + */
> +class ae_entry : public exec_node
> +{
> +public:
> + ae_entry(ir_instruction *base_ir, ir_rvalue **val)
> + : val(val), base_ir(base_ir)
> + {
> + assert(val);
> + assert(*val);
> + assert(base_ir);
> +
> + var = NULL;
> + }
> +
> + /**
> + * The pointer to the expression that we might be able to reuse
> + *
> + * Note the double pointer -- this is the place in the base_ir
> expression
> + * tree that we would rewrite to move the expression out to a new
> variable
> + * assignment.
> + */
> + ir_rvalue **val;
> +
> + /**
> + * Root instruction in the basic block where the expression appeared.
> + *
> + * This is used so that we can insert the new variable declaration
> into the
> + * instruction stream (since *val is just somewhere in base_ir's
> expression
> + * tree).
> + */
> + ir_instruction *base_ir;
> +
> + /**
> + * The variable that the expression has been stored in, if it's been
> CSEd
> + * once already.
> + */
> + ir_variable *var;
> +};
> +
> +class cse_visitor : public ir_rvalue_visitor {
> +public:
> + cse_visitor(exec_list *validate_instructions)
> + : validate_instructions(validate_instructions)
> + {
> + progress = false;
> + mem_ctx = ralloc_context(NULL);
> + this->ae = new(mem_ctx) exec_list;
> + }
> + ~cse_visitor()
> + {
> + ralloc_free(mem_ctx);
> + }
> +
> + virtual ir_visitor_status visit_enter(ir_function_signature *ir);
> + virtual ir_visitor_status visit_enter(ir_loop *ir);
> + virtual ir_visitor_status visit_enter(ir_if *ir);
> + virtual ir_visitor_status visit_enter(ir_call *ir);
> + virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> + bool progress;
> +
> +private:
> + void *mem_ctx;
> +
> + ir_rvalue *try_cse(ir_rvalue *rvalue);
> + void add_to_ae(ir_rvalue **rvalue);
> +
> + /** List of ae_entry: The available expressions to reuse */
> + exec_list *ae;
> +
> + /**
> + * The whole shader, so that we can validate_ir_tree in debug mode.
> + *
> + * This proved quite useful when trying to get the tree manipulation
> + * right.
> + */
> + exec_list *validate_instructions;
> +};
> +
> +/**
> + * Visitor to walk an expression tree to check that all variables
> referenced
> + * are constants.
> + */
> +class is_cse_candidate_visitor : public ir_hierarchical_visitor
> +{
> +public:
> +
> + is_cse_candidate_visitor()
> + : ok(true)
> + {
> + }
> +
> + virtual ir_visitor_status visit(ir_dereference_variable *ir);
> +
> + bool ok;
> +};
> +
> +
> +class contains_rvalue_visitor : public ir_rvalue_visitor
> +{
> +public:
> +
> + contains_rvalue_visitor(ir_rvalue *val)
> + : val(val)
> + {
> + found = false;
> + }
> +
> + virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> + bool found;
> +
> +private:
> + ir_rvalue *val;
> +};
> +
> +} /* unnamed namespace */
> +
> +static void
> +dump_ae(exec_list *ae)
> +{
> + int i = 0;
> +
> + printf("CSE: AE contents:\n");
> + foreach_list(node, ae) {
> + ae_entry *entry = (ae_entry *)node;
> +
> + printf("CSE: AE %2d (%p): ", i, entry);
> + (*entry->val)->print();
> + printf("\n");
> +
> + if (entry->var)
> + printf("CSE: in var %p:\n", entry->var);
> +
> + i++;
> + }
> +}
> +
> +ir_visitor_status
> +is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
> +{
> + /* Currently, since we don't handle kills of the ae based on variables
> + * getting assigned, we can only handle constant variables.
> + */
> + if (ir->var->read_only) {
> + return visit_continue;
> + } else {
> + ok = false;
> + return visit_stop;
> + }
> +}
> +
> +void
> +contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> + if (*rvalue == val)
> + found = true;
> +}
> +
> +static bool
> +contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
> +{
> + contains_rvalue_visitor v(needle);
> + haystack->accept(&v);
> + return v.found;
> +}
> +
> +static bool
> +is_cse_candidate(ir_rvalue *ir)
> +{
> + /* Our temporary variable assignment generation isn't ready to handle
> + * anything bigger than a vector.
> + */
> + if (!ir->type->is_vector() && !ir->type->is_scalar())
> + return false;
> +
> + /* Only handle expressions and textures currently. We may want to
> extend
> + * to variable-index array dereferences at some point.
> + */
> + switch (ir->ir_type) {
> + case ir_type_expression:
> + case ir_type_texture:
> + break;
> + default:
> + return false;
> + }
> +
> + is_cse_candidate_visitor v;
> +
> + ir->accept(&v);
> +
> + return v.ok;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b);
> +
> +static bool
> +equals(ir_constant *a, ir_constant *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + if (a->type != b->type)
> + return false;
> +
> + for (unsigned i = 0; i < a->type->components(); i++) {
> + if (a->value.u[i] != b->value.u[i])
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static bool
> +equals(ir_dereference_variable *a, ir_dereference_variable *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + return a->var == b->var;
> +}
> +
> +static bool
> +equals(ir_dereference_array *a, ir_dereference_array *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + if (!equals(a->array, b->array))
> + return false;
> +
> + if (!equals(a->array_index, b->array_index))
> + return false;
> +
> + return true;
> +}
> +
> +static bool
> +equals(ir_swizzle *a, ir_swizzle *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + if (a->type != b->type)
> + return false;
> +
> + if (a->mask.x != b->mask.x ||
> + a->mask.y != b->mask.y ||
> + a->mask.z != b->mask.z ||
> + a->mask.w != b->mask.w) {
> + return false;
> + }
> +
> + return equals(a->val, b->val);
> +}
> +
> +static bool
> +equals(ir_texture *a, ir_texture *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + if (a->type != b->type)
> + return false;
> +
> + if (a->op != b->op)
> + return false;
> +
> + if (!equals(a->coordinate, b->coordinate))
> + return false;
> +
> + if (!equals(a->projector, b->projector))
> + return false;
> +
> + if (!equals(a->shadow_comparitor, b->shadow_comparitor))
> + return false;
> +
> + if (!equals(a->offset, b->offset))
> + return false;
> +
> + if (!equals(a->sampler, b->sampler))
> + return false;
> +
> + switch (a->op) {
> + case ir_tex:
> + case ir_lod:
> + case ir_query_levels:
> + break;
> + case ir_txb:
> + if (!equals(a->lod_info.bias, b->lod_info.bias))
> + return false;
> + break;
> + case ir_txl:
> + case ir_txf:
> + case ir_txs:
> + if (!equals(a->lod_info.lod, b->lod_info.lod))
> + return false;
> + break;
> + case ir_txd:
> + if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||
> + !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))
> + return false;
> + case ir_txf_ms:
> + if (!equals(a->lod_info.sample_index, b->lod_info.sample_index))
> + return false;
> + break;
> + case ir_tg4:
> + if (!equals(a->lod_info.component, b->lod_info.component))
> + return false;
> + default:
> + assert(!"Unrecognized texture op");
> + }
> +
> + return true;
> +}
> +
> +static bool
> +equals(ir_expression *a, ir_expression *b)
> +{
> + if (!a || !b)
> + return false;
> +
> + if (a->type != b->type)
> + return false;
> +
> + if (a->operation != b->operation)
> + return false;
> +
> + for (unsigned i = 0; i < a->get_num_operands(); i++) {
> + if (!equals(a->operands[i], b->operands[i]))
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b)
> +{
> + if (!a || !b)
> + return !a && !b;
> +
> + if (a->type != b->type)
> + return false;
> +
> + switch (a->ir_type) {
> + case ir_type_texture:
> + return equals(a->as_texture(), b->as_texture());
> +
> + case ir_type_constant:
> + return equals(a->as_constant(), b->as_constant());
> +
> + case ir_type_expression:
> + return equals(a->as_expression(), b->as_expression());
> +
> + case ir_type_dereference_variable:
> + return equals(a->as_dereference_variable(),
> b->as_dereference_variable());
> +
> + case ir_type_dereference_array:
> + return equals(a->as_dereference_array(), b->as_dereference_array());
> +
> + case ir_type_swizzle:
> + return equals(a->as_swizzle(), b->as_swizzle());
> +
> + default:
> + return false;
> + }
> +}
> +
> +/**
> + * Tries to find and return a reference to a previous computation of a
> given
> + * expression.
> + *
> + * Walk the list of available expressions checking if any of them match
> the
> + * rvalue, and if so, move the previous copy of the expression to a
> temporary
> + * and return a reference of the temporary.
> + */
> +ir_rvalue *
> +cse_visitor::try_cse(ir_rvalue *rvalue)
> +{
> + foreach_list(node, ae) {
> + ae_entry *entry = (ae_entry *)node;
> +
> + if (debug) {
> + printf("Comparing to AE %p: ", entry);
> + (*entry->val)->print();
> + printf("\n");
> + }
> +
> + if (!equals(rvalue, *entry->val))
> + continue;
> +
> + if (debug) {
> + printf("CSE: Replacing: ");
> + (*entry->val)->print();
> + printf("\n");
> + printf("CSE: with: ");
> + rvalue->print();
> + printf("\n");
> + }
> +
> + if (!entry->var) {
> + ir_instruction *base_ir = entry->base_ir;
> +
> + ir_variable *var = new(rvalue) ir_variable(rvalue->type,
> + "cse",
> + ir_var_auto);
> +
> + /* Write the previous expression result into a new variable. */
> + base_ir->insert_before(var);
> + ir_assignment *assignment = assign(var, *entry->val);
> + base_ir->insert_before(assignment);
> +
> + /* Replace the expression in the original tree with a deref of
> the
> + * variable, but keep tracking the expression for further reuse.
> + */
> + *entry->val = new(rvalue) ir_dereference_variable(var);
> + entry->val = &assignment->rhs;
> +
> + entry->var = var;
> +
> + /* Update the base_irs in the AE list. We have to be sure that
> + * they're correct -- expressions from our base_ir that weren't
> moved
> + * need to stay in this base_ir (so that later consumption of
> them
> + * puts new variables between our new variable and our base_ir),
> but
> + * expressions from our base_ir that we *did* move need base_ir
> + * updated so that any further elimination from inside gets its
> new
> + * assignments put before our new assignment.
> + */
> + foreach_list(fixup_node, ae) {
> + ae_entry *fixup_entry = (ae_entry *)fixup_node;
> + if (contains_rvalue(assignment->rhs, *fixup_entry->val))
> + fixup_entry->base_ir = assignment;
> + }
> +
> + if (debug)
> + dump_ae(ae);
> + }
> +
> + /* Replace the expression in our current tree with the variable. */
> + return new(rvalue) ir_dereference_variable(entry->var);
> + }
> +
> + return NULL;
> +}
> +
> +/** Add the rvalue to the list of available expressions for CSE. */
> +void
> +cse_visitor::add_to_ae(ir_rvalue **rvalue)
> +{
> + if (debug) {
> + printf("CSE: Add to AE: ");
> + (*rvalue)->print();
> + printf("\n");
> + }
> +
> + ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
> +
> + if (debug)
> + dump_ae(ae);
> +}
> +
> +void
> +cse_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> + if (!*rvalue)
> + return;
> +
> + if (debug) {
> + printf("CSE: handle_rvalue ");
> + (*rvalue)->print();
> + printf("\n");
> + }
> +
> + if (!is_cse_candidate(*rvalue))
> + return;
> +
> + ir_rvalue *new_rvalue = try_cse(*rvalue);
> + if (new_rvalue) {
> + *rvalue = new_rvalue;
> + progress = true;
> +
> + if (debug)
> + validate_ir_tree(validate_instructions);
> + } else {
> + add_to_ae(rvalue);
> + }
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_if *ir)
> +{
> + handle_rvalue(&ir->condition);
> +
> + ae->make_empty();
> + visit_list_elements(this, &ir->then_instructions);
> +
> + ae->make_empty();
> + visit_list_elements(this, &ir->else_instructions);
> +
> + ae->make_empty();
> + return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_function_signature *ir)
> +{
> + ae->make_empty();
> + visit_list_elements(this, &ir->body);
> +
> + ae->make_empty();
> + return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_loop *ir)
> +{
> + ae->make_empty();
> + visit_list_elements(this, &ir->body_instructions);
> +
> + ae->make_empty();
> + return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_call *ir)
> +{
> + /* Because call is an exec_list of ir_rvalues, handle_rvalue gets
> passed a
> + * pointer to the (ir_rvalue *) on the stack. Since we save those
> pointers
> + * in the AE list, we can't let handle_rvalue get called.
> + */
> + return visit_continue_with_parent;
> +}
> +
> +/**
> + * Does a (uniform-value) constant subexpression elimination pass on the
> code
> + * present in the instruction stream.
> + */
> +bool
> +do_cse(exec_list *instructions)
> +{
> + cse_visitor v(instructions);
> +
> + visit_list_elements(&v, instructions);
> +
> + return v.progress;
> +}
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131030/38b21d5d/attachment-0001.html>
More information about the mesa-dev
mailing list