[Mesa-dev] [PATCH] glsl: Add a CSE pass.

Sat Oct 26 21:05:58 CEST 2013

On 18 October 2013 15:44, Eric Anholt <eric at anholt.net> wrote:

> This only operates on constant/uniform values for now, because otherwise
> I'd
> have to deal with killing my available CSE entries when assignments happen,
> and getting even this working in the tree ir was painful enough.
>
> As is, it has the following effect in shader-db:
>
> total instructions in shared programs: 1524077 -> 1521964 (-0.14%)
> instructions in affected programs:     50629 -> 48516 (-4.17%)
> GAINED:                                0
> LOST:                                  0
>
> And, for tropics, that accounts for most of the effect, the FPS
> improvement is 11.67% +/- 0.72% (n=3).
> ---
>  src/glsl/Makefile.sources       |   1 +
>  src/glsl/glsl_parser_extras.cpp |   1 +
>  src/glsl/ir.h                   |   6 +
>  src/glsl/ir_optimization.h      |   1 +
>  src/glsl/opt_cse.cpp            | 570
> ++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 579 insertions(+)
>  create mode 100644 src/glsl/opt_cse.cpp
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 2f7bfa1..2aabc05 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -83,6 +83,7 @@ LIBGLSL_FILES = \
>         $(GLSL_SRCDIR)/opt_constant_variable.cpp \
>         $(GLSL_SRCDIR)/opt_copy_propagation.cpp \
>         $(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
> +       $(GLSL_SRCDIR)/opt_cse.cpp \
>         $(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
>         $(GLSL_SRCDIR)/opt_dead_code.cpp \
>         $(GLSL_SRCDIR)/opt_dead_code_local.cpp \
> diff --git a/src/glsl/glsl_parser_extras.cpp
> b/src/glsl/glsl_parser_extras.cpp
> index be17109..e6e71d7 100644
> --- a/src/glsl/glsl_parser_extras.cpp
> +++ b/src/glsl/glsl_parser_extras.cpp
> @@ -1597,6 +1597,7 @@ do_common_optimization(exec_list *ir, bool linked,
>     else
>        progress = do_constant_variable_unlinked(ir) || progress;
>     progress = do_constant_folding(ir) || progress;
> +   progress = do_cse(ir) || progress;
>     progress = do_algebraic(ir) || progress;
>     progress = do_lower_jumps(ir) || progress;
>     progress = do_vec_index_to_swizzle(ir) || progress;
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index aac8cbb..bb3e99d 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -133,6 +133,7 @@ public:
>     virtual class ir_return *            as_return()           { return
> NULL; }
>     virtual class ir_if *                as_if()               { return
> NULL; }
>     virtual class ir_swizzle *           as_swizzle()          { return
> NULL; }
> +   virtual class ir_texture *           as_texture()          { return
> NULL; }
>     virtual class ir_constant *          as_constant()         { return
> NULL; }
>     virtual class ir_discard *           as_discard()          { return
> NULL; }
>     virtual class ir_jump *              as_jump()             { return
> NULL; }
> @@ -1697,6 +1698,11 @@ public:
>        v->visit(this);
>     }
>
> +   virtual ir_texture *as_texture()
> +   {
> +      return this;
> +   }
> +
>     virtual ir_visitor_status accept(ir_hierarchical_visitor *);
>
>     /**
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 074686c..3ca9f57 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -77,6 +77,7 @@ bool do_constant_variable_unlinked(exec_list
> *instructions);
>  bool do_copy_propagation(exec_list *instructions);
>  bool do_copy_propagation_elements(exec_list *instructions);
>  bool do_constant_propagation(exec_list *instructions);
> +bool do_cse(exec_list *instructions);
>  void do_dead_builtin_varyings(struct gl_context *ctx,
>                                gl_shader *producer, gl_shader *consumer,
>                                unsigned num_tfeedback_decls,
> diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
> new file mode 100644
> index 0000000..a509923
> --- /dev/null
> +++ b/src/glsl/opt_cse.cpp
> @@ -0,0 +1,570 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file opt_cse.cpp
> + *
> + * constant subexpression elimination at the GLSL IR level.
> + *
> + * Compare to brw_fs_cse.cpp for a more complete CSE implementation.
>  This one
> + * is generic and handles texture operations, but it's rather simple
> currently
> + * and doesn't support modification of variables in the available
> expressions
> + * list, so it can't do variables other than uniforms or shader inputs.
> + */
> +
> +#include "ir.h"
> +#include "ir_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include "ir_basic_block.h"
> +#include "ir_optimization.h"
> +#include "ir_builder.h"
> +#include "glsl_types.h"
> +
> +using namespace ir_builder;
> +
> +static bool debug = false;
> +
> +namespace {
> +
> +/**
> + * This is the record of an available expression for common subexpression
> + * elimination.
> + */
> +class ae_entry : public exec_node
> +{
> +public:
> +   ae_entry(ir_instruction *base_ir, ir_rvalue **val)
> +      : val(val), base_ir(base_ir)
> +   {
> +      assert(val);
> +      assert(*val);
> +      assert(base_ir);
> +
> +      var = NULL;
> +   }
> +
> +   /**
> +    * The pointer to the expression that we might be able to reuse
> +    *
> +    * Note the double pointer -- this is the place in the base_ir
> expression
> +    * tree that we would rewrite to move the expression out to a new
> variable
> +    * assignment.
> +    */
> +   ir_rvalue **val;
> +
> +   /**
> +    * Root instruction in the basic block where the expression appeared.
> +    *
> +    * This is used so that we can insert the new variable declaration
> into the
> +    * instruction stream (since *val is just somewhere in base_ir's
> expression
> +    * tree).
> +    */
> +   ir_instruction *base_ir;
> +
> +   /**
> +    * The variable that the expression has been stored in, if it's been
> CSEd
> +    * once already.
> +    */
> +   ir_variable *var;
> +};
> +
> +class cse_visitor : public ir_rvalue_visitor {
> +public:
> +   cse_visitor(exec_list *validate_instructions)
> +      : validate_instructions(validate_instructions)
> +   {
> +      progress = false;
> +      mem_ctx = ralloc_context(NULL);
> +      this->ae = new(mem_ctx) exec_list;
> +   }
> +   ~cse_visitor()
> +   {
> +      ralloc_free(mem_ctx);
> +   }
> +
> +   ir_rvalue *try_cse(ir_rvalue *rvalue);
> +   void try_add_to_ae(ir_rvalue **rvalue);
> +
> +   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
> +   virtual ir_visitor_status visit_enter(ir_loop *ir);
> +   virtual ir_visitor_status visit_enter(ir_if *ir);
> +   virtual ir_visitor_status visit_enter(ir_call *ir);
> +   virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> +   /** List of ae_entry: The available expressions to reuse */
> +   exec_list *ae;
>

Calling this "ae" makes the code below hard to read because when I run
across it later, I assume it is referring to a single ae, not a list of ae
entries.  Can we call it "ae_entries" or something?

> +
> +   bool progress;
> +
> +   void *mem_ctx;
> +
> +   /**
> +    * The whole shader, so that we can validate_ir_tree in debug mode.
> +    *
> +    * This proved quite useful when trying to get the tree manipulation
> +    * right.
> +    */
> +   exec_list *validate_instructions;
>

try_cse(), try_add_to_ae(), ae, mem_ctx, and validate_instructions are only
used by members of the class.  For safety we should make them private.

> +};
> +
> +class cse_operands_visitor : public ir_hierarchical_visitor
> +{
> +public:
> +
> +   cse_operands_visitor()
> +      : ok(true)
> +   {
> +   }
> +
> +   virtual ir_visitor_status visit(ir_dereference_variable *ir);
> +
> +   bool ok;
> +};
>

Can we have a sentence or two comment above this class explaining what it
does?  It's not obvious from the class name.  Maybe something like "visitor
to determine whether an expression is CSE-able based on the
ir_variable_mode of its operands.

You might also consider renaming this class to a name that reflects its
purpose rather than its implementation (e.g. "is_cse_candidate_visitor").

> +
> +
> +class contains_rvalue_visitor : public ir_rvalue_visitor
> +{
> +public:
> +
> +   contains_rvalue_visitor(ir_rvalue *val)
> +      : val(val)
> +   {
> +      found = false;
> +   }
> +
> +   virtual void handle_rvalue(ir_rvalue **rvalue);
> +
> +   bool found;
> +   ir_rvalue *val;
>

"val" should be private.

> +};
> +
> +} /* unnamed namespace */
> +
> +static void
> +dump_ae(exec_list *ae)
> +{
> +   int i = 0;
> +
> +   printf("CSE: AE contents:\n");
> +   foreach_list(node, ae) {
> +      ae_entry *entry = (ae_entry *)node;
> +
> +      printf("CSE:   AE %2d (%p): ", i, entry);
> +      (*entry->val)->print();
> +      printf("\n");
> +
> +      if (entry->var)
> +         printf("CSE:     in var %p:\n", entry->var);
> +
> +      i++;
> +   }
> +}
> +
> +ir_visitor_status
> +cse_operands_visitor::visit(ir_dereference_variable *ir)
> +{
> +   /* Currently, since we don't handle kills of the ae based on variables
> +    * getting assigned, we can only handle constant variables.
> +    */
> +   switch (ir->var->mode) {
> +   case ir_var_uniform:
> +   case ir_var_shader_in:
> +   case ir_var_system_value:
> +      return visit_continue;
> +   default:
> +      ok = false;
> +      return visit_stop;
> +   }
>

Would this work just as well?

   if (ir->var->read_only) {
      return visit_continue;
   } else {
      ok = false;
      return visit_stop;
   }

> +}
> +
> +void
> +contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> +   if (*rvalue == val)
> +      found = true;
> +}
> +
> +static bool
> +contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
> +{
> +   contains_rvalue_visitor v(needle);
> +   haystack->accept(&v);
> +   return v.found;
> +}
> +
> +static bool
> +valid_cse_operands(ir_rvalue *ir)
> +{
> +   /* We don't want to "CSE" for bare derefs.  We might want to do so for
> +    * variable index derefs though.
> +    */
> +   if (ir->as_dereference_variable())
> +      return false;
>

The caller (cse_visitor::try_add_to_ae()) only tries to CSE rvalues of type
ir_type_expression or ir_type_texture, so this "return false" will never
get executed.  Also, we'll never CSE variable index derefs.  Is this how
you meant things to work?

> +
> +   cse_operands_visitor v;
> +
> +   ir->accept(&v);
> +
> +   return v.ok;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b);
> +
> +static bool
> +equals(ir_constant *a, ir_constant *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   for (unsigned i = 0; i < a->type->components(); i++) {
> +      if (a->value.u[i] != b->value.u[i])
> +         return false;
> +   }
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_dereference_variable *a, ir_dereference_variable *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   return a->var == b->var;
> +}
> +
> +static bool
> +equals(ir_dereference_array *a, ir_dereference_array *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (!equals(a->array, b->array))
> +      return false;
> +
> +   if (!equals(a->array_index, b->array_index))
> +      return false;
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_swizzle *a, ir_swizzle *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->mask.x != b->mask.x ||
> +       a->mask.y != b->mask.y ||
> +       a->mask.z != b->mask.z ||
> +       a->mask.w != b->mask.w) {
> +      return false;
> +   }
> +
> +   return equals(a->val, b->val);
> +}
> +
> +static bool
> +equals(ir_texture *a, ir_texture *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->op != b->op)
> +      return false;
> +
> +   if (!equals(a->coordinate, b->coordinate))
> +      return false;
> +
> +   if (!equals(a->projector, b->projector))
> +      return false;
> +
> +   if (!equals(a->shadow_comparitor, b->shadow_comparitor))
> +      return false;
> +
> +   if (!equals(a->offset, b->offset))
> +      return false;
> +
> +   if (!equals(a->sampler, b->sampler))
> +      return false;
> +
> +   if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||
> +       !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))
> +      return false;
>

This seems fragile, since its correctness relies on the fact that:
- Everything in the lod_info union consists of ir_rvalue *'s
- lod_info.grad is the largest constituent of the union
- ir_textures are rzalloc'ed, so if the texturing operation isn't txd,
lod_info.grad.dPdy will be NULL.

How about this instead?

   switch (a->op) {
   case ir_tex:
   case ir_lod:
   case ir_query_levels:
      break;
   case ir_txb:
      if (!equals(a->lod_info.bias, b->lod_info.bias))
         return false;
      break;
   case ir_txl:
   case ir_txf:
   case ir_txs:
      if (!equals(a->lod_info.lod, b->lod_info.lod))
         return false;
      break;
   case ir_txd:
      if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||
          !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))
         return false;
   case ir_txf_ms:
      if (!equals(a->lod_info.sample_index, b->lod_info.sample_index))
         return false;
      break;
   case ir_tg4:
      if (!equals(a->lod_info.component, b->lod_info.component))
         return false;
   default:
      assert(!"Unrecognized texture op");
   }

I realize it's more code, but it's safer, and I'm guessing it will be
faster too, since it avoids calling equals() on irrelevant parts of the
union.

> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_expression *a, ir_expression *b)
> +{
> +   if (!a || !b)
> +      return false;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   if (a->operation != b->operation)
> +      return false;
> +
> +   for (unsigned i = 0; i < Elements(a->operands); i++) {
>

Using a->get_num_operands() as the loop bound would probably be faster.

> +      if (!equals(a->operands[i], b->operands[i]))
> +         return false;
> +   }
> +
> +   return true;
> +}
> +
> +static bool
> +equals(ir_rvalue *a, ir_rvalue *b)
> +{
> +   if (!a || !b)
> +      return !a && !b;
> +
> +   if (a->type != b->type)
> +      return false;
> +
> +   switch (a->ir_type) {
> +   case ir_type_texture:
> +      return equals(a->as_texture(), b->as_texture());
> +
> +   case ir_type_constant:
> +      return equals(a->as_constant(), b->as_constant());
> +
> +   case ir_type_expression:
> +      return equals(a->as_expression(), b->as_expression());
> +
> +   case ir_type_dereference_variable:
> +      return equals(a->as_dereference_variable(),
> b->as_dereference_variable());
> +
> +   case ir_type_dereference_array:
> +      return equals(a->as_dereference_array(), b->as_dereference_array());
> +
> +   case ir_type_swizzle:
> +      return equals(a->as_swizzle(), b->as_swizzle());
> +
> +   default:
> +      return false;
> +   }
> +}
> +
> +ir_rvalue *
> +cse_visitor::try_cse(ir_rvalue *rvalue)
> +{
>
+   foreach_list(node, ae) {
> +      ae_entry *entry = (ae_entry *)node;
> +
> +      if (debug) {
> +         printf("Comparing to AE %p: ", entry);
> +         (*entry->val)->print();
> +         printf("\n");
> +      }
> +
> +      if (!equals(rvalue, *entry->val))
> +         continue;
> +
> +      if (debug) {
> +         printf("CSE: Replacing: ");
> +         (*entry->val)->print();
> +         printf("\n");
> +         printf("CSE:      with: ");
> +         rvalue->print();
> +         printf("\n");
> +      }
> +
> +      if (!entry->var) {
> +         ir_instruction *base_ir = entry->base_ir;
> +
> +         ir_variable *var = new(rvalue) ir_variable(rvalue->type,
> +                                                    "cse",
> +                                                    ir_var_auto);
> +
> +         /* Write the previous expression result into a new variable. */
> +         base_ir->insert_before(var);
> +         ir_assignment *assignment = assign(var, *entry->val);
> +         base_ir->insert_before(assignment);
> +
> +         /* Replace the expression in the original tree with a deref of
> the
> +          * variable, but keep tracking the expression for further reuse.
> +          */
> +         *entry->val = new(rvalue) ir_dereference_variable(var);
> +         entry->val = &assignment->rhs;
> +
> +         entry->var = var;
> +
> +         /* Update the base_irs in the AE list.  We have to be sure that
> +          * they're correct -- expressions from our base_ir that weren't
> moved
> +          * need to stay in this base_ir (so that later consumption of
> them
> +          * puts new variables between our new variable and our base_ir),
> but
> +          * expressions from our base_ir that we *did* move need base_ir
> +          * updated so that any further elimination from inside gets its
> new
> +          * assignments put before our new assignment.
> +          */
> +         foreach_list(fixup_node, ae) {
> +            ae_entry *fixup_entry = (ae_entry *)fixup_node;
> +            if (contains_rvalue(assignment->rhs, *fixup_entry->val))
> +               fixup_entry->base_ir = assignment;
> +         }
> +
> +         if (debug)
> +            dump_ae(ae);
> +      }
> +
> +      /* Replace the expression in our current tree with the variable. */
> +      return new(rvalue) ir_dereference_variable(entry->var);
> +   }
> +
> +   return NULL;
> +}
> +
> +/**
> + * If the rvalue is an appropriate one, add it to the list of available
> + * expressions for later CSEing if it gets seen again.
> + */
> +void
> +cse_visitor::try_add_to_ae(ir_rvalue **inout_rvalue)
> +{
> +   ir_rvalue *rvalue = *inout_rvalue;
> +
> +   if (!rvalue->type->is_vector() &&
> +       !rvalue->type->is_scalar()) {
> +      /* Our temporary variable assignment generation isn't ready to
> handle
> +       * anything bigger than a vector.
> +       */
> +      return;
> +   }
> +
> +   switch (rvalue->ir_type) {
> +   case ir_type_expression:
> +   case ir_type_texture:
> +      break;
> +   default:
> +      return;
> +   }
>

It seems strange to me that two of our criteria for determining whether
something is CSE-able are in valid_cse_operands() (don't CSE bare variable
derefs and don't CSE expressions containing non-const variables), and the
other two of our criteria are here (don't CSE things larger than vectors,
and only CSE expressions and textures).

Can we rename valid_cse_operands() to something like valid_for_cse() and
put all four criteria together into it?  (Well, actually, all three
criteria, since "don't CSE bare variable derefs" is redundant with "only
CSE expressions and textures".)

Also, can we move all of this logic to the top of
cse_visitor::handle_rvalue() (before both the call to try_cse() and
try_add_to_ae())?  That way we won't waste time searching the AE list for
expressions that it could never possibly contain.

> +
> +   if (valid_cse_operands(*inout_rvalue)) {
> +      if (debug) {
> +         printf("CSE: Add to AE: ");
> +         (*inout_rvalue)->print();
> +         printf("\n");
> +      }
> +
> +      ae->push_tail(new(mem_ctx) ae_entry(base_ir, inout_rvalue));
> +
> +      if (debug)
> +         dump_ae(ae);
> +   }
> +}
> +
> +void
> +cse_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> +   if (!*rvalue)
> +      return;
> +
> +   if (debug) {
> +      printf("CSE: handle_rvalue ");
> +      (*rvalue)->print();
> +      printf("\n");
> +   }
> +
> +   ir_rvalue *new_rvalue = try_cse(*rvalue);
> +   if (new_rvalue) {
> +      *rvalue = new_rvalue;
> +      progress = true;
> +
> +      if (debug)
> +         validate_ir_tree(validate_instructions);
> +   } else {
> +      try_add_to_ae(rvalue);
> +   }
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_if *ir)
> +{
> +   handle_rvalue(&ir->condition);
> +
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->then_instructions);
> +
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->else_instructions);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_function_signature *ir)
> +{
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->body);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_loop *ir)
> +{
> +   ae->make_empty();
> +   visit_list_elements(this, &ir->body_instructions);
> +
> +   ae->make_empty();
> +   return visit_continue_with_parent;
> +}
>

I think we could avoid having to write these three visitors if we wrote
this optimization pass using call_for_basic_blocks().

> +
> +ir_visitor_status
> +cse_visitor::visit_enter(ir_call *ir)
> +{
> +   /* Because call is an exec_list of ir_rvalues, handle_rvalue gets
> passed a
> +    * pointer to the (ir_rvalue *) on the stack.  Since we save those
> pointers
> +    * in the AE list, we can't let handle_rvalue get called.
> +    */
> +   return visit_continue_with_parent;
>

Argh, this is really unfortunate.  Not only does this mean that we can't
CSE expressions appearing as function arguments, we can't even CSE
subexpressions of those expressions.

For a long time I've been wanting to change ir_call::actual_parameters from
this:

   /* List of ir_rvalue of paramaters passed in this call. */
   exec_list actual_parameters;

to this:

   /* Number of parameters passed in this call. */
   unsigned num_parameters;

   /* Parameters passed in this call. */
   ir_rvalue **actual_parameters;

Now I'm even more motivated to try that.  But that seems like something
that should wait for after Mesa 10.0.

For the time being what you're doing seems reasonable, though.

> +}
> +
> +/**
> + * Does a (uniform-value) constant subexpression elimination pass on the
> code
> + * present in the instruction stream.
> + */
> +bool
> +do_cse(exec_list *instructions)
> +{
> +   cse_visitor v(instructions);
> +
> +   visit_list_elements(&v, instructions);
> +
> +   return v.progress;
> +}
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131026/eead9a16/attachment-0001.html>