<div dir="ltr">On 18 October 2013 15:44, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
This only operates on constant/uniform values for now, because otherwise I'd<br>
have to deal with killing my available CSE entries when assignments happen,<br>
and getting even this working in the tree ir was painful enough.<br>
<br>
As is, it has the following effect in shader-db:<br>
<br>
total instructions in shared programs: 1524077 -> 1521964 (-0.14%)<br>
instructions in affected programs: 50629 -> 48516 (-4.17%)<br>
GAINED: 0<br>
LOST: 0<br>
<br>
And, for tropics, that accounts for most of the effect, the FPS<br>
improvement is 11.67% +/- 0.72% (n=3).<br>
---<br>
src/glsl/Makefile.sources | 1 +<br>
src/glsl/glsl_parser_extras.cpp | 1 +<br>
src/glsl/ir.h | 6 +<br>
src/glsl/ir_optimization.h | 1 +<br>
src/glsl/opt_cse.cpp | 570 ++++++++++++++++++++++++++++++++++++++++<br>
5 files changed, 579 insertions(+)<br>
create mode 100644 src/glsl/opt_cse.cpp<br>
<br>
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources<br>
index 2f7bfa1..2aabc05 100644<br>
--- a/src/glsl/Makefile.sources<br>
+++ b/src/glsl/Makefile.sources<br>
@@ -83,6 +83,7 @@ LIBGLSL_FILES = \<br>
$(GLSL_SRCDIR)/opt_constant_variable.cpp \<br>
$(GLSL_SRCDIR)/opt_copy_propagation.cpp \<br>
$(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \<br>
+ $(GLSL_SRCDIR)/opt_cse.cpp \<br>
$(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \<br>
$(GLSL_SRCDIR)/opt_dead_code.cpp \<br>
$(GLSL_SRCDIR)/opt_dead_code_local.cpp \<br>
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp<br>
index be17109..e6e71d7 100644<br>
--- a/src/glsl/glsl_parser_extras.cpp<br>
+++ b/src/glsl/glsl_parser_extras.cpp<br>
@@ -1597,6 +1597,7 @@ do_common_optimization(exec_list *ir, bool linked,<br>
else<br>
progress = do_constant_variable_unlinked(ir) || progress;<br>
progress = do_constant_folding(ir) || progress;<br>
+ progress = do_cse(ir) || progress;<br>
progress = do_algebraic(ir) || progress;<br>
progress = do_lower_jumps(ir) || progress;<br>
progress = do_vec_index_to_swizzle(ir) || progress;<br>
diff --git a/src/glsl/ir.h b/src/glsl/ir.h<br>
index aac8cbb..bb3e99d 100644<br>
--- a/src/glsl/ir.h<br>
+++ b/src/glsl/ir.h<br>
@@ -133,6 +133,7 @@ public:<br>
virtual class ir_return * as_return() { return NULL; }<br>
virtual class ir_if * as_if() { return NULL; }<br>
virtual class ir_swizzle * as_swizzle() { return NULL; }<br>
+ virtual class ir_texture * as_texture() { return NULL; }<br>
virtual class ir_constant * as_constant() { return NULL; }<br>
virtual class ir_discard * as_discard() { return NULL; }<br>
virtual class ir_jump * as_jump() { return NULL; }<br>
@@ -1697,6 +1698,11 @@ public:<br>
v->visit(this);<br>
}<br>
<br>
+ virtual ir_texture *as_texture()<br>
+ {<br>
+ return this;<br>
+ }<br>
+<br>
virtual ir_visitor_status accept(ir_hierarchical_visitor *);<br>
<br>
/**<br>
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h<br>
index 074686c..3ca9f57 100644<br>
--- a/src/glsl/ir_optimization.h<br>
+++ b/src/glsl/ir_optimization.h<br>
@@ -77,6 +77,7 @@ bool do_constant_variable_unlinked(exec_list *instructions);<br>
bool do_copy_propagation(exec_list *instructions);<br>
bool do_copy_propagation_elements(exec_list *instructions);<br>
bool do_constant_propagation(exec_list *instructions);<br>
+bool do_cse(exec_list *instructions);<br>
void do_dead_builtin_varyings(struct gl_context *ctx,<br>
gl_shader *producer, gl_shader *consumer,<br>
unsigned num_tfeedback_decls,<br>
diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp<br>
new file mode 100644<br>
index 0000000..a509923<br>
--- /dev/null<br>
+++ b/src/glsl/opt_cse.cpp<br>
@@ -0,0 +1,570 @@<br>
+/*<br>
+ * Copyright © 2013 Intel Corporation<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER<br>
+ * DEALINGS IN THE SOFTWARE.<br>
+ */<br>
+<br>
+/**<br>
+ * \file opt_cse.cpp<br>
+ *<br>
+ * constant subexpression elimination at the GLSL IR level.<br>
+ *<br>
+ * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one<br>
+ * is generic and handles texture operations, but it's rather simple currently<br>
+ * and doesn't support modification of variables in the available expressions<br>
+ * list, so it can't do variables other than uniforms or shader inputs.<br>
+ */<br>
+<br>
+#include "ir.h"<br>
+#include "ir_visitor.h"<br>
+#include "ir_rvalue_visitor.h"<br>
+#include "ir_basic_block.h"<br>
+#include "ir_optimization.h"<br>
+#include "ir_builder.h"<br>
+#include "glsl_types.h"<br>
+<br>
+using namespace ir_builder;<br>
+<br>
+static bool debug = false;<br>
+<br>
+namespace {<br>
+<br>
+/**<br>
+ * This is the record of an available expression for common subexpression<br>
+ * elimination.<br>
+ */<br>
+class ae_entry : public exec_node<br>
+{<br>
+public:<br>
+ ae_entry(ir_instruction *base_ir, ir_rvalue **val)<br>
+ : val(val), base_ir(base_ir)<br>
+ {<br>
+ assert(val);<br>
+ assert(*val);<br>
+ assert(base_ir);<br>
+<br>
+ var = NULL;<br>
+ }<br>
+<br>
+ /**<br>
+ * The pointer to the expression that we might be able to reuse<br>
+ *<br>
+ * Note the double pointer -- this is the place in the base_ir expression<br>
+ * tree that we would rewrite to move the expression out to a new variable<br>
+ * assignment.<br>
+ */<br>
+ ir_rvalue **val;<br>
+<br>
+ /**<br>
+ * Root instruction in the basic block where the expression appeared.<br>
+ *<br>
+ * This is used so that we can insert the new variable declaration into the<br>
+ * instruction stream (since *val is just somewhere in base_ir's expression<br>
+ * tree).<br>
+ */<br>
+ ir_instruction *base_ir;<br>
+<br>
+ /**<br>
+ * The variable that the expression has been stored in, if it's been CSEd<br>
+ * once already.<br>
+ */<br>
+ ir_variable *var;<br>
+};<br>
+<br>
+class cse_visitor : public ir_rvalue_visitor {<br>
+public:<br>
+ cse_visitor(exec_list *validate_instructions)<br>
+ : validate_instructions(validate_instructions)<br>
+ {<br>
+ progress = false;<br>
+ mem_ctx = ralloc_context(NULL);<br>
+ this->ae = new(mem_ctx) exec_list;<br>
+ }<br>
+ ~cse_visitor()<br>
+ {<br>
+ ralloc_free(mem_ctx);<br>
+ }<br>
+<br>
+ ir_rvalue *try_cse(ir_rvalue *rvalue);<br>
+ void try_add_to_ae(ir_rvalue **rvalue);<br>
+<br>
+ virtual ir_visitor_status visit_enter(ir_function_signature *ir);<br>
+ virtual ir_visitor_status visit_enter(ir_loop *ir);<br>
+ virtual ir_visitor_status visit_enter(ir_if *ir);<br>
+ virtual ir_visitor_status visit_enter(ir_call *ir);<br>
+ virtual void handle_rvalue(ir_rvalue **rvalue);<br>
+<br>
+ /** List of ae_entry: The available expressions to reuse */<br>
+ exec_list *ae;<br></blockquote><div><br></div><div>Calling this "ae" makes the code below hard to read because when I run across it later, I assume it is referring to a single ae, not a list of ae entries. Can we call it "ae_entries" or something?<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ bool progress;<br>
+<br>
+ void *mem_ctx;<br>
+<br>
+ /**<br>
+ * The whole shader, so that we can validate_ir_tree in debug mode.<br>
+ *<br>
+ * This proved quite useful when trying to get the tree manipulation<br>
+ * right.<br>
+ */<br>
+ exec_list *validate_instructions;<br></blockquote><div><br></div><div>try_cse(), try_add_to_ae(), ae, mem_ctx, and validate_instructions are only used by members of the class. For safety we should make them private.<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+};<br>
+<br>
+class cse_operands_visitor : public ir_hierarchical_visitor<br>
+{<br>
+public:<br>
+<br>
+ cse_operands_visitor()<br>
+ : ok(true)<br>
+ {<br>
+ }<br>
+<br>
+ virtual ir_visitor_status visit(ir_dereference_variable *ir);<br>
+<br>
+ bool ok;<br>
+};<br></blockquote><div><br></div><div>Can we have a sentence or two comment above this class explaining what it does? It's not obvious from the class name. Maybe something like "visitor to determine whether an expression is CSE-able based on the ir_variable_mode of its operands.<br>
<br></div><div>You might also consider renaming this class to a name that reflects its purpose rather than its implementation (e.g. "is_cse_candidate_visitor").<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+<br>
+class contains_rvalue_visitor : public ir_rvalue_visitor<br>
+{<br>
+public:<br>
+<br>
+ contains_rvalue_visitor(ir_rvalue *val)<br>
+ : val(val)<br>
+ {<br>
+ found = false;<br>
+ }<br>
+<br>
+ virtual void handle_rvalue(ir_rvalue **rvalue);<br>
+<br>
+ bool found;<br>
+ ir_rvalue *val;<br></blockquote><div><br></div><div>"val" should be private.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+};<br>
+<br>
+} /* unnamed namespace */<br>
+<br>
+static void<br>
+dump_ae(exec_list *ae)<br>
+{<br>
+ int i = 0;<br>
+<br>
+ printf("CSE: AE contents:\n");<br>
+ foreach_list(node, ae) {<br>
+ ae_entry *entry = (ae_entry *)node;<br>
+<br>
+ printf("CSE: AE %2d (%p): ", i, entry);<br>
+ (*entry->val)->print();<br>
+ printf("\n");<br>
+<br>
+ if (entry->var)<br>
+ printf("CSE: in var %p:\n", entry->var);<br>
+<br>
+ i++;<br>
+ }<br>
+}<br>
+<br>
+ir_visitor_status<br>
+cse_operands_visitor::visit(ir_dereference_variable *ir)<br>
+{<br>
+ /* Currently, since we don't handle kills of the ae based on variables<br>
+ * getting assigned, we can only handle constant variables.<br>
+ */<br>
+ switch (ir->var->mode) {<br>
+ case ir_var_uniform:<br>
+ case ir_var_shader_in:<br>
+ case ir_var_system_value:<br>
+ return visit_continue;<br>
+ default:<br>
+ ok = false;<br>
+ return visit_stop;<br>
+ }<br></blockquote><div><br></div><div>Would this work just as well?<br><br> if (ir->var->read_only) {<br> return visit_continue;<br> } else {<br> ok = false;<br> return visit_stop;<br> }<br>
<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+}<br>
+<br>
+void<br>
+contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)<br>
+{<br>
+ if (*rvalue == val)<br>
+ found = true;<br>
+}<br>
+<br>
+static bool<br>
+contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)<br>
+{<br>
+ contains_rvalue_visitor v(needle);<br>
+ haystack->accept(&v);<br>
+ return v.found;<br>
+}<br>
+<br>
+static bool<br>
+valid_cse_operands(ir_rvalue *ir)<br>
+{<br>
+ /* We don't want to "CSE" for bare derefs. We might want to do so for<br>
+ * variable index derefs though.<br>
+ */<br>
+ if (ir->as_dereference_variable())<br>
+ return false;<br></blockquote><div><br></div><div>The caller (cse_visitor::try_add_to_ae()) only tries to CSE rvalues of type ir_type_expression or ir_type_texture, so this "return false" will never get executed. Also, we'll never CSE variable index derefs. Is this how you meant things to work?<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ cse_operands_visitor v;<br>
+<br>
+ ir->accept(&v);<br>
+<br>
+ return v.ok;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_rvalue *a, ir_rvalue *b);<br>
+<br>
+static bool<br>
+equals(ir_constant *a, ir_constant *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ if (a->type != b->type)<br>
+ return false;<br>
+<br>
+ for (unsigned i = 0; i < a->type->components(); i++) {<br>
+ if (a->value.u[i] != b->value.u[i])<br>
+ return false;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_dereference_variable *a, ir_dereference_variable *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ return a->var == b->var;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_dereference_array *a, ir_dereference_array *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ if (!equals(a->array, b->array))<br>
+ return false;<br>
+<br>
+ if (!equals(a->array_index, b->array_index))<br>
+ return false;<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_swizzle *a, ir_swizzle *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ if (a->type != b->type)<br>
+ return false;<br>
+<br>
+ if (a->mask.x != b->mask.x ||<br>
+ a->mask.y != b->mask.y ||<br>
+ a->mask.z != b->mask.z ||<br>
+ a->mask.w != b->mask.w) {<br>
+ return false;<br>
+ }<br>
+<br>
+ return equals(a->val, b->val);<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_texture *a, ir_texture *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ if (a->type != b->type)<br>
+ return false;<br>
+<br>
+ if (a->op != b->op)<br>
+ return false;<br>
+<br>
+ if (!equals(a->coordinate, b->coordinate))<br>
+ return false;<br>
+<br>
+ if (!equals(a->projector, b->projector))<br>
+ return false;<br>
+<br>
+ if (!equals(a->shadow_comparitor, b->shadow_comparitor))<br>
+ return false;<br>
+<br>
+ if (!equals(a->offset, b->offset))<br>
+ return false;<br>
+<br>
+ if (!equals(a->sampler, b->sampler))<br>
+ return false;<br>
+<br>
+ if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||<br>
+ !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))<br>
+ return false;<br></blockquote><div><br></div><div>This seems fragile, since its correctness relies on the fact that:<br></div><div>- Everything in the lod_info union consists of ir_rvalue *'s<br></div><div>- lod_info.grad is the largest constituent of the union<br>
</div><div>- ir_textures are rzalloc'ed, so if the texturing operation isn't txd, lod_info.grad.dPdy will be NULL.<br><br></div><div>How about this instead?<br><br> switch (a->op) {<br> case ir_tex:<br> case ir_lod:<br>
case ir_query_levels:<br> break;<br> case ir_txb:<br> if (!equals(a->lod_info.bias, b->lod_info.bias))<br> return false;<br> break;<br> case ir_txl:<br> case ir_txf:<br> case ir_txs:<br>
if (!equals(a->lod_info.lod, b->lod_info.lod))<br> return false;<br> break;<br> case ir_txd:<br> if (!equals(a->lod_info.grad.dPdx, b->lod_info.grad.dPdx) ||<br> !equals(a->lod_info.grad.dPdy, b->lod_info.grad.dPdy))<br>
return false;<br> case ir_txf_ms:<br> if (!equals(a->lod_info.sample_index, b->lod_info.sample_index))<br> return false;<br> break;<br> case ir_tg4:<br> if (!equals(a->lod_info.component, b->lod_info.component))<br>
return false;<br> default:<br> assert(!"Unrecognized texture op");<br> }<br><br></div><div>I realize it's more code, but it's safer, and I'm guessing it will be faster too, since it avoids calling equals() on irrelevant parts of the union.<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ return true;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_expression *a, ir_expression *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return false;<br>
+<br>
+ if (a->type != b->type)<br>
+ return false;<br>
+<br>
+ if (a->operation != b->operation)<br>
+ return false;<br>
+<br>
+ for (unsigned i = 0; i < Elements(a->operands); i++) {<br></blockquote><div><br></div><div>Using a->get_num_operands() as the loop bound would probably be faster.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (!equals(a->operands[i], b->operands[i]))<br>
+ return false;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+static bool<br>
+equals(ir_rvalue *a, ir_rvalue *b)<br>
+{<br>
+ if (!a || !b)<br>
+ return !a && !b;<br>
+<br>
+ if (a->type != b->type)<br>
+ return false;<br>
+<br>
+ switch (a->ir_type) {<br>
+ case ir_type_texture:<br>
+ return equals(a->as_texture(), b->as_texture());<br>
+<br>
+ case ir_type_constant:<br>
+ return equals(a->as_constant(), b->as_constant());<br>
+<br>
+ case ir_type_expression:<br>
+ return equals(a->as_expression(), b->as_expression());<br>
+<br>
+ case ir_type_dereference_variable:<br>
+ return equals(a->as_dereference_variable(), b->as_dereference_variable());<br>
+<br>
+ case ir_type_dereference_array:<br>
+ return equals(a->as_dereference_array(), b->as_dereference_array());<br>
+<br>
+ case ir_type_swizzle:<br>
+ return equals(a->as_swizzle(), b->as_swizzle());<br>
+<br>
+ default:<br>
+ return false;<br>
+ }<br>
+}<br>
+<br>
+ir_rvalue *<br>
+cse_visitor::try_cse(ir_rvalue *rvalue)<br>
+{ <br></blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ foreach_list(node, ae) {<br>
+ ae_entry *entry = (ae_entry *)node;<br>
+<br>
+ if (debug) {<br>
+ printf("Comparing to AE %p: ", entry);<br>
+ (*entry->val)->print();<br>
+ printf("\n");<br>
+ }<br>
+<br>
+ if (!equals(rvalue, *entry->val))<br>
+ continue;<br>
+<br>
+ if (debug) {<br>
+ printf("CSE: Replacing: ");<br>
+ (*entry->val)->print();<br>
+ printf("\n");<br>
+ printf("CSE: with: ");<br>
+ rvalue->print();<br>
+ printf("\n");<br>
+ }<br>
+<br>
+ if (!entry->var) {<br>
+ ir_instruction *base_ir = entry->base_ir;<br>
+<br>
+ ir_variable *var = new(rvalue) ir_variable(rvalue->type,<br>
+ "cse",<br>
+ ir_var_auto);<br>
+<br>
+ /* Write the previous expression result into a new variable. */<br>
+ base_ir->insert_before(var);<br>
+ ir_assignment *assignment = assign(var, *entry->val);<br>
+ base_ir->insert_before(assignment);<br>
+<br>
+ /* Replace the expression in the original tree with a deref of the<br>
+ * variable, but keep tracking the expression for further reuse.<br>
+ */<br>
+ *entry->val = new(rvalue) ir_dereference_variable(var);<br>
+ entry->val = &assignment->rhs;<br>
+<br>
+ entry->var = var;<br>
+<br>
+ /* Update the base_irs in the AE list. We have to be sure that<br>
+ * they're correct -- expressions from our base_ir that weren't moved<br>
+ * need to stay in this base_ir (so that later consumption of them<br>
+ * puts new variables between our new variable and our base_ir), but<br>
+ * expressions from our base_ir that we *did* move need base_ir<br>
+ * updated so that any further elimination from inside gets its new<br>
+ * assignments put before our new assignment.<br>
+ */<br>
+ foreach_list(fixup_node, ae) {<br>
+ ae_entry *fixup_entry = (ae_entry *)fixup_node;<br>
+ if (contains_rvalue(assignment->rhs, *fixup_entry->val))<br>
+ fixup_entry->base_ir = assignment;<br>
+ }<br>
+<br>
+ if (debug)<br>
+ dump_ae(ae);<br>
+ }<br>
+<br>
+ /* Replace the expression in our current tree with the variable. */<br>
+ return new(rvalue) ir_dereference_variable(entry->var);<br>
+ }<br>
+<br>
+ return NULL;<br>
+}<br>
+<br>
+/**<br>
+ * If the rvalue is an appropriate one, add it to the list of available<br>
+ * expressions for later CSEing if it gets seen again.<br>
+ */<br>
+void<br>
+cse_visitor::try_add_to_ae(ir_rvalue **inout_rvalue)<br>
+{<br>
+ ir_rvalue *rvalue = *inout_rvalue;<br>
+<br>
+ if (!rvalue->type->is_vector() &&<br>
+ !rvalue->type->is_scalar()) {<br>
+ /* Our temporary variable assignment generation isn't ready to handle<br>
+ * anything bigger than a vector.<br>
+ */<br>
+ return;<br>
+ }<br>
+<br>
+ switch (rvalue->ir_type) {<br>
+ case ir_type_expression:<br>
+ case ir_type_texture:<br>
+ break;<br>
+ default:<br>
+ return;<br>
+ }<br></blockquote><div><br></div><div>It seems strange to me that two of our criteria for determining whether something is CSE-able are in valid_cse_operands() (don't CSE bare variable derefs and don't CSE expressions containing non-const variables), and the other two of our criteria are here (don't CSE things larger than vectors, and only CSE expressions and textures).<br>
<br></div><div>Can we rename valid_cse_operands() to something like valid_for_cse() and put all four criteria together into it? (Well, actually, all three criteria, since "don't CSE bare variable derefs" is redundant with "only CSE expressions and textures".)<br>
<br>Also, can we move all of this logic to the top of cse_visitor::handle_rvalue() (before both the call to try_cse() and try_add_to_ae())? That way we won't waste time searching the AE list for expressions that it could never possibly contain. <br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ if (valid_cse_operands(*inout_rvalue)) {<br>
+ if (debug) {<br>
+ printf("CSE: Add to AE: ");<br>
+ (*inout_rvalue)->print();<br>
+ printf("\n");<br>
+ }<br>
+<br>
+ ae->push_tail(new(mem_ctx) ae_entry(base_ir, inout_rvalue));<br>
+<br>
+ if (debug)<br>
+ dump_ae(ae);<br>
+ }<br>
+}<br>
+<br>
+void<br>
+cse_visitor::handle_rvalue(ir_rvalue **rvalue)<br>
+{<br>
+ if (!*rvalue)<br>
+ return;<br>
+<br>
+ if (debug) {<br>
+ printf("CSE: handle_rvalue ");<br>
+ (*rvalue)->print();<br>
+ printf("\n");<br>
+ }<br>
+<br>
+ ir_rvalue *new_rvalue = try_cse(*rvalue);<br>
+ if (new_rvalue) {<br>
+ *rvalue = new_rvalue;<br>
+ progress = true;<br>
+<br>
+ if (debug)<br>
+ validate_ir_tree(validate_instructions);<br>
+ } else {<br>
+ try_add_to_ae(rvalue);<br>
+ }<br>
+}<br>
+<br>
+ir_visitor_status<br>
+cse_visitor::visit_enter(ir_if *ir)<br>
+{<br>
+ handle_rvalue(&ir->condition);<br>
+<br>
+ ae->make_empty();<br>
+ visit_list_elements(this, &ir->then_instructions);<br>
+<br>
+ ae->make_empty();<br>
+ visit_list_elements(this, &ir->else_instructions);<br>
+<br>
+ ae->make_empty();<br>
+ return visit_continue_with_parent;<br>
+}<br>
+<br>
+ir_visitor_status<br>
+cse_visitor::visit_enter(ir_function_signature *ir)<br>
+{<br>
+ ae->make_empty();<br>
+ visit_list_elements(this, &ir->body);<br>
+<br>
+ ae->make_empty();<br>
+ return visit_continue_with_parent;<br>
+}<br>
+<br>
+ir_visitor_status<br>
+cse_visitor::visit_enter(ir_loop *ir)<br>
+{<br>
+ ae->make_empty();<br>
+ visit_list_elements(this, &ir->body_instructions);<br>
+<br>
+ ae->make_empty();<br>
+ return visit_continue_with_parent;<br>
+}<br></blockquote><div><br></div><div>I think we could avoid having to write these three visitors if we wrote this optimization pass using call_for_basic_blocks().<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ir_visitor_status<br>
+cse_visitor::visit_enter(ir_call *ir)<br>
+{<br>
+ /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a<br>
+ * pointer to the (ir_rvalue *) on the stack. Since we save those pointers<br>
+ * in the AE list, we can't let handle_rvalue get called.<br>
+ */<br>
+ return visit_continue_with_parent;<br></blockquote><div><br></div><div>Argh, this is really unfortunate. Not only does this mean that we can't CSE expressions appearing as function arguments, we can't even CSE subexpressions of those expressions.<br>
<br></div><div>For a long time I've been wanting to change ir_call::actual_parameters from this:<br><br> /* List of ir_rvalue of paramaters passed in this call. */<br> exec_list actual_parameters;<br><br></div><div>
to this:<br><br> /* Number of parameters passed in this call. */<br> unsigned num_parameters;<br><br> /* Parameters passed in this call. */<br> ir_rvalue **actual_parameters;<br><br></div><div>Now I'm even more motivated to try that. But that seems like something that should wait for after Mesa 10.0.<br>
<br></div><div>For the time being what you're doing seems reasonable, though.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+}<br>
+<br>
+/**<br>
+ * Does a (uniform-value) constant subexpression elimination pass on the code<br>
+ * present in the instruction stream.<br>
+ */<br>
+bool<br>
+do_cse(exec_list *instructions)<br>
+{<br>
+ cse_visitor v(instructions);<br>
+<br>
+ visit_list_elements(&v, instructions);<br>
+<br>
+ return v.progress;<br>
+}<br>
<span><font color="#888888">--<br>
1.8.4.rc3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>