[Mesa-dev] [PATCH] glsl: optimize copy_propagation_elements pass
Ian Romanick
idr at freedesktop.org
Wed Sep 28 15:14:29 UTC 2016
On 09/16/2016 06:21 PM, Tapani Pälli wrote:
> Changes make copy_propagation_elements pass faster, reducing link
> time spent in test case of bug 94477. Does not fix the actual issue
> but brings down the total time. No regressions seen in CI.
How does this affect the time of a full shader-db run?
There are a bunch of bits of this that are confusing to me. I think
some high-level explanation about which hash tables the acp_ref can be
in, which lists it can be in, and how they relate would help. I've
pointed out a couple of the confusing bits below.
> Signed-off-by: Tapani Pälli <tapani.palli at intel.com>
> ---
>
> For performance measurements, Martina reported in the bug 8x speedup
> to the test case shader link time when using this patch together with
> commit 2cd02e30d2e1677762d34f1831b8e609970ef0f3
>
> .../glsl/opt_copy_propagation_elements.cpp | 187 ++++++++++++++++-----
> 1 file changed, 145 insertions(+), 42 deletions(-)
>
> diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp b/src/compiler/glsl/opt_copy_propagation_elements.cpp
> index e4237cc..1c5060a 100644
> --- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
> +++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
> @@ -46,6 +46,7 @@
> #include "ir_basic_block.h"
> #include "ir_optimization.h"
> #include "compiler/glsl_types.h"
> +#include "util/hash_table.h"
>
> static bool debug = false;
>
> @@ -76,6 +77,18 @@ public:
> int swizzle[4];
> };
>
> +/* Class that refers to acp_entry in another exec_list. Used
> + * when making removals based on rhs.
> + */
> +class acp_ref : public exec_node
This pattern is called a box, so maybe acp_box would be a better name.
I'm not too hung up on it.
With this change, can the acp_entry itself still be in a list?
> +{
> +public:
> + acp_ref(acp_entry *e)
> + {
> + entry = e;
> + }
> + acp_entry *entry;
> +};
>
> class kill_entry : public exec_node
> {
> @@ -98,14 +111,42 @@ public:
> this->killed_all = false;
> this->mem_ctx = ralloc_context(NULL);
> this->shader_mem_ctx = NULL;
> - this->acp = new(mem_ctx) exec_list;
> this->kills = new(mem_ctx) exec_list;
> +
> + create_acp();
> }
> ~ir_copy_propagation_elements_visitor()
> {
> ralloc_free(mem_ctx);
> }
>
> + void create_acp()
> + {
> + lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> + rhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> + }
> +
> + void destroy_acp()
> + {
> + _mesa_hash_table_destroy(lhs_ht, NULL);
> + _mesa_hash_table_destroy(rhs_ht, NULL);
> + }
> +
> + void populate_acp(hash_table *lhs, hash_table *rhs)
> + {
> + struct hash_entry *entry;
> + hash_table_foreach(lhs, entry)
> + {
Opening { on the hash_table_foreach line.
> + _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
> + }
Blank line here.
> + hash_table_foreach(rhs, entry)
> + {
Opening { on the hash_table_foreach line.
> + _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
> + }
> + }
> +
> void handle_loop(ir_loop *, bool keep_acp);
> virtual ir_visitor_status visit_enter(class ir_loop *);
> virtual ir_visitor_status visit_enter(class ir_function_signature *);
> @@ -120,8 +161,10 @@ public:
> void kill(kill_entry *k);
> void handle_if_block(exec_list *instructions);
>
> - /** List of acp_entry: The available copies to propagate */
> - exec_list *acp;
> + /** Hash of acp_entry: The available copies to propagate */
> + hash_table *lhs_ht;
> + hash_table *rhs_ht;
> +
> /**
> * List of kill_entry: The variables whose values were killed in this
> * block.
> @@ -147,23 +190,29 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
> * block. Any instructions at global scope will be shuffled into
> * main() at link time, so they're irrelevant to us.
> */
> - exec_list *orig_acp = this->acp;
> exec_list *orig_kills = this->kills;
> bool orig_killed_all = this->killed_all;
>
> - this->acp = new(mem_ctx) exec_list;
> + hash_table *orig_lhs_ht = lhs_ht;
> + hash_table *orig_rhs_ht = rhs_ht;
> +
> this->kills = new(mem_ctx) exec_list;
Orthogonal to this patch, there is some efficiency to be gained by not
doing this extra memory allocation. Doing
this->kills.move_nodes_to(&orig_kills);
here, and
assert(this->kills.is_empty());
orig_kills.move_nodes_to(&this->kills);
below is sufficient. You'll also have to convert kills from exec_list*
to just exec_list. One nice thing about that is it will just delete
code. :)
> this->killed_all = false;
>
> + create_acp();
> +
> visit_list_elements(this, &ir->body);
>
> - ralloc_free(this->acp);
> ralloc_free(this->kills);
>
> + destroy_acp();
> +
> this->kills = orig_kills;
> - this->acp = orig_acp;
> this->killed_all = orig_killed_all;
>
> + lhs_ht = orig_lhs_ht;
> + rhs_ht = orig_rhs_ht;
> +
> return visit_continue_with_parent;
> }
>
> @@ -249,17 +298,19 @@ ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
> /* Try to find ACP entries covering swizzle_chan[], hoping they're
> * the same source variable.
> */
> - foreach_in_list(acp_entry, entry, this->acp) {
> - if (var == entry->lhs) {
> - for (int c = 0; c < chans; c++) {
> - if (entry->write_mask & (1 << swizzle_chan[c])) {
> - source[c] = entry->rhs;
> - source_chan[c] = entry->swizzle[swizzle_chan[c]];
> + hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var);
> + if (ht_entry) {
> + exec_list *ht_list = (exec_list *) ht_entry->data;
> + foreach_in_list(acp_entry, entry, ht_list) {
> + for (int c = 0; c < chans; c++) {
> + if (entry->write_mask & (1 << swizzle_chan[c])) {
> + source[c] = entry->rhs;
> + source_chan[c] = entry->swizzle[swizzle_chan[c]];
>
> if (source_chan[c] != swizzle_chan[c])
> noop_swizzle = false;
> - }
> - }
> + }
I think this indentation is off.
> + }
> }
> }
>
> @@ -319,7 +370,9 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
> /* Since we're unlinked, we don't (necessarily) know the side effects of
> * this call. So kill all copies.
> */
> - acp->make_empty();
> + _mesa_hash_table_clear(lhs_ht, NULL);
> + _mesa_hash_table_clear(rhs_ht, NULL);
> +
> this->killed_all = true;
>
> return visit_continue_with_parent;
> @@ -328,31 +381,36 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
> void
> ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
> {
> - exec_list *orig_acp = this->acp;
> exec_list *orig_kills = this->kills;
> bool orig_killed_all = this->killed_all;
>
> - this->acp = new(mem_ctx) exec_list;
> + hash_table *orig_lhs_ht = lhs_ht;
> + hash_table *orig_rhs_ht = rhs_ht;
> +
> this->kills = new(mem_ctx) exec_list;
> this->killed_all = false;
>
> + create_acp();
> +
> /* Populate the initial acp with a copy of the original */
> - foreach_in_list(acp_entry, a, orig_acp) {
> - this->acp->push_tail(new(this->acp) acp_entry(a));
> - }
> + populate_acp(orig_lhs_ht, orig_rhs_ht);
>
> visit_list_elements(this, instructions);
>
> if (this->killed_all) {
> - orig_acp->make_empty();
> + _mesa_hash_table_clear(orig_lhs_ht, NULL);
> + _mesa_hash_table_clear(orig_rhs_ht, NULL);
> }
>
> exec_list *new_kills = this->kills;
> this->kills = orig_kills;
> - ralloc_free(this->acp);
> - this->acp = orig_acp;
> this->killed_all = this->killed_all || orig_killed_all;
>
> + destroy_acp();
> +
> + lhs_ht = orig_lhs_ht;
> + rhs_ht = orig_rhs_ht;
> +
> /* Move the new kills into the parent block's list, removing them
> * from the parent's ACP list in the process.
> */
> @@ -378,37 +436,42 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
> void
> ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
> {
> - exec_list *orig_acp = this->acp;
> exec_list *orig_kills = this->kills;
> bool orig_killed_all = this->killed_all;
>
> + hash_table *orig_lhs_ht = lhs_ht;
> + hash_table *orig_rhs_ht = rhs_ht;
> +
> /* FINISHME: For now, the initial acp for loops is totally empty.
> * We could go through once, then go through again with the acp
> * cloned minus the killed entries after the first run through.
> */
> - this->acp = new(mem_ctx) exec_list;
> this->kills = new(mem_ctx) exec_list;
> this->killed_all = false;
>
> + create_acp();
> +
> if (keep_acp) {
> /* Populate the initial acp with a copy of the original */
> - foreach_in_list(acp_entry, a, orig_acp) {
> - this->acp->push_tail(new(this->acp) acp_entry(a));
> - }
> + populate_acp(orig_lhs_ht, orig_rhs_ht);
> }
>
> visit_list_elements(this, &ir->body_instructions);
>
> if (this->killed_all) {
> - orig_acp->make_empty();
> + _mesa_hash_table_clear(orig_lhs_ht, NULL);
> + _mesa_hash_table_clear(orig_rhs_ht, NULL);
> }
>
> exec_list *new_kills = this->kills;
> this->kills = orig_kills;
> - ralloc_free(this->acp);
> - this->acp = orig_acp;
> this->killed_all = this->killed_all || orig_killed_all;
>
> + destroy_acp();
> +
> + lhs_ht = orig_lhs_ht;
> + rhs_ht = orig_rhs_ht;
> +
> foreach_in_list_safe(kill_entry, k, new_kills) {
> kill(k);
> }
> @@ -430,16 +493,33 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
> void
> ir_copy_propagation_elements_visitor::kill(kill_entry *k)
> {
> - foreach_in_list_safe(acp_entry, entry, acp) {
> - if (entry->lhs == k->var) {
> - entry->write_mask = entry->write_mask & ~k->write_mask;
> - if (entry->write_mask == 0) {
> - entry->remove();
> - continue;
> - }
> + /* removal of lhs entries */
> + hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, k->var);
> + if (ht_entry) {
> + exec_list *lhs_list = (exec_list *) ht_entry->data;
> + foreach_in_list_safe(acp_entry, entry, lhs_list) {
> + entry->write_mask = entry->write_mask & ~k->write_mask;
> + if (entry->write_mask == 0) {
> + entry->remove();
> + continue;
> + }
> }
> - if (entry->rhs == k->var) {
> - entry->remove();
> + }
> +
> + /* removal of rhs entries */
> + ht_entry = _mesa_hash_table_search(rhs_ht, k->var);
> + if (ht_entry) {
> + exec_list *rhs_list = (exec_list *) ht_entry->data;
> + foreach_in_list_safe(acp_ref, ref, rhs_list) {
> + acp_entry *entry = ref->entry;
> + /* If entry is still in a list (not already removed by lhs entry
> + * removal above), remove it.
> + */
> + if (entry->prev || entry->next)
> + entry->remove();
This seems weird.
> +
> + /* remove from acp_ref list */
> + ref->remove();
What list is ref in?
> }
> }
>
> @@ -513,7 +593,30 @@ ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
>
> entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask,
> swizzle);
> - this->acp->push_tail(entry);
> +
> + /* lhs hash, hash of lhs -> acp_entry lists */
> + hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, lhs->var);
> + if (ht_entry) {
> + exec_list *lhs_list = (exec_list *) ht_entry->data;
> + lhs_list->push_tail(entry);
> + } else {
> + exec_list *lhs_list = new(mem_ctx) exec_list;
> + lhs_list->push_tail(entry);
> + _mesa_hash_table_insert(lhs_ht, lhs->var, lhs_list);
> + }
> +
> + acp_ref *ref = new(mem_ctx) acp_ref(entry);
> +
> + /* rhs hash, hash of rhs -> acp_entry pointers to lhs lists */
> + ht_entry = _mesa_hash_table_search(rhs_ht, rhs->var);
> + if (ht_entry) {
> + exec_list *rhs_list = (exec_list *) ht_entry->data;
> + rhs_list->push_tail(ref);
> + } else {
> + exec_list *rhs_list = new(mem_ctx) exec_list;
> + rhs_list->push_tail(ref);
> + _mesa_hash_table_insert(rhs_ht, rhs->var, rhs_list);
> + }
> }
>
> bool
>
More information about the mesa-dev
mailing list