[Mesa-dev] [PATCH 02/10] i965/cfg: Calculate the immediate dominators.

Kenneth Graunke kenneth at whitecape.org
Mon Feb 9 17:50:07 PST 2015


On Wednesday, February 04, 2015 08:21:19 PM Matt Turner wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_cfg.cpp              | 68 +++++++++++++++++++++-
>  src/mesa/drivers/dri/i965/brw_cfg.h                |  7 ++-
>  .../drivers/dri/i965/brw_dead_control_flow.cpp     |  5 +-
>  3 files changed, 75 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
> index ca5b01c..068d4d6 100644
> --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
> @@ -51,7 +51,7 @@ link(void *mem_ctx, bblock_t *block)
>  }
>  
>  bblock_t::bblock_t(cfg_t *cfg) :
> -   cfg(cfg), start_ip(0), end_ip(0), num(0)
> +   cfg(cfg), idom(NULL), start_ip(0), end_ip(0), num(0)
>  {
>     instructions.make_empty();
>     parents.make_empty();
> @@ -157,6 +157,7 @@ cfg_t::cfg_t(exec_list *instructions)
>     block_list.make_empty();
>     blocks = NULL;
>     num_blocks = 0;
> +   idom_dirty = true;
>  
>     bblock_t *cur = NULL;
>     int ip = 0;
> @@ -409,10 +410,13 @@ cfg_t::make_block_array()
>  }
>  
>  void
> -cfg_t::dump(backend_visitor *v) const
> +cfg_t::dump(backend_visitor *v)
>  {
> +   if (idom_dirty)
> +      calculate_idom();
> +
>     foreach_block (block, this) {
> -      fprintf(stderr, "START B%d", block->num);
> +      fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
>        foreach_list_typed(bblock_link, link, link, &block->parents) {
>           fprintf(stderr, " <-B%d",
>                   link->block->num);
> @@ -428,3 +432,61 @@ cfg_t::dump(backend_visitor *v) const
>        fprintf(stderr, "\n");
>     }
>  }
> +
> +/* Calculates the immediate dominator of each block, according to "A Simple,
> + * Fast Dominance Algorithm" by Keith D. Cooper, Timothy J. Harvey, and Ken
> + * Kennedy.
> + *
> + * The authors claim that for control flow graphs of sizes normally encountered
> + * (less than 1000 nodes) that this algorithm is significantly faster than
> + * others like Lengauer-Tarjan.
> + */
> +void
> +cfg_t::calculate_idom()
> +{
> +   foreach_block(block, this) {
> +      block->idom = NULL;
> +   }
> +   blocks[0]->idom = blocks[0];
> +
> +   bool changed;
> +   do {
> +      changed = false;
> +
> +      foreach_block(block, this) {
> +         if (block->num == 0)
> +            continue;
> +
> +         bblock_t *new_idom = NULL;
> +         foreach_list_typed(bblock_link, parent, link, &block->parents) {
> +            if (parent->block->idom) {
> +               if (new_idom == NULL) {
> +                  new_idom = parent->block;
> +               } else if (parent->block->idom != NULL) {
> +                  new_idom = intersect(parent->block, new_idom);
> +               }
> +            }
> +         }
> +
> +         if (block->idom != new_idom) {
> +            block->idom = new_idom;
> +            changed = true;
> +         }
> +      }
> +   } while (changed);
> +
> +   idom_dirty = false;
> +}
> +
> +bblock_t *
> +cfg_t::intersect(bblock_t *b1, bblock_t *b2)
> +{

Be nice to your future self and leave a comment to the effect of:

<mattst88> and in intersect the > vs the paper's < is due to the fact that we
number basic blocks backwards from what the paper expects, IIRC

> +   while (b1->num != b2->num) {
> +      while (b1->num > b2->num)
> +         b1 = b1->idom;
> +      while (b2->num > b1->num)
> +         b2 = b2->idom;
> +   }
> +   assert(b1);
> +   return b1;
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
> index 0b60fec..215f248 100644
> --- a/src/mesa/drivers/dri/i965/brw_cfg.h
> +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
> @@ -81,6 +81,7 @@ struct bblock_t {
>  
>     struct exec_node link;
>     struct cfg_t *cfg;
> +   struct bblock_t *idom;
>  
>     int start_ip;
>     int end_ip;
> @@ -269,8 +270,10 @@ struct cfg_t {
>     bblock_t *new_block();
>     void set_next_block(bblock_t **cur, bblock_t *block, int ip);
>     void make_block_array();
> +   void calculate_idom();
> +   static bblock_t *intersect(bblock_t *b1, bblock_t *b2);
>  
> -   void dump(backend_visitor *v) const;
> +   void dump(backend_visitor *v);
>  #endif
>     void *mem_ctx;
>  
> @@ -278,6 +281,8 @@ struct cfg_t {
>     struct exec_list block_list;
>     struct bblock_t **blocks;
>     int num_blocks;
> +
> +   bool idom_dirty;
>  };
>  
>  /* Note that this is implemented with a double for loop -- break will
> diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> index 03f838d..4d68e12 100644
> --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> @@ -114,8 +114,11 @@ dead_control_flow_eliminate(backend_visitor *v)
>        }
>     }
>  
> -   if (progress)
> +   if (progress) {
>        v->invalidate_live_intervals();
>  
> +      v->cfg->idom_dirty = true;
> +   }
> +
>     return progress;
>  }
> 

I think that you need to invalidate this info in more places - anywhere
that adds/removes a block would be a safe approximation.  Could we
invalidate it from cfg_t::remove_block() and whatever functions adds a
block?  Then we wouldn't have to teach optimization passes about this at
all, which would be really nice.

With those two things fixed,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150209/e507d9b5/attachment.sig>


More information about the mesa-dev mailing list