[Mesa-dev] [PATCH 02/10] i965/cfg: Calculate the immediate dominators.

Mon Feb 9 17:52:54 PST 2015

On Mon, Feb 9, 2015 at 5:50 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Wednesday, February 04, 2015 08:21:19 PM Matt Turner wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_cfg.cpp              | 68 +++++++++++++++++++++-
>>  src/mesa/drivers/dri/i965/brw_cfg.h                |  7 ++-
>>  .../drivers/dri/i965/brw_dead_control_flow.cpp     |  5 +-
>>  3 files changed, 75 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
>> index ca5b01c..068d4d6 100644
>> --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
>> @@ -51,7 +51,7 @@ link(void *mem_ctx, bblock_t *block)
>>  }
>>
>>  bblock_t::bblock_t(cfg_t *cfg) :
>> -   cfg(cfg), start_ip(0), end_ip(0), num(0)
>> +   cfg(cfg), idom(NULL), start_ip(0), end_ip(0), num(0)
>>  {
>>     instructions.make_empty();
>>     parents.make_empty();
>> @@ -157,6 +157,7 @@ cfg_t::cfg_t(exec_list *instructions)
>>     block_list.make_empty();
>>     blocks = NULL;
>>     num_blocks = 0;
>> +   idom_dirty = true;
>>
>>     bblock_t *cur = NULL;
>>     int ip = 0;
>> @@ -409,10 +410,13 @@ cfg_t::make_block_array()
>>  }
>>
>>  void
>> -cfg_t::dump(backend_visitor *v) const
>> +cfg_t::dump(backend_visitor *v)
>>  {
>> +   if (idom_dirty)
>> +      calculate_idom();
>> +
>>     foreach_block (block, this) {
>> -      fprintf(stderr, "START B%d", block->num);
>> +      fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
>>        foreach_list_typed(bblock_link, link, link, &block->parents) {
>>           fprintf(stderr, " <-B%d",
>>                   link->block->num);
>> @@ -428,3 +432,61 @@ cfg_t::dump(backend_visitor *v) const
>>        fprintf(stderr, "\n");
>>     }
>>  }
>> +
>> +/* Calculates the immediate dominator of each block, according to "A Simple,
>> + * Fast Dominance Algorithm" by Keith D. Cooper, Timothy J. Harvey, and Ken
>> + * Kennedy.
>> + *
>> + * The authors claim that for control flow graphs of sizes normally encountered
>> + * (less than 1000 nodes) that this algorithm is significantly faster than
>> + * others like Lengauer-Tarjan.
>> + */
>> +void
>> +cfg_t::calculate_idom()
>> +{
>> +   foreach_block(block, this) {
>> +      block->idom = NULL;
>> +   }
>> +   blocks[0]->idom = blocks[0];
>> +
>> +   bool changed;
>> +   do {
>> +      changed = false;
>> +
>> +      foreach_block(block, this) {
>> +         if (block->num == 0)
>> +            continue;
>> +
>> +         bblock_t *new_idom = NULL;
>> +         foreach_list_typed(bblock_link, parent, link, &block->parents) {
>> +            if (parent->block->idom) {
>> +               if (new_idom == NULL) {
>> +                  new_idom = parent->block;
>> +               } else if (parent->block->idom != NULL) {
>> +                  new_idom = intersect(parent->block, new_idom);
>> +               }
>> +            }
>> +         }
>> +
>> +         if (block->idom != new_idom) {
>> +            block->idom = new_idom;
>> +            changed = true;
>> +         }
>> +      }
>> +   } while (changed);
>> +
>> +   idom_dirty = false;
>> +}
>> +
>> +bblock_t *
>> +cfg_t::intersect(bblock_t *b1, bblock_t *b2)
>> +{
>
> Be nice to your future self and leave a comment to the effect of:
>
> <mattst88> and in intersect the > vs the paper's < is due to the fact that we
> number basic blocks backwards from what the paper expects, IIRC

Sure. I'll copy NIR's:

/* Note, the comparisons here are the opposite of what the paper says
 * because we index blocks from beginning -> end (i.e. reverse
 * post-order) instead of post-order like they assume.
 */

>> +   while (b1->num != b2->num) {
>> +      while (b1->num > b2->num)
>> +         b1 = b1->idom;
>> +      while (b2->num > b1->num)
>> +         b2 = b2->idom;
>> +   }
>> +   assert(b1);
>> +   return b1;
>> +}
>> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
>> index 0b60fec..215f248 100644
>> --- a/src/mesa/drivers/dri/i965/brw_cfg.h
>> +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
>> @@ -81,6 +81,7 @@ struct bblock_t {
>>
>>     struct exec_node link;
>>     struct cfg_t *cfg;
>> +   struct bblock_t *idom;
>>
>>     int start_ip;
>>     int end_ip;
>> @@ -269,8 +270,10 @@ struct cfg_t {
>>     bblock_t *new_block();
>>     void set_next_block(bblock_t **cur, bblock_t *block, int ip);
>>     void make_block_array();
>> +   void calculate_idom();
>> +   static bblock_t *intersect(bblock_t *b1, bblock_t *b2);
>>
>> -   void dump(backend_visitor *v) const;
>> +   void dump(backend_visitor *v);
>>  #endif
>>     void *mem_ctx;
>>
>> @@ -278,6 +281,8 @@ struct cfg_t {
>>     struct exec_list block_list;
>>     struct bblock_t **blocks;
>>     int num_blocks;
>> +
>> +   bool idom_dirty;
>>  };
>>
>>  /* Note that this is implemented with a double for loop -- break will
>> diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
>> index 03f838d..4d68e12 100644
>> --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
>> @@ -114,8 +114,11 @@ dead_control_flow_eliminate(backend_visitor *v)
>>        }
>>     }
>>
>> -   if (progress)
>> +   if (progress) {
>>        v->invalidate_live_intervals();
>>
>> +      v->cfg->idom_dirty = true;
>> +   }
>> +
>>     return progress;
>>  }
>>
>
> I think that you need to invalidate this info in more places - anywhere
> that adds/removes a block would be a safe approximation.  Could we
> invalidate it from cfg_t::remove_block() and whatever functions adds a
> block?  Then we wouldn't have to teach optimization passes about this at
> all, which would be really nice.
>
> With those two things fixed,
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

Yeah, I'll remove the hunk from dead control flow and replace it with
idom_dirty = true as the last statement in cfg_t::remove_block.

Thanks!