[Mesa-dev] [PATCH 2/3] intel/fs: Don't let undefined values prevent copy propagation.

Jason Ekstrand jason at jlekstrand.net
Thu Nov 9 19:10:06 UTC 2017


Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>

On Fri, Oct 27, 2017 at 5:05 PM, Francisco Jerez <currojerez at riseup.net>
wrote:

> This makes the dataflow propagation logic of the copy propagation pass
> more intelligent in cases where the destination of a copy is known to
> be undefined for some incoming CFG edges, building upon the
> definedness information provided by the last patch.  Helps a few
> programs, and avoids a handful shader-db regressions from the next
> patch.
>
> shader-db results on ILK:
>
>   total instructions in shared programs: 6541547 -> 6541523 (-0.00%)
>   instructions in affected programs: 360 -> 336 (-6.67%)
>   helped: 8
>   HURT: 0
>
>   LOST:   0
>   GAINED: 10
>
> shader-db results on BDW:
>
>   total instructions in shared programs: 8174323 -> 8173882 (-0.01%)
>   instructions in affected programs: 7730 -> 7289 (-5.71%)
>   helped: 5
>   HURT: 2
>
>   LOST:   0
>   GAINED: 4
>
> shader-db results on SKL:
>
>   total instructions in shared programs: 8185669 -> 8184598 (-0.01%)
>   instructions in affected programs: 10364 -> 9293 (-10.33%)
>   helped: 5
>   HURT: 2
>
>   LOST:   0
>   GAINED: 2
> ---
>  src/intel/compiler/brw_fs_copy_propagation.cpp | 50
> ++++++++++++++++++++++++--
>  1 file changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
> b/src/intel/compiler/brw_fs_copy_propagation.cpp
> index cb117396089..5897cff35be 100644
> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
> @@ -36,9 +36,12 @@
>
>  #include "util/bitset.h"
>  #include "brw_fs.h"
> +#include "brw_fs_live_variables.h"
>  #include "brw_cfg.h"
>  #include "brw_eu.h"
>
> +using namespace brw;
> +
>  namespace { /* avoid conflict with opt_copy_propagation_elements */
>  struct acp_entry : public exec_node {
>     fs_reg dst;
> @@ -77,12 +80,19 @@ struct block_data {
>      * course of this block.
>      */
>     BITSET_WORD *kill;
> +
> +   /**
> +    * Which entries in the fs_copy_prop_dataflow acp table are guaranteed
> to
> +    * have a fully uninitialized destination at the end of this block.
> +    */
> +   BITSET_WORD *undef;
>  };
>
>  class fs_copy_prop_dataflow
>  {
>  public:
>     fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
> +                         const fs_live_variables *live,
>                           exec_list *out_acp[ACP_HASH_SIZE]);
>
>     void setup_initial_values();
> @@ -92,6 +102,7 @@ public:
>
>     void *mem_ctx;
>     cfg_t *cfg;
> +   const fs_live_variables *live;
>
>     acp_entry **acp;
>     int num_acp;
> @@ -102,8 +113,9 @@ public:
>  } /* anonymous namespace */
>
>  fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
> +                                             const fs_live_variables
> *live,
>                                               exec_list
> *out_acp[ACP_HASH_SIZE])
> -   : mem_ctx(mem_ctx), cfg(cfg)
> +   : mem_ctx(mem_ctx), cfg(cfg), live(live)
>  {
>     bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
>
> @@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void
> *mem_ctx, cfg_t *cfg,
>        bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD,
> bitset_words);
>        bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words);
>        bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
> +      bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words);
>
>        for (int i = 0; i < ACP_HASH_SIZE; i++) {
>           foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
> @@ -189,6 +202,18 @@ fs_copy_prop_dataflow::setup_initial_values()
>           }
>        }
>     }
> +
> +   /* Initialize the undef set. */
> +   foreach_block (block, cfg) {
> +      for (int i = 0; i < num_acp; i++) {
> +         BITSET_SET(bd[block->num].undef, i);
> +         for (unsigned off = 0; off < acp[i]->size_written; off +=
> REG_SIZE) {
> +            if (BITSET_TEST(live->block_data[block->num].defout,
> +                            live->var_from_reg(byte_offset(acp[i]->dst,
> off))))
> +               BITSET_CLEAR(bd[block->num].undef, i);
> +         }
> +      }
> +   }
>  }
>
>  /**
> @@ -229,13 +254,30 @@ fs_copy_prop_dataflow::run()
>
>           for (int i = 0; i < bitset_words; i++) {
>              const BITSET_WORD old_livein = bd[block->num].livein[i];
> +            BITSET_WORD livein_from_any_block = 0;
>
>              bd[block->num].livein[i] = ~0u;
>              foreach_list_typed(bblock_link, parent_link, link,
> &block->parents) {
>                 bblock_t *parent = parent_link->block;
> -               bd[block->num].livein[i] &= bd[parent->num].liveout[i];
> +               /* Consider ACP entries with a known-undefined destination
> to
> +                * be available from the parent.  This is valid because
> we're
> +                * free to set the undefined variable equal to the source
> of
> +                * the ACP entry without breaking the application's
> +                * expectations, since the variable is undefined.
> +                */
> +               bd[block->num].livein[i] &= (bd[parent->num].liveout[i] |
> +                                            bd[parent->num].undef[i]);
> +               livein_from_any_block |= bd[parent->num].liveout[i];
>              }
>
> +            /* Limit to the set of ACP entries that can possibly be
> available
> +             * at the start of the block, since propagating from a
> variable
> +             * which is guaranteed to be undefined (rather than
> potentially
> +             * undefined for some dynamic control-flow paths) doesn't seem
> +             * particularly useful.
> +             */
> +            bd[block->num].livein[i] &= livein_from_any_block;
> +
>              if (old_livein != bd[block->num].livein[i])
>                 progress = true;
>           }
> @@ -830,6 +872,8 @@ fs_visitor::opt_copy_propagation()
>     for (int i = 0; i < cfg->num_blocks; i++)
>        out_acp[i] = new exec_list [ACP_HASH_SIZE];
>
> +   calculate_live_intervals();
> +
>     /* First, walk through each block doing local copy propagation and
> getting
>      * the set of copies available at the end of the block.
>      */
> @@ -839,7 +883,7 @@ fs_visitor::opt_copy_propagation()
>     }
>
>     /* Do dataflow analysis for those available copies. */
> -   fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp);
> +   fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live_intervals,
> out_acp);
>
>     /* Next, re-run local copy propagation, this time with the set of
> copies
>      * provided by the dataflow analysis available at the start of a block.
> --
> 2.14.2
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171109/49ab507c/attachment-0001.html>


More information about the mesa-dev mailing list