[Mesa-dev] [PATCH 1/4] i965/fs: Improve live variables calculation performance.

Kenneth Graunke kenneth at whitecape.org
Sun Mar 3 00:29:15 PST 2013


On 02/19/2013 06:03 PM, Eric Anholt wrote:
> We can execute way fewer instructions by doing our boolean manipulation
> on an "int" of bits at a time, while also reducing our working set size.

I see...it lets you do the computation on whole words at a time rather 
than looping over (up to) 32 individual bools.  Yeah.  That would be 
much faster.

> Reduces compile time of L4D2's slowest shader from 4s to 1.1s
> (-72.4% +/- 0.2%, n=10)
> ---
>   .../drivers/dri/i965/brw_fs_live_variables.cpp     |   44 +++++++++++---------
>   src/mesa/drivers/dri/i965/brw_fs_live_variables.h  |   10 +++--
>   2 files changed, 30 insertions(+), 24 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index db8f397..e7de43e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -40,7 +40,7 @@ using namespace brw;
>    */
>
>   /**
> - * Sets up the use[] and def[] arrays.
> + * Sets up the use[] and def[] bitsets.
>    *
>    * The basic-block-level live variable analysis needs to know which
>    * variables get used before they're completely defined, and which
> @@ -67,8 +67,8 @@ fs_live_variables::setup_def_use()
>   	    if (inst->src[i].file == GRF) {
>   	       int reg = inst->src[i].reg;
>
> -	       if (!bd[b].def[reg])
> -		  bd[b].use[reg] = true;
> +	       if (!BITSET_TEST(bd[b].def, reg))
> +		  BITSET_SET(bd[b].use, reg);
>   	    }
>   	 }
>
> @@ -82,8 +82,8 @@ fs_live_variables::setup_def_use()
>   	     !inst->force_uncompressed &&
>   	     !inst->force_sechalf) {
>   	    int reg = inst->dst.reg;
> -	    if (!bd[b].use[reg])
> -	       bd[b].def[reg] = true;
> +            if (!BITSET_TEST(bd[b].use, reg))
> +               BITSET_SET(bd[b].def, reg);
>   	 }
>
>   	 ip++;
> @@ -107,12 +107,12 @@ fs_live_variables::compute_live_variables()
>
>         for (int b = 0; b < cfg->num_blocks; b++) {
>   	 /* Update livein */
> -	 for (int i = 0; i < num_vars; i++) {
> -	    if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
> -	       if (!bd[b].livein[i]) {
> -		  bd[b].livein[i] = true;
> -		  cont = true;
> -	       }
> +	 for (int i = 0; i < bitset_words; i++) {
> +            BITSET_WORD new_livein = (bd[b].use[i] |
> +                                      (bd[b].liveout[i] & ~bd[b].def[i]));
> +	    if (new_livein & ~bd[b].livein[i]) {
> +               bd[b].livein[i] |= new_livein;
> +               cont = true;
>   	    }
>   	 }
>
> @@ -121,9 +121,11 @@ fs_live_variables::compute_live_variables()
>   	    bblock_link *link = (bblock_link *)block_node;
>   	    bblock_t *block = link->block;
>
> -	    for (int i = 0; i < num_vars; i++) {
> -	       if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
> -		  bd[b].liveout[i] = true;
> +	    for (int i = 0; i < bitset_words; i++) {
> +               BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &
> +                                          ~bd[b].liveout[i]);
> +	       if (new_liveout & ~bd[b].liveout[i]) {
> +		  bd[b].liveout[i] |= new_liveout;

This hunk doesn't quite seem right...new_liveout already has & 
~bd[b].liveout[i]...so couldn't the if condition just be new_liveout? It 
seems equivalent.

Otherwise, this patch looks good and gets a:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

>   		  cont = true;
>   	       }
>   	    }
> @@ -140,11 +142,13 @@ fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg)
>      num_vars = v->virtual_grf_count;
>      bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
>
> +   bitset_words = (ALIGN(v->virtual_grf_count, BITSET_WORDBITS) /
> +                   BITSET_WORDBITS);
>      for (int i = 0; i < cfg->num_blocks; i++) {
> -      bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
> -      bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
> -      bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
> -      bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
> +      bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> +      bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> +      bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> +      bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
>      }
>
>      setup_def_use();
> @@ -208,12 +212,12 @@ fs_visitor::calculate_live_intervals()
>
>      for (int b = 0; b < cfg.num_blocks; b++) {
>         for (int i = 0; i < num_vars; i++) {
> -	 if (livevars.bd[b].livein[i]) {
> +	 if (BITSET_TEST(livevars.bd[b].livein, i)) {
>   	    def[i] = MIN2(def[i], cfg.blocks[b]->start_ip);
>   	    use[i] = MAX2(use[i], cfg.blocks[b]->start_ip);
>   	 }
>
> -	 if (livevars.bd[b].liveout[i]) {
> +	 if (BITSET_TEST(livevars.bd[b].liveout, i)) {
>   	    def[i] = MIN2(def[i], cfg.blocks[b]->end_ip);
>   	    use[i] = MAX2(use[i], cfg.blocks[b]->end_ip);
>   	 }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> index 5f7e67e..1cde5f4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> @@ -26,6 +26,7 @@
>    */
>
>   #include "brw_fs.h"
> +#include "main/bitset.h"
>
>   namespace brw {
>
> @@ -36,18 +37,18 @@ struct block_data {
>       * Note that for our purposes, "defined" means unconditionally, completely
>       * defined.
>       */
> -   bool *def;
> +   BITSET_WORD *def;
>
>      /**
>       * Which variables are used before being defined in the block.
>       */
> -   bool *use;
> +   BITSET_WORD *use;
>
>      /** Which defs reach the entry point of the block. */
> -   bool *livein;
> +   BITSET_WORD *livein;
>
>      /** Which defs reach the exit point of the block. */
> -   bool *liveout;
> +   BITSET_WORD *liveout;
>   };
>
>   class fs_live_variables {
> @@ -73,6 +74,7 @@ public:
>      void *mem_ctx;
>
>      int num_vars;
> +   int bitset_words;
>
>      /** Per-basic-block information on live variables */
>      struct block_data *bd;
>



More information about the mesa-dev mailing list