[Mesa-dev] [PATCH] i965/vec4: Reduce working set size of live variables computation.
Paul Berry
stereotype441 at gmail.com
Sun Oct 27 19:02:19 CET 2013
On 21 October 2013 11:20, Eric Anholt <eric at anholt.net> wrote:
> Orbital Explorer was generating a 4000 instruction geometry shader, which
> was taking 275 trips through dead code elimination and register
> coalescing, each of which updated live variables to get its work done, and
> invalidated those live variables afterwards.
>
> By using bitfields instead of bools (reducing the working set size by a
> factor of 8) in live variables analysis, it drops from 88% of the profile
> to 57%, and reduces overall runtime from I-got-bored-and-killed-it (Paul
> says 3+ minutes) to 10.5 seconds.
>
> Compare to f179f419d1d0a03fad36c2b0a58e8b853bae6118 on the FS side.
> ---
> .../drivers/dri/i965/brw_vec4_live_variables.cpp | 41
> ++++++++++++----------
> .../drivers/dri/i965/brw_vec4_live_variables.h | 10 +++---
> 2 files changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> index db3787b..f6675c8 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> @@ -83,8 +83,8 @@ vec4_live_variables::setup_def_use()
>
> for (int j = 0; j < 4; j++) {
> int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
> - if (!bd[b].def[reg * 4 + c])
> - bd[b].use[reg * 4 + c] = true;
> + if (!BITSET_TEST(bd[b].def, reg * 4 + c))
> + BITSET_SET(bd[b].use, reg * 4 + c);
> }
> }
> }
> @@ -99,8 +99,8 @@ vec4_live_variables::setup_def_use()
> for (int c = 0; c < 4; c++) {
> if (inst->dst.writemask & (1 << c)) {
> int reg = inst->dst.reg;
> - if (!bd[b].use[reg * 4 + c])
> - bd[b].def[reg * 4 + c] = true;
> + if (!BITSET_TEST(bd[b].use, reg * 4 + c))
> + BITSET_SET(bd[b].def, reg * 4 + c);
> }
> }
> }
> @@ -126,12 +126,12 @@ vec4_live_variables::compute_live_variables()
>
> for (int b = 0; b < cfg->num_blocks; b++) {
> /* Update livein */
> - for (int i = 0; i < num_vars; i++) {
> - if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
> - if (!bd[b].livein[i]) {
> - bd[b].livein[i] = true;
> - cont = true;
> - }
> + for (int i = 0; i < bitset_words; i++) {
> + BITSET_WORD new_livein = (bd[b].use[i] |
> + (bd[b].liveout[i] & ~bd[b].def[i]));
> + if (new_livein & ~bd[b].livein[i]) {
> + bd[b].livein[i] |= new_livein;
> + cont = true;
>
Personally, I think this would be slightly clearer as:
BITSET_WORD new_livein = bd[b].livein[i] | bd[b].use[i] | (bd[b].liveout[i]
& ~bd[b].def[i]);
if (new_livein != bd[b].livein[i]) {
bd[b].livein[i] = new_livein;
cont = true;
}
> }
> }
>
> @@ -140,9 +140,11 @@ vec4_live_variables::compute_live_variables()
> bblock_link *link = (bblock_link *)block_node;
> bblock_t *block = link->block;
>
> - for (int i = 0; i < num_vars; i++) {
> - if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
> - bd[b].liveout[i] = true;
> + for (int i = 0; i < bitset_words; i++) {
> + BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &
> + ~bd[b].liveout[i]);
> + if (new_liveout) {
> + bd[b].liveout[i] |= new_liveout;
> cont = true;
> }
>
Similarly, here I think it would be clearer to do:
BITSET_WORD new_liveout = bd[block->block_num].livein[i];
if (new_liveout != bd[b].liveout[i]) {
bd[b].liveout[i] |= new_liveout;
cont = true;
}
Either way, the patch is:
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
> }
> @@ -159,11 +161,12 @@
> vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
> num_vars = v->virtual_grf_count * 4;
> bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
>
> + bitset_words = BITSET_WORDS(num_vars);
> for (int i = 0; i < cfg->num_blocks; i++) {
> - bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
> - bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
> - bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
> - bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
> + bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> + bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> + bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> + bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> }
>
> setup_def_use();
> @@ -248,12 +251,12 @@ vec4_visitor::calculate_live_intervals()
>
> for (int b = 0; b < cfg.num_blocks; b++) {
> for (int i = 0; i < livevars.num_vars; i++) {
> - if (livevars.bd[b].livein[i]) {
> + if (BITSET_TEST(livevars.bd[b].livein, i)) {
> start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
> end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
> }
>
> - if (livevars.bd[b].liveout[i]) {
> + if (BITSET_TEST(livevars.bd[b].liveout, i)) {
> start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);
> end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> index 296468a..b2d8b33 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> @@ -25,6 +25,7 @@
> *
> */
>
> +#include "main/bitset.h"
> #include "brw_vec4.h"
>
> namespace brw {
> @@ -36,18 +37,18 @@ struct block_data {
> * Note that for our purposes, "defined" means unconditionally,
> completely
> * defined.
> */
> - bool *def;
> + BITSET_WORD *def;
>
> /**
> * Which variables are used before being defined in the block.
> */
> - bool *use;
> + BITSET_WORD *use;
>
> /** Which defs reach the entry point of the block. */
> - bool *livein;
> + BITSET_WORD *livein;
>
> /** Which defs reach the exit point of the block. */
> - bool *liveout;
> + BITSET_WORD *liveout;
> };
>
> class vec4_live_variables {
> @@ -65,6 +66,7 @@ public:
> void *mem_ctx;
>
> int num_vars;
> + int bitset_words;
>
> /** Per-basic-block information on live variables */
> struct block_data *bd;
> --
> 1.8.4.rc3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131027/e93a8926/attachment-0001.html>
More information about the mesa-dev
mailing list