<div dir="ltr">On 21 October 2013 11:20, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Orbital Explorer was generating a 4000 instruction geometry shader, which<br>
was taking 275 trips through dead code elimination and register<br>
coalescing, each of which updated live variables to get its work done, and<br>
invalidated those live variables afterwards.<br>
<br>
By using bitfields instead of bools (reducing the working set size by a<br>
factor of 8) in live variables analysis, it drops from 88% of the profile<br>
to 57%, and reduces overall runtime from I-got-bored-and-killed-it (Paul<br>
says 3+ minutes) to 10.5 seconds.<br>
<br>
Compare to f179f419d1d0a03fad36c2b0a58e8b853bae6118 on the FS side.<br>
---<br>
.../drivers/dri/i965/brw_vec4_live_variables.cpp | 41 ++++++++++++----------<br>
.../drivers/dri/i965/brw_vec4_live_variables.h | 10 +++---<br>
2 files changed, 28 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp<br>
index db3787b..f6675c8 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp<br>
@@ -83,8 +83,8 @@ vec4_live_variables::setup_def_use()<br>
<br>
for (int j = 0; j < 4; j++) {<br>
int c = BRW_GET_SWZ(inst->src[i].swizzle, j);<br>
- if (!bd[b].def[reg * 4 + c])<br>
- bd[b].use[reg * 4 + c] = true;<br>
+ if (!BITSET_TEST(bd[b].def, reg * 4 + c))<br>
+ BITSET_SET(bd[b].use, reg * 4 + c);<br>
}<br>
}<br>
}<br>
@@ -99,8 +99,8 @@ vec4_live_variables::setup_def_use()<br>
for (int c = 0; c < 4; c++) {<br>
if (inst->dst.writemask & (1 << c)) {<br>
int reg = inst->dst.reg;<br>
- if (!bd[b].use[reg * 4 + c])<br>
- bd[b].def[reg * 4 + c] = true;<br>
+ if (!BITSET_TEST(bd[b].use, reg * 4 + c))<br>
+ BITSET_SET(bd[b].def, reg * 4 + c);<br>
}<br>
}<br>
}<br>
@@ -126,12 +126,12 @@ vec4_live_variables::compute_live_variables()<br>
<br>
for (int b = 0; b < cfg->num_blocks; b++) {<br>
/* Update livein */<br>
- for (int i = 0; i < num_vars; i++) {<br>
- if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {<br>
- if (!bd[b].livein[i]) {<br>
- bd[b].livein[i] = true;<br>
- cont = true;<br>
- }<br>
+ for (int i = 0; i < bitset_words; i++) {<br>
+ BITSET_WORD new_livein = (bd[b].use[i] |<br>
+ (bd[b].liveout[i] & ~bd[b].def[i]));<br>
+ if (new_livein & ~bd[b].livein[i]) {<br>
+ bd[b].livein[i] |= new_livein;<br>
+ cont = true;<br></blockquote><div><br></div><div>Personally, I think this would be slightly clearer as:<br><br></div><div>BITSET_WORD new_livein = bd[b].livein[i] | bd[b].use[i] | (bd[b].liveout[i] & ~bd[b].def[i]);<br>
</div><div>if (new_livein != bd[b].livein[i]) {<br></div><div> bd[b].livein[i] = new_livein;<br></div><div> cont = true;<br>}<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
}<br>
<br>
@@ -140,9 +140,11 @@ vec4_live_variables::compute_live_variables()<br>
bblock_link *link = (bblock_link *)block_node;<br>
bblock_t *block = link->block;<br>
<br>
- for (int i = 0; i < num_vars; i++) {<br>
- if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {<br>
- bd[b].liveout[i] = true;<br>
+ for (int i = 0; i < bitset_words; i++) {<br>
+ BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &<br>
+ ~bd[b].liveout[i]);<br>
+ if (new_liveout) {<br>
+ bd[b].liveout[i] |= new_liveout;<br>
cont = true;<br>
}<br></blockquote><div><br></div><div>Similarly, here I think it would be clearer to do:<br><br></div><div>BITSET_WORD new_liveout = bd[b].liveout[i] | bd[block->block_num].livein[i];<br></div><div>if (new_liveout != bd[b].liveout[i]) {<br>
</div><div> bd[b].liveout[i] = new_liveout;<br></div><div> cont = true;<br>}<br><br></div><div>Either way, the patch is:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
@@ -159,11 +161,12 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)<br>
num_vars = v->virtual_grf_count * 4;<br>
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);<br>
<br>
+ bitset_words = BITSET_WORDS(num_vars);<br>
for (int i = 0; i < cfg->num_blocks; i++) {<br>
- bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);<br>
- bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);<br>
- bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);<br>
- bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);<br>
+ bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);<br>
+ bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);<br>
+ bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);<br>
+ bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);<br>
}<br>
<br>
setup_def_use();<br>
@@ -248,12 +251,12 @@ vec4_visitor::calculate_live_intervals()<br>
<br>
for (int b = 0; b < cfg.num_blocks; b++) {<br>
for (int i = 0; i < livevars.num_vars; i++) {<br>
- if (livevars.bd[b].livein[i]) {<br>
+ if (BITSET_TEST(livevars.bd[b].livein, i)) {<br>
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);<br>
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);<br>
}<br>
<br>
- if (livevars.bd[b].liveout[i]) {<br>
+ if (BITSET_TEST(livevars.bd[b].liveout, i)) {<br>
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);<br>
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);<br>
}<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h<br>
index 296468a..b2d8b33 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h<br>
@@ -25,6 +25,7 @@<br>
*<br>
*/<br>
<br>
+#include "main/bitset.h"<br>
#include "brw_vec4.h"<br>
<br>
namespace brw {<br>
@@ -36,18 +37,18 @@ struct block_data {<br>
* Note that for our purposes, "defined" means unconditionally, completely<br>
* defined.<br>
*/<br>
- bool *def;<br>
+ BITSET_WORD *def;<br>
<br>
/**<br>
* Which variables are used before being defined in the block.<br>
*/<br>
- bool *use;<br>
+ BITSET_WORD *use;<br>
<br>
/** Which defs reach the entry point of the block. */<br>
- bool *livein;<br>
+ BITSET_WORD *livein;<br>
<br>
/** Which defs reach the exit point of the block. */<br>
- bool *liveout;<br>
+ BITSET_WORD *liveout;<br>
};<br>
<br>
class vec4_live_variables {<br>
@@ -65,6 +66,7 @@ public:<br>
void *mem_ctx;<br>
<br>
int num_vars;<br>
+ int bitset_words;<br>
<br>
/** Per-basic-block information on live variables */<br>
struct block_data *bd;<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.4.rc3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>