[Mesa-dev] [PATCH 2/2] intel/fs: Improve liveness range calculation for partial writes
Jose Maria Casanova Crespo
jmcasanova at igalia.com
Fri Jul 13 13:31:05 UTC 2018
We use the information of the registers read/write patterns
to improve variable liveness analysis avoiding extending the
liveness range of a variable to the beginning of the block so
it always reaches the beginning of the shader.
This optimization analyses inside each block that if a partial write
defines completely the bytes used by a following instruction we are
in the block we are not in the case of the use of undefined value in
the block.
This avoids almost all the spilling that happens with 8bit/16bit
storage tests, without any compilation performance impact for shader-db
execution that is compensated by spilling reductions.
At this moment we don't extend the logic to intra-block calculations
of livein/liveout to not hurt performance on the general case because of
not taking advance of BITWORD operations.
The execution time for running dEQP-VK.*8bit_storage.* tests is reduced
from 25m20,643s to 0m57,015s
shader-bd on SKL shows improvements reducing spilling on
deus-ex-mankind-divided and dophin without increasing execution time.
total instructions in shared programs: 14867218 -> 14863959 (-0.02%)
instructions in affected programs: 121570 -> 118311 (-2.68%)
helped: 38
HURT: 0
total cycles in shared programs: 537923248 -> 537720965 (-0.04%)
cycles in affected programs: 63154229 -> 62951946 (-0.32%)
helped: 61
HURT: 26
total loops in shared programs: 4828 -> 4828 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0
total spills in shared programs: 7790 -> 7375 (-5.33%)
spills in affected programs: 2824 -> 2409 (-14.70%)
helped: 35
HURT: 0
total fills in shared programs: 10557 -> 10024 (-5.05%)
fills in affected programs: 3752 -> 3219 (-14.21%)
helped: 38
HURT: 0
---
src/intel/compiler/brw_fs_live_variables.cpp | 32 +++++++++++++++-----
src/intel/compiler/brw_fs_live_variables.h | 9 ++++++
2 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp
index 059f076fa51..8947988df06 100644
--- a/src/intel/compiler/brw_fs_live_variables.cpp
+++ b/src/intel/compiler/brw_fs_live_variables.cpp
@@ -64,10 +64,15 @@ fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
/* The use[] bitset marks when the block makes use of a variable (VGRF
* channel) without having completely defined that variable within the
- * block.
+ * block. We take into account that a partial write could have defined
+ * completely the read bytes in the block.
*/
- if (!BITSET_TEST(bd->def, var))
- BITSET_SET(bd->use, var);
+ if (!BITSET_TEST(bd->def, var)) {
+ unsigned read_pattern = inst->register_byte_use_pattern(reg, false);
+ if ((bd->defpartial[var] & read_pattern) != read_pattern) {
+ BITSET_SET(bd->use, var);
+ }
+ }
}
void
@@ -81,14 +86,23 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
end[var] = MAX2(end[var], ip);
/* The def[] bitset marks when an initialization in a block completely
- * screens off previous updates of that variable (VGRF channel).
+ * screens off previous updates of that variable (VGRF channel). If
+ * we have a partial write now we store the write pattern so next
+ * reads in the block can check if what they read was completelly screened
+ * of by this partial write.
*/
- if (inst->dst.file == VGRF) {
- if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
+ assert(inst->dst.file == VGRF);
+ if(!BITSET_TEST(bd->use, var)) {
+ if (!inst->is_partial_write()) {
BITSET_SET(bd->def, var);
-
- BITSET_SET(bd->defout, var);
+ bd->defpartial[var] = ~0u;
+ } else {
+ bd->defpartial[var] |= inst->register_byte_use_pattern(reg, true);
+ if (bd->defpartial[var] == ~0u)
+ BITSET_SET(bd->def, var);
+ }
}
+ BITSET_SET(bd->defout, var);
}
/**
@@ -281,6 +295,7 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
bitset_words = BITSET_WORDS(num_vars);
for (int i = 0; i < cfg->num_blocks; i++) {
block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
+ block_data[i].defpartial = rzalloc_array(mem_ctx, unsigned, num_vars);
block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
@@ -319,6 +334,7 @@ fs_visitor::invalidate_live_intervals()
void
fs_visitor::calculate_live_intervals()
{
+
if (this->live_intervals)
return;
diff --git a/src/intel/compiler/brw_fs_live_variables.h b/src/intel/compiler/brw_fs_live_variables.h
index 9e95e443170..17f1c8177a0 100644
--- a/src/intel/compiler/brw_fs_live_variables.h
+++ b/src/intel/compiler/brw_fs_live_variables.h
@@ -44,6 +44,15 @@ struct block_data {
*/
BITSET_WORD *def;
+ /**
+ * Which bytes of a variable are defined before being used in the block.
+ * Each bit of the 32-bit integer represents one of the register 32 bytes.
+ *
+ * As above, "defined" means unconditionally defined but at byte level.
+ */
+
+ unsigned *defpartial;
+
/**
* Which variables are used before being defined in the block.
*/
--
2.17.1
More information about the mesa-dev
mailing list