[Mesa-dev] [PATCH v2 2/2] intel/fs: Improve liveness range calculation for partial writes
Francisco Jerez
currojerez at riseup.net
Tue Aug 21 20:58:48 UTC 2018
Jose Maria Casanova Crespo <jmcasanova at igalia.com> writes:
> We use the information of the registers read/write patterns
> to improve variable liveness analysis avoiding extending the
> liveness range of a variable to the beginning of the block so
> it always reaches the beginning of the shader.
>
> This optimization analyses inside each block if a partial write
> defines completely the bytes used by a following instruction
> in the block. So we are not in the case of the use of an undefined
> value in the block.
>
> This avoids almost all the spilling that happens with 8bit/16bit
> storage tests, without any compilation performance impact for shader-db
> execution that is compensated by spilling reductions.
>
> At this moment we don't extend the logic to intra-block calculations
> of livein/liveout to not hurt performance on the general case because of
> not taking advance of BITWORD operations.
>
> The execution time for running dEQP-VK.*8bit_storage.* tests is reduced
> from 7m27.966s to 13.015s.
>
> shader-bd on SKL shows improvements reducing spilling on
> deus-ex-mankind-divided and dophin without increasing execution time.
>
> total instructions in shared programs: 14867218 -> 14863959 (-0.02%)
> instructions in affected programs: 121570 -> 118311 (-2.68%)
> helped: 38
> HURT: 0
>
> total cycles in shared programs: 537923248 -> 537720965 (-0.04%)
> cycles in affected programs: 63154229 -> 62951946 (-0.32%)
> helped: 61
> HURT: 26
>
> total loops in shared programs: 4828 -> 4828 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total spills in shared programs: 7790 -> 7375 (-5.33%)
> spills in affected programs: 2824 -> 2409 (-14.70%)
> helped: 35
> HURT: 0
>
> total fills in shared programs: 10557 -> 10024 (-5.05%)
> fills in affected programs: 3752 -> 3219 (-14.21%)
> helped: 38
> HURT: 0
>
> v2: - Use functions dst_write_pattern and src_read_pattern
> introduced in previous patch at v2.
> - Avoid calculating read_pattern if defpartial is 0
>
> Cc: Francisco Jerez <currojerez at riseup.net>
> ---
> src/intel/compiler/brw_fs_live_variables.cpp | 61 ++++++++++++--------
> src/intel/compiler/brw_fs_live_variables.h | 13 ++++-
> 2 files changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp
> index 059f076fa51..d3559e3114f 100644
> --- a/src/intel/compiler/brw_fs_live_variables.cpp
> +++ b/src/intel/compiler/brw_fs_live_variables.cpp
> @@ -54,9 +54,9 @@ using namespace brw;
>
> void
> fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
> - int ip, const fs_reg ®)
> + int ip, int src, int reg_offset)
> {
> - int var = var_from_reg(reg);
> + int var = var_from_reg(inst->src[src]) + reg_offset;
> assert(var < num_vars);
>
> start[var] = MIN2(start[var], ip);
> @@ -64,31 +64,48 @@ fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
>
> /* The use[] bitset marks when the block makes use of a variable (VGRF
> * channel) without having completely defined that variable within the
> - * block.
> + * block. We take into account that a partial write could have defined
> + * completely the read bytes in the block.
> */
> - if (!BITSET_TEST(bd->def, var))
> - BITSET_SET(bd->use, var);
> + if (!BITSET_TEST(bd->def, var)) {
> + if (!bd->defpartial[var]) {
> + BITSET_SET(bd->use, var);
Is there any measurable benefit from not executing the functionally
equivalent else block in this case?
> + } else {
> + unsigned read_pattern = inst->src_read_pattern(src, reg_offset);
> + if ((bd->defpartial[var] & read_pattern) != read_pattern)
> + BITSET_SET(bd->use, var);
> + }
> + }
> }
>
> void
> fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
> - int ip, const fs_reg ®)
> + int ip, int reg_offset)
> {
> - int var = var_from_reg(reg);
> + int var = var_from_reg(inst->dst) + reg_offset;
> assert(var < num_vars);
>
> start[var] = MIN2(start[var], ip);
> end[var] = MAX2(end[var], ip);
>
> /* The def[] bitset marks when an initialization in a block completely
> - * screens off previous updates of that variable (VGRF channel).
> + * screens off previous updates of that variable (VGRF channel). If
> + * we have a partial write now we store the write pattern so next
> + * reads in the block can check if what they read was completelly screened
"completely"
> + * of by this partial write.
"off"
> */
> - if (inst->dst.file == VGRF) {
> - if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
> + assert(inst->dst.file == VGRF);
> + if(!BITSET_TEST(bd->use, var)) {
> + if (!inst->is_partial_write()) {
> BITSET_SET(bd->def, var);
> -
> - BITSET_SET(bd->defout, var);
> + bd->defpartial[var] = ~0u;
Same question as above. Is there any measurable benefit from not
executing the functionally equivalent else block here when the if
condition evaluates to true?
> + } else {
> + bd->defpartial[var] |= inst->dst_write_pattern(reg_offset);
> + if (bd->defpartial[var] == ~0u)
> + BITSET_SET(bd->def, var);
> + }
> }
> + BITSET_SET(bd->defout, var);
> }
>
> /**
> @@ -115,14 +132,9 @@ fs_live_variables::setup_def_use()
> foreach_inst_in_block(fs_inst, inst, block) {
> /* Set use[] for this instruction */
> for (unsigned int i = 0; i < inst->sources; i++) {
> - fs_reg reg = inst->src[i];
> -
> - if (reg.file != VGRF)
> - continue;
> -
> - for (unsigned j = 0; j < regs_read(inst, i); j++) {
> - setup_one_read(bd, inst, ip, reg);
> - reg.offset += REG_SIZE;
> + if (inst->src[i].file == VGRF) {
> + for (unsigned j = 0; j < regs_read(inst, i); j++)
> + setup_one_read(bd, inst, ip, i, j);
> }
> }
>
> @@ -130,11 +142,8 @@ fs_live_variables::setup_def_use()
>
> /* Set def[] for this instruction */
> if (inst->dst.file == VGRF) {
> - fs_reg reg = inst->dst;
> - for (unsigned j = 0; j < regs_written(inst); j++) {
> - setup_one_write(bd, inst, ip, reg);
> - reg.offset += REG_SIZE;
> - }
> + for (unsigned j = 0; j < regs_written(inst); j++)
> + setup_one_write(bd, inst, ip, j);
> }
>
> if (!inst->predicate && inst->exec_size >= 8)
> @@ -281,6 +290,7 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
> bitset_words = BITSET_WORDS(num_vars);
> for (int i = 0; i < cfg->num_blocks; i++) {
> block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> + block_data[i].defpartial = rzalloc_array(mem_ctx, unsigned, num_vars);
> block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
> @@ -319,6 +329,7 @@ fs_visitor::invalidate_live_intervals()
> void
> fs_visitor::calculate_live_intervals()
> {
> +
Stray whitespace.
> if (this->live_intervals)
> return;
>
> diff --git a/src/intel/compiler/brw_fs_live_variables.h b/src/intel/compiler/brw_fs_live_variables.h
> index 9e95e443170..5842b24f3e5 100644
> --- a/src/intel/compiler/brw_fs_live_variables.h
> +++ b/src/intel/compiler/brw_fs_live_variables.h
> @@ -44,6 +44,15 @@ struct block_data {
> */
> BITSET_WORD *def;
>
> + /**
> + * Which bytes of a variable are defined before being used in the block.
> + * Each bit of the 32-bit integer represents one of the register 32 bytes.
> + *
> + * As above, "defined" means unconditionally defined but at byte level.
> + */
> +
> + unsigned *defpartial;
> +
> /**
> * Which variables are used before being defined in the block.
> */
> @@ -115,9 +124,9 @@ public:
> protected:
> void setup_def_use();
> void setup_one_read(struct block_data *bd, fs_inst *inst, int ip,
> - const fs_reg ®);
> + int src, int reg_offset);
> void setup_one_write(struct block_data *bd, fs_inst *inst, int ip,
> - const fs_reg ®);
> + int reg_offset);
> void compute_live_variables();
> void compute_start_end();
>
> --
> 2.17.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180821/d77db42f/attachment.sig>
More information about the mesa-dev
mailing list