[Mesa-dev] [PATCH v2 2/2] intel/fs: Improve liveness range calculation for partial writes

Jose Maria Casanova Crespo jmcasanova at igalia.com
Thu Jul 19 15:45:47 UTC 2018


We use the information of the registers read/write patterns
to improve variable liveness analysis avoiding extending the
liveness range of a variable to the beginning of the block so
it always reaches the beginning of the shader.

This optimization analyses inside each block if a partial write
defines completely the bytes used by a following instruction
in the block. So we are not in the case of the use of an undefined
value in the block.

This avoids almost all the spilling that happens with 8bit/16bit
storage tests, without any compilation performance impact for shader-db
execution that is compensated by spilling reductions.

At this moment we don't extend the logic to intra-block calculations
of livein/liveout to not hurt performance on the general case because of
not taking advance of BITWORD operations.

The execution time for running dEQP-VK.*8bit_storage.* tests is reduced
from 7m27.966s to 13.015s.

shader-bd on SKL shows improvements reducing spilling on
deus-ex-mankind-divided and dophin without increasing execution time.

total instructions in shared programs: 14867218 -> 14863959 (-0.02%)
instructions in affected programs: 121570 -> 118311 (-2.68%)
helped: 38
HURT: 0

total cycles in shared programs: 537923248 -> 537720965 (-0.04%)
cycles in affected programs: 63154229 -> 62951946 (-0.32%)
helped: 61
HURT: 26

total loops in shared programs: 4828 -> 4828 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 7790 -> 7375 (-5.33%)
spills in affected programs: 2824 -> 2409 (-14.70%)
helped: 35
HURT: 0

total fills in shared programs: 10557 -> 10024 (-5.05%)
fills in affected programs: 3752 -> 3219 (-14.21%)
helped: 38
HURT: 0

v2: - Use functions dst_write_pattern and src_read_pattern
      introduced in previous patch at v2.
    - Avoid calculating read_pattern if defpartial is 0

Cc: Francisco Jerez <currojerez at riseup.net>
---
 src/intel/compiler/brw_fs_live_variables.cpp | 61 ++++++++++++--------
 src/intel/compiler/brw_fs_live_variables.h   | 13 ++++-
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp
index 059f076fa51..d3559e3114f 100644
--- a/src/intel/compiler/brw_fs_live_variables.cpp
+++ b/src/intel/compiler/brw_fs_live_variables.cpp
@@ -54,9 +54,9 @@ using namespace brw;
 
 void
 fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
-                                  int ip, const fs_reg &reg)
+                                  int ip, int src, int reg_offset)
 {
-   int var = var_from_reg(reg);
+   int var = var_from_reg(inst->src[src]) + reg_offset;
    assert(var < num_vars);
 
    start[var] = MIN2(start[var], ip);
@@ -64,31 +64,48 @@ fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
 
    /* The use[] bitset marks when the block makes use of a variable (VGRF
     * channel) without having completely defined that variable within the
-    * block.
+    * block. We take into account that a partial write could have defined
+    * completely the read bytes in the block.
     */
-   if (!BITSET_TEST(bd->def, var))
-      BITSET_SET(bd->use, var);
+   if (!BITSET_TEST(bd->def, var)) {
+      if (!bd->defpartial[var]) {
+         BITSET_SET(bd->use, var);
+      } else {
+         unsigned read_pattern = inst->src_read_pattern(src, reg_offset);
+         if ((bd->defpartial[var] & read_pattern) != read_pattern)
+            BITSET_SET(bd->use, var);
+      }
+   }
 }
 
 void
 fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
-                                   int ip, const fs_reg &reg)
+                                   int ip, int reg_offset)
 {
-   int var = var_from_reg(reg);
+   int var = var_from_reg(inst->dst) + reg_offset;
    assert(var < num_vars);
 
    start[var] = MIN2(start[var], ip);
    end[var] = MAX2(end[var], ip);
 
    /* The def[] bitset marks when an initialization in a block completely
-    * screens off previous updates of that variable (VGRF channel).
+    * screens off previous updates of that variable (VGRF channel). If
+    * we have a partial write now we store the write pattern so next
+    * reads in the block can check if what they read was completelly screened
+    * of by this partial write.
     */
-   if (inst->dst.file == VGRF) {
-      if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
+   assert(inst->dst.file == VGRF);
+   if(!BITSET_TEST(bd->use, var)) {
+      if (!inst->is_partial_write()) {
          BITSET_SET(bd->def, var);
-
-      BITSET_SET(bd->defout, var);
+         bd->defpartial[var] = ~0u;
+      } else {
+         bd->defpartial[var] |= inst->dst_write_pattern(reg_offset);
+         if (bd->defpartial[var] == ~0u)
+            BITSET_SET(bd->def, var);
+      }
    }
+   BITSET_SET(bd->defout, var);
 }
 
 /**
@@ -115,14 +132,9 @@ fs_live_variables::setup_def_use()
       foreach_inst_in_block(fs_inst, inst, block) {
 	 /* Set use[] for this instruction */
 	 for (unsigned int i = 0; i < inst->sources; i++) {
-            fs_reg reg = inst->src[i];
-
-            if (reg.file != VGRF)
-               continue;
-
-            for (unsigned j = 0; j < regs_read(inst, i); j++) {
-               setup_one_read(bd, inst, ip, reg);
-               reg.offset += REG_SIZE;
+            if (inst->src[i].file == VGRF) {
+               for (unsigned j = 0; j < regs_read(inst, i); j++)
+                  setup_one_read(bd, inst, ip, i, j);
             }
 	 }
 
@@ -130,11 +142,8 @@ fs_live_variables::setup_def_use()
 
          /* Set def[] for this instruction */
          if (inst->dst.file == VGRF) {
-            fs_reg reg = inst->dst;
-            for (unsigned j = 0; j < regs_written(inst); j++) {
-               setup_one_write(bd, inst, ip, reg);
-               reg.offset += REG_SIZE;
-            }
+            for (unsigned j = 0; j < regs_written(inst); j++)
+               setup_one_write(bd, inst, ip, j);
 	 }
 
          if (!inst->predicate && inst->exec_size >= 8)
@@ -281,6 +290,7 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
    bitset_words = BITSET_WORDS(num_vars);
    for (int i = 0; i < cfg->num_blocks; i++) {
       block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
+      block_data[i].defpartial = rzalloc_array(mem_ctx, unsigned, num_vars);
       block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
       block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
       block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
@@ -319,6 +329,7 @@ fs_visitor::invalidate_live_intervals()
 void
 fs_visitor::calculate_live_intervals()
 {
+
    if (this->live_intervals)
       return;
 
diff --git a/src/intel/compiler/brw_fs_live_variables.h b/src/intel/compiler/brw_fs_live_variables.h
index 9e95e443170..5842b24f3e5 100644
--- a/src/intel/compiler/brw_fs_live_variables.h
+++ b/src/intel/compiler/brw_fs_live_variables.h
@@ -44,6 +44,15 @@ struct block_data {
     */
    BITSET_WORD *def;
 
+   /**
+    * Which bytes of a variable are defined before being used in the block.
+    * Each bit of the 32-bit integer represents one of the register 32 bytes.
+    *
+    * As above, "defined" means unconditionally defined but at byte level.
+    */
+
+   unsigned *defpartial;
+
    /**
     * Which variables are used before being defined in the block.
     */
@@ -115,9 +124,9 @@ public:
 protected:
    void setup_def_use();
    void setup_one_read(struct block_data *bd, fs_inst *inst, int ip,
-                       const fs_reg &reg);
+                       int src, int reg_offset);
    void setup_one_write(struct block_data *bd, fs_inst *inst, int ip,
-                        const fs_reg &reg);
+                        int reg_offset);
    void compute_live_variables();
    void compute_start_end();
 
-- 
2.17.1



More information about the mesa-dev mailing list