[Mesa-dev] [PATCH 10/95] i965/vec4: handle 32 and 64 bit channels in liveness analysis

Iago Toral Quiroga itoral at igalia.com
Tue Jul 19 10:40:07 UTC 2016


From: "Juan A. Suarez Romero" <jasuarez at igalia.com>

Our current data flow analysis does not take into account that channels
on 64-bit operands are 64-bit. This is a problem when the same register
is accessed using both 64-bit and 32-bit channels. This is very common
in operations where we need to access 64-bit data in 32-bit chunks,
such as the double packing and packing operations.

This patch changes the analysis by checking the bits that each source
or destination datatype needs. Actually, rather than bits, we use
blocks of 32bits, which is the minimum channel size.

Because a vgrf can contain a dvec4 (256 bits), we reserve 8
32-bit blocks to map the channels.
---
 .../dri/i965/brw_vec4_dead_code_eliminate.cpp      | 30 +++++++++++++-----
 .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 36 +++++++++++++++++-----
 .../drivers/dri/i965/brw_vec4_live_variables.h     |  7 +++--
 3 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index c643212..1f7cd49 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -59,10 +59,16 @@ vec4_visitor::dead_code_eliminate()
             bool result_live[4] = { false };
 
             if (inst->dst.file == VGRF) {
+               const unsigned offs = type_sz(inst->dst.type) == 8 ?
+                  1 : (inst->exec_size == 4 ? 0 : 4);
                for (unsigned i = 0; i < inst->regs_written; i++) {
-                  for (int c = 0; c < 4; c++)
-                     result_live[c] |= BITSET_TEST(
-                        live, var_from_reg(alloc, offset(inst->dst, i), c));
+                  for (int c = 0; c < 4; c++) {
+                     const unsigned v =
+                        var_from_reg(alloc, offset(inst->dst, i), c);
+                     result_live[c] |= BITSET_TEST(live, v);
+                     if (offs)
+                        result_live[c] |= BITSET_TEST(live, v + offs);
+                  }
                }
             } else {
                for (unsigned c = 0; c < 4; c++)
@@ -110,11 +116,16 @@ vec4_visitor::dead_code_eliminate()
          }
 
          if (inst->dst.file == VGRF && !inst->predicate) {
+            const unsigned offs = type_sz(inst->dst.type) == 8 ?
+               1 : (inst->exec_size == 4 ? 0 : 4);
             for (unsigned i = 0; i < inst->regs_written; i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
-                     BITSET_CLEAR(live, var_from_reg(alloc,
-                                                     offset(inst->dst, i), c));
+                     const unsigned v =
+                        var_from_reg(alloc, offset(inst->dst, i), c);
+                     BITSET_CLEAR(live, v);
+                     if (offs)
+                        BITSET_CLEAR(live, v + offs);
                   }
                }
             }
@@ -132,10 +143,15 @@ vec4_visitor::dead_code_eliminate()
 
          for (int i = 0; i < 3; i++) {
             if (inst->src[i].file == VGRF) {
+               const unsigned offs = type_sz(inst->src[i].type) == 8 ?
+                  1 : (inst->exec_size == 4 ? 0 : 4);
                for (unsigned j = 0; j < inst->regs_read(i); j++) {
                   for (int c = 0; c < 4; c++) {
-                     BITSET_SET(live, var_from_reg(alloc,
-                                                   offset(inst->src[i], j), c));
+                     const unsigned v =
+                        var_from_reg(alloc, offset(inst->src[i], j), c);
+                     BITSET_SET(live, v);
+                     if (offs)
+                        BITSET_SET(live, v + offs);
                   }
                }
             }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 57d5fbb..bfc5e44 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -76,12 +76,16 @@ vec4_live_variables::setup_def_use()
 	 /* Set use[] for this instruction */
 	 for (unsigned int i = 0; i < 3; i++) {
 	    if (inst->src[i].file == VGRF) {
+               const unsigned offs = type_sz(inst->dst.type) == 8 ?
+                  1 : (inst->exec_size == 4 ? 0 : 4);
                for (unsigned j = 0; j < inst->regs_read(i); j++) {
                   for (int c = 0; c < 4; c++) {
                      const unsigned v =
                         var_from_reg(alloc, offset(inst->src[i], j), c);
                      if (!BITSET_TEST(bd->def, v))
                         BITSET_SET(bd->use, v);
+                     if (offs && !BITSET_TEST(bd->def, v + offs))
+                        BITSET_SET(bd->use, v + offs);
                   }
                }
 	    }
@@ -99,6 +103,8 @@ vec4_live_variables::setup_def_use()
 	  */
 	 if (inst->dst.file == VGRF &&
 	     (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
+            const unsigned offs = type_sz(inst->dst.type) == 8 ?
+               1 : (inst->exec_size == 4 ? 0 : 4);
             for (unsigned i = 0; i < inst->regs_written; i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
@@ -106,6 +112,8 @@ vec4_live_variables::setup_def_use()
                         var_from_reg(alloc, offset(inst->dst, i), c);
                      if (!BITSET_TEST(bd->use, v))
                         BITSET_SET(bd->def, v);
+                     if (offs && !BITSET_TEST(bd->use, v + offs))
+                        BITSET_SET(bd->def, v + offs);
                   }
                }
             }
@@ -188,7 +196,7 @@ vec4_live_variables::vec4_live_variables(const simple_allocator &alloc,
 {
    mem_ctx = ralloc_context(NULL);
 
-   num_vars = alloc.total_size * 4;
+   num_vars = alloc.total_size * 8;
    block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
    bitset_words = BITSET_WORDS(num_vars);
@@ -238,14 +246,14 @@ vec4_visitor::calculate_live_intervals()
    if (this->live_intervals)
       return;
 
-   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
-   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
+   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
+   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
    ralloc_free(this->virtual_grf_start);
    ralloc_free(this->virtual_grf_end);
    this->virtual_grf_start = start;
    this->virtual_grf_end = end;
 
-   for (unsigned i = 0; i < this->alloc.total_size * 4; i++) {
+   for (unsigned i = 0; i < this->alloc.total_size * 8; i++) {
       start[i] = MAX_INSTRUCTION;
       end[i] = -1;
    }
@@ -257,18 +265,26 @@ vec4_visitor::calculate_live_intervals()
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == VGRF) {
+            const unsigned offs = type_sz(inst->dst.type) == 8 ?
+               1 : (inst->exec_size == 4 ? 0 : 4);
             for (unsigned j = 0; j < inst->regs_read(i); j++) {
                for (int c = 0; c < 4; c++) {
                   const unsigned v =
                      var_from_reg(alloc, offset(inst->src[i], j), c);
                   start[v] = MIN2(start[v], ip);
                   end[v] = ip;
+                  if (offs) {
+                     start[v + offs] = MIN2(start[v + offs], ip);
+                     end[v + offs] = ip;
+                  }
                }
             }
 	 }
       }
 
       if (inst->dst.file == VGRF) {
+         const unsigned offs = type_sz(inst->dst.type) == 8 ?
+            1 : (inst->exec_size == 4 ? 0 : 4);
          for (unsigned i = 0; i < inst->regs_written; i++) {
             for (int c = 0; c < 4; c++) {
                if (inst->dst.writemask & (1 << c)) {
@@ -276,6 +292,10 @@ vec4_visitor::calculate_live_intervals()
                      var_from_reg(alloc, offset(inst->dst, i), c);
                   start[v] = MIN2(start[v], ip);
                   end[v] = ip;
+                  if (offs) {
+                     start[v + offs] = MIN2(start[v + offs], ip);
+                     end[v + offs] = ip;
+                  }
                }
             }
          }
@@ -340,8 +360,8 @@ vec4_visitor::var_range_end(unsigned v, unsigned n) const
 bool
 vec4_visitor::virtual_grf_interferes(int a, int b)
 {
-   return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a]) <=
-             var_range_start(4 * alloc.offsets[b], 4 * alloc.sizes[b])) ||
-            (var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b]) <=
-             var_range_start(4 * alloc.offsets[a], 4 * alloc.sizes[a])));
+   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
+             var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
+            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
+             var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index 12d281e..cdbd7dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -84,8 +84,8 @@ var_from_reg(const simple_allocator &alloc, const src_reg &reg,
 {
    assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
-   return (4 * (alloc.offsets[reg.nr] + reg.reg_offset) +
-           BRW_GET_SWZ(reg.swizzle, c));
+   return (8 * (alloc.offsets[reg.nr] + reg.reg_offset) +
+           BRW_GET_SWZ(reg.swizzle, c) * MAX2(1, type_sz(reg.type) / 4));
 }
 
 inline unsigned
@@ -94,7 +94,8 @@ var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
 {
    assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
-   return 4 * (alloc.offsets[reg.nr] + reg.reg_offset) + c;
+   return 8 * (alloc.offsets[reg.nr] + reg.reg_offset) +
+          c * MAX2(1, type_sz(reg.type) / 4);
 }
 
 } /* namespace brw */
-- 
2.7.4



More information about the mesa-dev mailing list