[Mesa-dev] [PATCH 1/2] intel/fs: New method for register_byte_use_pattern for fs_inst

Jose Maria Casanova Crespo jmcasanova at igalia.com
Fri Jul 13 13:31:04 UTC 2018


For a register source/destination of an instruction the function returns
the read/write byte pattern of a 32-byte registers as a unsigned int.

The returned pattern takes into account the exec_size of the instruction,
the type bitsize, the stride and if the register is source or destination.

The objective of the functions if to help to know the read/written bytes
of the instructions to improve the liveness analysis for partial read/writes.

We manage special cases for SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL
and SHADER_OPCODE_BYTE_SCATTERED_WRITE because depending of the bitsize
parameter they have a different read pattern.
---
 src/intel/compiler/brw_fs.cpp  | 183 +++++++++++++++++++++++++++++++++
 src/intel/compiler/brw_ir_fs.h |   1 +
 2 files changed, 184 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2b8363ca362..f3045c4ff6c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -687,6 +687,189 @@ fs_inst::is_partial_write() const
            this->dst.offset % REG_SIZE != 0);
 }
 
+/**
+ * Returns a 32-bit uint whose bits represent if the associated register byte
+ * has been read/written by the instruction. The returned pattern takes into
+ * account the exec_size of the instruction, the type bitsize and the register
+ * stride and the register is source or destination for the instruction.
+ *
+ * The objective of this function is to identify which parts of the register
+ * are read or written for operations that don't read/write a full register.
+ * So we can identify in live range variable analysis if a partial write has
+ * completelly defined the part of the register used by a partial read. So we
+ * avoid extending the liveness range because all data read was already
+ * defined although the wasn't completely written.
+ */
+unsigned
+fs_inst::register_byte_use_pattern(const fs_reg &r, boolean is_dst) const
+{
+   if (is_dst) {
+      /* We don't know what is written so we return the worts case */
+      if (this->predicate && this->opcode != BRW_OPCODE_SEL)
+         return 0;
+      /* We assume that send destinations are completelly written */
+      if (this->is_send_from_grf())
+         return ~0u;
+   } else {
+      /* byte_scattered_write_logical pattern of src[1] is 32-bit aligned
+       * so the read pattern depends on the bitsize stored at src[4]
+       */
+      if (this->opcode == SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL &&
+          this->src[1].nr == r.nr) {
+         switch (this->src[4].ud) {
+         case 32:
+            return ~0u;
+         case 16:
+            return 0x33333333;
+         case 8:
+            return 0x11111111;
+         default:
+            unreachable("Unsupported bitsize at byte_scattered_write_logical");
+         }
+      }
+      /* As for byte_scattered_write_logical but we need to take into account
+       * that data written are in the payload offset 32 with SIMD8 and offset
+       * 64 with SIMD16.
+       */
+      if (this->opcode == SHADER_OPCODE_BYTE_SCATTERED_WRITE &&
+          this->src[0].nr == r.nr) {
+         fs_reg payload = this->src[0];
+         payload.offset = REG_SIZE * this->exec_size / 8;
+         if (regions_overlap(r, REG_SIZE,
+                             payload, REG_SIZE * this->exec_size / 8)) {
+            switch (this->src[2].ud) {
+            case 32:
+               return ~0u;
+            case 16:
+               return 0x33333333;
+            case 8:
+               return 0x11111111;
+            default:
+               unreachable("Unsupported bitsize at byte_scattered_write");
+            }
+         } else {
+            return ~0u;
+         }
+      }
+   }
+
+   /* We define the most conservative value in order to calculate liveness
+    * range. If it is a destination nothing is defined and if is a source
+    * all the bytes of the register could be read. So for release builds
+    * the unreachables would have always safe return value. */
+   unsigned pattern =  is_dst ? 0 : ~0u;
+
+   /* In the general case we calculate the pattern for a specific register
+    * on base of the type_size and stride. We calculate the SIMD8 pattern
+    * and then we adjust the patter if needed for different exec_sizes
+    * and offset
+    */
+   switch (type_sz(r.type)){
+   case 1:
+      switch (r.stride) {
+      case 0:
+         pattern = 0X1;
+         break;
+      case 1:
+         pattern = 0xff;
+         break;
+      case 2:
+         pattern = 0x5555;
+         break;
+      case 4:
+         pattern = 0x11111111;
+         break;
+      case 8:
+         pattern = 0x01010101;
+         break;
+      default:
+         unreachable("Unknown pattern unsupported 8-bit stride");
+      }
+      break;
+   case 2:
+      switch (r.stride) {
+      case 0:
+         pattern = 0X3;
+         break;
+      case 1:
+         pattern = 0xffff;
+         break;
+      case 2:
+         pattern = 0x33333333;
+         break;
+      case 4:
+         pattern = 0x03030303;
+         break;
+      case 8:
+         pattern = 0x00030003;
+         break;
+      default:
+         unreachable("Unknown pattern unsupported 16-bit stride");
+      }
+      break;
+   case 4:
+      switch (r.stride) {
+      case 0:
+         pattern = 0Xf;
+         break;
+      case 1:
+         pattern = ~0u;
+         break;
+      case 2:
+         pattern = 0x0f0f0f0f;
+         break;
+      case 4:
+         pattern = 0x000f000f;
+         break;
+      default:
+         unreachable("Unknown pattern unsupported 32-bit stride");
+      }
+      break;
+   case 8:
+      switch (r.stride) {
+      case 0:
+         pattern = 0Xff;
+         break;
+      case 1:
+         pattern = ~0u;
+         break;
+      case 2:
+         pattern = 0x00ff00ff;
+         break;
+      case 4:
+         pattern = 0xff;
+         break;
+      default:
+         unreachable("Unknown pattern unsupported 64-bit stride");
+      }
+      break;
+   default:
+      unreachable("Unknown pattern for unsupported bitsize ");
+   }
+
+   if (this->exec_size > 8  && r.stride * type_sz(r.type) * 8 < REG_SIZE) {
+      /* For exec_size greater than SIMD8 we repeat the pattern until it
+       * represents a full register already represent a full register */
+      pattern = pattern | (pattern << (8 * r.stride * type_sz(r.type)));
+      if (this->exec_size > 16 && r.stride * type_sz(r.type) * 16 < REG_SIZE)
+         pattern = pattern | (pattern << (16 * r.stride * type_sz(r.type)));
+   } else if (this->exec_size < 8 &&
+              r.stride * type_sz(r.type) * this->exec_size < REG_SIZE) {
+      /* For exec_size smaller than SIMD8 we reduce the pattern if its size
+       * is smaller than a full register. */
+      pattern = pattern >> (MIN2(REG_SIZE, 8 * type_sz(r.type) * r.stride) -
+                            this->exec_size * type_sz(r.type) * r.stride);
+   }
+
+   /* We adjust the pattern to the byte_offset of the register */
+   pattern = pattern << (r.offset % REG_SIZE);
+
+   assert(pattern);
+
+   return pattern;
+}
+
+
 unsigned
 fs_inst::components_read(unsigned i) const
 {
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 92dad269a34..5ea6294b8ad 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -350,6 +350,7 @@ public:
    bool equals(fs_inst *inst) const;
    bool is_send_from_grf() const;
    bool is_partial_write() const;
+   unsigned register_byte_use_pattern(const fs_reg &r, boolean is_dst) const;
    bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
    unsigned components_read(unsigned i) const;
    unsigned size_read(int arg) const;
-- 
2.17.1



More information about the mesa-dev mailing list