[Mesa-dev] [PATCH 10/41] SQUASH: i965/fs: Properly handle register widths in LOAD_PAYLOAD

Jason Ekstrand jason at jlekstrand.net
Sat Sep 20 10:22:59 PDT 2014


The LOAD_PAYLOAD instruction is a bit special because it collects a bunch
of registers (with possibly different widths) into a single payload block.
Once the payload is constructed, it's treated as a single block of data, and
most of the per-source information, such as register widths, doesn't matter
anymore.  In particular, the offset of any given source register within the
payload is the sum of the sizes of the source registers that precede it.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 31 +++++++++++++++-------
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  7 ++++-
 .../drivers/dri/i965/brw_fs_register_coalesce.cpp  | 12 +++------
 3 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9656081..4cabf75 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -268,7 +268,16 @@ fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources)
 {
    fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst, src,
                                         sources);
-   inst->regs_written = sources;
+   inst->regs_written = 0;
+   for (int i = 0; i < sources; ++i) {
+      /* The LOAD_PAYLOAD instruction only really makes sense if we are
+       * dealing with whole registers.  If this ever changes, we can deal
+       * with it later.
+       */
+      int size = src[i].effective_width(this) * type_sz(src[i].type);
+      assert(size % 32 == 0);
+      inst->regs_written += (size + 31) / 32;
+   }
 
    return inst;
 }
@@ -2862,15 +2871,19 @@ fs_visitor::lower_load_payload()
       if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
          fs_reg dst = inst->dst;
 
-         /* src[0] represents the (optional) message header. */
-         if (inst->src[0].file != BAD_FILE) {
-            inst->insert_before(block, MOV(dst, inst->src[0]));
-         }
-         dst.reg_offset++;
+         for (int i = 0; i < inst->sources; i++) {
+            dst.width = inst->src[i].effective_width(this);
+            dst.type = inst->src[i].type;
+
+            if (inst->src[i].file == BAD_FILE) {
+               /* Do nothing but otherwise increment as normal */
+            } else {
+               fs_inst *mov = MOV(dst, inst->src[i]);
+               mov->force_writemask_all = true;
+               inst->insert_before(block, mov);
+            }
 
-         for (int i = 1; i < inst->sources; i++) {
-            inst->insert_before(block, MOV(dst, inst->src[i]));
-            dst.reg_offset++;
+            dst = offset(dst, 1);
          }
 
          inst->remove(block);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index e5816df..6344ff5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -585,11 +585,15 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
 	 acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
       } else if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
                  inst->dst.file == GRF) {
+         int offset = 0;
          for (int i = 0; i < inst->sources; i++) {
+            int regs_written = ((inst->src[i].effective_width(this) *
+                                 type_sz(inst->src[i].type)) + 31) / 32;
             if (inst->src[i].file == GRF) {
                acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
                entry->dst = inst->dst;
-               entry->dst.reg_offset = i;
+               entry->dst.reg_offset = offset;
+               entry->dst.width = inst->src[i].effective_width(this);
                entry->src = inst->src[i];
                entry->opcode = inst->opcode;
                if (!entry->dst.equals(inst->src[i])) {
@@ -598,6 +602,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                   ralloc_free(entry);
                }
             }
+            offset += regs_written;
          }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 0680086..d7966d2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -69,16 +69,12 @@ is_copy_payload(const fs_visitor *v, const fs_inst *inst)
    if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written)
       return false;
 
-   const int reg = inst->src[0].reg;
-   if (inst->src[0].reg_offset != 0)
-      return false;
+   fs_reg reg = inst->src[0];
 
-   for (int i = 1; i < inst->sources; i++) {
-      if (inst->src[i].reg != reg ||
-          inst->src[i].reg_offset != i) {
+   for (int i = 0; i < inst->sources; i++)
+      if (!inst->src[i].equals(offset(reg, i)))
          return false;
-      }
-   }
+
    return true;
 }
 
-- 
2.1.0



More information about the mesa-dev mailing list