[Mesa-dev] [PATCH v2 10/15] SQUASH: i965/fs: Rework fs_visitor::lower_load_payload

Jason Ekstrand jason at jlekstrand.net
Tue May 5 18:28:13 PDT 2015


Instead of the complicated and broken-by-design pile of heuristics we had
before, we now have a straightforward lowering:

 1) All header sources are copied directly using force_writemask_all and,
    since they are guaranteed to be a single register, there are no
    force_sechalf issues.

 2) All non-header sources are copied using the exact same force_sechalf
    and force_writemask_all modifiers as the LOAD_PAYLOAD operation itself.

 3) In order to accommodate older gens that need interleaved colors,
    lower_load_payload detects when the destination is a COMPR4 register
    and automatically interleaves the non-header sources.  The
    lower_load_payload pass does the right thing here regardless of whether
    or not the hardware actually supports COMPR4.

v2: Don't allow the saturate flag on LOAD_PAYLOAD instructions

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 171 ++++++++++++++-------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   3 -
 2 files changed, 90 insertions(+), 84 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ee520bb..e7095c9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3419,99 +3419,108 @@ fs_visitor::lower_load_payload()
 {
    bool progress = false;
 
-   int vgrf_to_reg[alloc.count];
-   int reg_count = 0;
-   for (unsigned i = 0; i < alloc.count; ++i) {
-      vgrf_to_reg[i] = reg_count;
-      reg_count += alloc.sizes[i];
-   }
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
+         continue;
 
-   struct {
-      bool written:1; /* Whether this register has ever been written */
-      bool force_writemask_all:1;
-      bool force_sechalf:1;
-   } metadata[reg_count];
-   memset(metadata, 0, sizeof(metadata));
+      assert(inst->dst.file == MRF || inst->dst.file == GRF);
+      assert(inst->saturate == false);
 
-   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF) {
-         const int dst_reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
-         bool force_sechalf = inst->force_sechalf &&
-                              !inst->force_writemask_all;
-         bool toggle_sechalf = inst->dst.width == 16 &&
-                               type_sz(inst->dst.type) == 4 &&
-                               !inst->force_writemask_all;
-         for (int i = 0; i < inst->regs_written; ++i) {
-            metadata[dst_reg + i].written = true;
-            metadata[dst_reg + i].force_sechalf = force_sechalf;
-            metadata[dst_reg + i].force_writemask_all = inst->force_writemask_all;
-            force_sechalf = (toggle_sechalf != force_sechalf);
+      fs_reg dst = inst->dst;
+
+      /* Get rid of COMPR4.  We'll add it back in if we need it */
+      if (dst.file == MRF)
+         dst.reg = dst.reg & ~BRW_MRF_COMPR4;
+
+      dst.width = 8;
+      for (uint8_t i = 0; i < inst->header_size; i++) {
+         if (inst->src[i].file != BAD_FILE) {
+            fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD);
+            fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD);
+            mov_src.width = 8;
+            fs_inst *mov = MOV(mov_dst, mov_src);
+            mov->force_writemask_all = true;
+            inst->insert_before(block, mov);
          }
+         dst = offset(dst, 1);
       }
 
-      if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
-         assert(inst->dst.file == MRF || inst->dst.file == GRF);
-         fs_reg dst = inst->dst;
-
-         for (int i = 0; i < inst->sources; i++) {
-            dst.width = inst->src[i].effective_width;
-            dst.type = inst->src[i].type;
-
-            if (inst->src[i].file == BAD_FILE) {
-               /* Do nothing but otherwise increment as normal */
-            } else if (dst.file == MRF &&
-                       dst.width == 8 &&
-                       devinfo->has_compr4 &&
-                       i + 4 < inst->sources &&
-                       inst->src[i + 4].equals(horiz_offset(inst->src[i], 8))) {
-               fs_reg compr4_dst = dst;
-               compr4_dst.reg += BRW_MRF_COMPR4;
-               compr4_dst.width = 16;
-               fs_reg compr4_src = inst->src[i];
-               compr4_src.width = 16;
-               fs_inst *mov = MOV(compr4_dst, compr4_src);
-               mov->force_writemask_all = true;
-               inst->insert_before(block, mov);
-               /* Mark i+4 as BAD_FILE so we don't emit a MOV for it */
-               inst->src[i + 4].file = BAD_FILE;
-            } else {
-               fs_inst *mov = MOV(dst, inst->src[i]);
-               if (inst->src[i].file == GRF) {
-                  int src_reg = vgrf_to_reg[inst->src[i].reg] +
-                                inst->src[i].reg_offset;
-                  mov->force_sechalf = metadata[src_reg].force_sechalf;
-                  mov->force_writemask_all = metadata[src_reg].force_writemask_all;
+      dst.width = inst->exec_size;
+      if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
+          inst->exec_size > 8) {
+         /* In this case, the payload portion of the LOAD_PAYLOAD isn't
+          * a straightforward copy.  Instead, the result of the
+          * LOAD_PAYLOAD is treated as interleaved and the first four
+          * non-header sources are unpacked as:
+          *
+          * m + 0: r0
+          * m + 1: g0
+          * m + 2: b0
+          * m + 3: a0
+          * m + 4: r1
+          * m + 5: g1
+          * m + 6: b1
+          * m + 7: a1
+          *
+          * This is used for gen <= 5 fb writes.
+          */
+         assert(inst->exec_size == 16);
+         assert(inst->header_size + 4 <= inst->sources);
+         for (uint8_t i = inst->header_size; i < inst->header_size + 4; i++) {
+            if (inst->src[i].file != BAD_FILE) {
+               if (devinfo->has_compr4) {
+                  fs_reg compr4_dst = retype(dst, inst->src[i].type);
+                  compr4_dst.reg |= BRW_MRF_COMPR4;
+
+                  fs_inst *mov = MOV(compr4_dst, inst->src[i]);
+                  mov->force_writemask_all = inst->force_writemask_all;
+                  inst->insert_before(block, mov);
                } else {
-                  /* We don't have any useful metadata for immediates or
-                   * uniforms.  Assume that any of the channels of the
-                   * destination may be used.
-                   */
-                  assert(inst->src[i].file == IMM ||
-                         inst->src[i].file == UNIFORM);
-                  mov->force_writemask_all = true;
-               }
-
-               if (dst.file == GRF) {
-                  const int dst_reg = vgrf_to_reg[dst.reg] + dst.reg_offset;
-                  const bool force_writemask = mov->force_writemask_all;
-                  metadata[dst_reg].force_writemask_all = force_writemask;
-                  metadata[dst_reg].force_sechalf = mov->force_sechalf;
-                  if (dst.width * type_sz(dst.type) > 32) {
-                     assert(!mov->force_sechalf);
-                     metadata[dst_reg + 1].force_writemask_all = force_writemask;
-                     metadata[dst_reg + 1].force_sechalf = !force_writemask;
-                  }
+                  /* Platform doesn't have COMPR4.  We have to fake it */
+                  fs_reg mov_dst = retype(dst, inst->src[i].type);
+                  mov_dst.width = 8;
+
+                  fs_inst *mov = MOV(mov_dst, half(inst->src[i], 0));
+                  mov->force_writemask_all = inst->force_writemask_all;
+                  inst->insert_before(block, mov);
+
+                  mov = MOV(offset(mov_dst, 4), half(inst->src[i], 1));
+                  mov->force_writemask_all = inst->force_writemask_all;
+                  mov->force_sechalf = true;
+                  inst->insert_before(block, mov);
                }
-
-               inst->insert_before(block, mov);
             }
 
-            dst = offset(dst, 1);
+            dst.reg++;
          }
 
-         inst->remove(block);
-         progress = true;
+         /* The loop above only ever incremented us through the first set
+          * of 4 registers.  However, thanks to the magic of COMPR4, we
+          * actually wrote to the first 8 registers, so we need to take
+          * that into account now.
+          */
+         dst.reg += 4;
+
+         /* The COMPR4 code took care of the first 4 sources.  We'll let
+          * the regular path handle any remaining sources.  Yes, we are
+          * modifying the instruction but we're about to delete it so
+          * this really doesn't hurt anything.
+          */
+         inst->header_size += 4;
       }
+
+      for (uint8_t i = inst->header_size; i < inst->sources; i++) {
+         if (inst->src[i].file != BAD_FILE) {
+            fs_inst *mov = MOV(retype(dst, inst->src[i].type),
+                               inst->src[i]);
+            mov->force_writemask_all = inst->force_writemask_all;
+            inst->insert_before(block, mov);
+         }
+         dst = offset(dst, 1);
+      }
+
+      inst->remove(block);
+      progress = true;
    }
 
    if (progress)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index c3ccc9d..772285e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3602,9 +3602,6 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
                               color.type, color.width);
             inst = emit(MOV(dst[len], offset(color, i)));
             inst->saturate = key->clamp_fragment_color;
-         } else if (color.width == 16) {
-            /* We need two BAD_FILE slots for a 16-wide color */
-            len++;
          }
          len++;
       }
-- 
2.3.6



More information about the mesa-dev mailing list