[Mesa-dev] [PATCH 2/4] i965/fs/skl+: Prepare LOD-zero optimization for sample_lz.

Matt Turner mattst88 at gmail.com
Wed May 4 06:04:32 UTC 2016


The next patch will add support for recognizing sample_lz, whose LOD
argument is at a different location in the message payload. This patch
generalizes the LOD-zero optimization (opt_ld_lz) to work with a
per-opcode LOD offset so that it can handle that, and renames it to
opt_sample_lz because sample_lz is a much more important thing to
recognize.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 58 ++++++++++++++++++++++--------------
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 +-
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15df298..dc2af66 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2483,30 +2483,35 @@ fs_visitor::opt_zero_samples()
    return progress;
 }
 
-static bool
-lod_source_is_zero(const fs_inst *send_inst)
+static fs_inst *
+lod_source_is_zero(const fs_inst *send_inst, int lod_offset)
 {
    int reg_offset = send_inst->exec_size / 8 * 2 + send_inst->header_size;
    const fs_reg src = byte_offset(send_inst->src[0], reg_offset * 32);
 
    /* Look for the last instruction that writes to the source */
-   foreach_inst_in_block_reverse_starting_from(const fs_inst, inst, send_inst) {
-      if (inst->overwrites_reg(src)) {
-         return (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
-                 inst->src[inst->header_size + 2].is_zero());
+   foreach_inst_in_block_reverse_starting_from(fs_inst, inst, send_inst) {
+      if (inst->overwrites_reg(src) &&
+          inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+          inst->src[inst->header_size + lod_offset].is_zero()) {
+         return inst;
       }
    }
 
-   return false;
+   return NULL;
 }
 
 /**
- * Replace LD sample messages that have a zero LOD with LD_LZ. This
- * instruction is available since Gen9. It would help for doing texelFetch
- * when passing three coordinates because then the LOD can be skipped.
+ * Replace sample messages that have a zero LOD with the special _LZ messages.
+ * These instructions are available since Gen9. The available _LZ messages
+ * are:
+ *
+ *    - ld_lz
+ *    - sample_lz
+ *    - sample_c_lz
  */
 bool
-fs_visitor::opt_ld_lz()
+fs_visitor::opt_sample_lz()
 {
    if (devinfo->gen < 9)
       return false;
@@ -2520,20 +2525,27 @@ fs_visitor::opt_ld_lz()
       /* If the LOD parameter is not sent or is a constant zero then we can
        * change the instruction.
        */
+      int lod_offset;
+      if (inst->opcode == SHADER_OPCODE_TXF) {
+         lod_offset = 2;
+      } else {
+         unreachable("not reached");
+      }
       bool lod_included = (inst->mlen - inst->header_size >=
-                           inst->exec_size / 8 * 3);
-      if (lod_included && !lod_source_is_zero(inst))
-         continue;
+                           inst->exec_size / 8 * (lod_offset + 1));
 
-      inst->opcode = SHADER_OPCODE_TXF_LZ;
+      if (lod_included && inst->mlen - inst->header_size > inst->exec_size / 8) {
+         fs_inst *load_payload = lod_source_is_zero(inst, lod_offset);
+         if (!load_payload)
+            continue;
 
-      if (lod_included) {
          inst->mlen -= inst->exec_size / 8;
 
-         /* If the r coordinate is included then we need a new LOAD_PAYLOAD
-          * instruction which has it in the right place.
+         /* If the lod is included then we need a new LOAD_PAYLOAD instruction
+          * which has latter arguments in the right places.
           */
-         if (inst->mlen - inst->header_size >= inst->exec_size / 8 * 3) {
+         if (inst->mlen - inst->header_size >=
+             inst->exec_size / 8 * (lod_offset + 1)) {
             const fs_builder ibld(this, block, inst);
             fs_reg send_header = fs_reg(VGRF, alloc.allocate(inst->mlen),
                                         BRW_REGISTER_TYPE_F);
@@ -2544,11 +2556,11 @@ fs_visitor::opt_ld_lz()
 
             for (int i = 0; i < n_sources; i++) {
                int j;
-               if (i >= inst->header_size + 2)
+               if (i >= inst->header_size + lod_offset)
                   j = i + 1;
                else
                   j = i;
-               new_sources[i] = offset(inst->src[0], ibld, j);
+               new_sources[i] = load_payload->src[j];
             }
 
             /* The LOAD_PAYLOAD helper is not used for the same reasons given
@@ -2568,6 +2580,8 @@ fs_visitor::opt_ld_lz()
          }
       }
 
+      inst->opcode = SHADER_OPCODE_TXF_LZ;
+
       progress = true;
    }
 
@@ -5425,7 +5439,7 @@ fs_visitor::optimize()
       OPT(opt_redundant_discard_jumps);
       OPT(opt_saturate_propagation);
       OPT(opt_zero_samples);
-      OPT(opt_ld_lz);
+      OPT(opt_sample_lz);
       OPT(register_coalesce);
       OPT(compute_to_mrf);
       OPT(eliminate_find_live_channel);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 66b39dc..9c01709 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -227,7 +227,7 @@ public:
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();
    bool opt_zero_samples();
-   bool opt_ld_lz();
+   bool opt_sample_lz();
    void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
                      uint32_t spill_offset, int count);
    void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
-- 
2.7.3



More information about the mesa-dev mailing list