Mesa (master): intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary.

Fri Feb 14 22:36:09 UTC 2020

Module: Mesa
Branch: master
Commit: a792e11f5ccb28f5d2430008d462c79888a077c3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a792e11f5ccb28f5d2430008d462c79888a077c3

Author: Francisco Jerez <currojerez at riseup.net>
Date:   Sat Jan  4 15:48:07 2020 -0800

intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary.

FIND_LIVE_CHANNEL was using f1.0-f1.1 as temporary flag register on
Gen7; instead use f0.0-f0.1.  In order to avoid collision with the
discard sample mask, move the latter to f1.0-f1.1.  This makes room
for keeping track of the sample mask of the second half of SIMD32
programs that use discard.

Note that some MOVs of the sample mask into f1.0 become redundant now
in lower_surface_logical_send() and lower_a64_logical_send().

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/intel/compiler/brw_fs.cpp       | 17 ++++++++++-------
 src/intel/compiler/brw_fs.h         |  6 ++++--
 src/intel/compiler/brw_fs_builder.h |  2 +-
 src/intel/compiler/brw_fs_nir.cpp   |  6 +++---
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b5834540ef1..276eb70987b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5458,15 +5458,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
           * vertical predication mode.
           */
          inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
-                         sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+                            sample_mask.type),
+                     sample_mask);
       } else {
          inst->flag_subreg = 2;
          inst->predicate = BRW_PREDICATE_NORMAL;
          inst->predicate_inverse = false;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                     sample_mask);
       }
    }
 
@@ -5646,8 +5648,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
 
       fs_reg sample_mask = sample_mask_reg(bld);
       const fs_builder ubld = bld.group(1, 0).exec_all();
-      ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-               sample_mask);
+      if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                  sample_mask);
    }
 
    fs_reg payload, payload2;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 93d3e460098..543e760b3fe 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -419,13 +419,15 @@ private:
 
 /**
  * Return the flag register used in fragment shaders to keep track of live
- * samples.
+ * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
+ * dispatch mode, while earlier generations are constrained to f0.1, which
+ * limits the dispatch width to SIMD16 for fragment shaders that use discard.
  */
 static inline unsigned
 sample_mask_flag_subreg(const fs_visitor *shader)
 {
    assert(shader->stage == MESA_SHADER_FRAGMENT);
-   return 1;
+   return shader->devinfo->gen >= 7 ? 2 : 1;
 }
 
 /**
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index fac4f5c884c..896088cc5b8 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -406,7 +406,7 @@ namespace brw {
          const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
          const dst_reg dst = vgrf(src.type);
 
-         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2;
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
 
          return src_reg(component(dst, 0));
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 93c6ee24404..5d66ead4a24 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3490,9 +3490,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_discard:
    case nir_intrinsic_demote_if:
    case nir_intrinsic_discard_if: {
-      /* We track our discarded pixels in f0.1.  By predicating on it, we can
-       * update just the flag bits that aren't yet discarded.  If there's no
-       * condition, we emit a CMP of g0 != g0, so all currently executing
+      /* We track our discarded pixels in f0.1/f1.0.  By predicating on it, we
+       * can update just the flag bits that aren't yet discarded.  If there's
+       * no condition, we emit a CMP of g0 != g0, so all currently executing
        * channels will get turned off.
        */
       fs_inst *cmp = NULL;