[Mesa-dev] [PATCH v2 33/53] intel/fs: Emit LINE+MAC for LINTERP with unaligned coordinates

Jason Ekstrand jason at jlekstrand.net
Sat May 26 05:29:03 UTC 2018


On g4x through Sandy Bridge, src1 (the coordinates) of the PLN
instruction is required to be an even register number.  When it's odd
(which can happen with SIMD32), we have to emit a LINE+MAC combination
instead.  Unfortunately, we can't just fall through to the gen4 case
because the input registers are still set up for PLN, which lays out the
four src1 registers differently in SIMD16 than LINE+MAC expects.
---
 src/intel/compiler/brw_fs_generator.cpp | 75 +++++++++++++++++++++++++++++----
 src/intel/compiler/brw_shader.cpp       |  3 +-
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 548a208..0ca9a4e 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -761,16 +761,73 @@ fs_generator::generate_linterp(fs_inst *inst,
 
       return true;
    } else if (devinfo->has_pln) {
-      /* From the Sandy Bridge PRM Vol. 4, Pt. 2, Section 8.3.53, "Plane":
-       *
-       *    "[DevSNB]:<src1> must be even register aligned.
-       *
-       * This restriction is lifted on Ivy Bridge.
-       */
-      assert(devinfo->gen >= 7 || (delta_x.nr & 1) == 0);
-      brw_PLN(p, dst, interp, delta_x);
+      if (devinfo->gen <= 6 && (delta_x.nr & 1) != 0) {
+         /* From the Sandy Bridge PRM Vol. 4, Pt. 2, Section 8.3.53, "Plane":
+          *
+          *    "[DevSNB]:<src1> must be even register aligned."
+          *
+          * This restriction is lifted on Ivy Bridge.
+          *
+          * This means that we need to split PLN into LINE+MAC on-the-fly.
+          * Unfortunately, the inputs are laid out for PLN and not LINE+MAC so
+          * we have to split into SIMD8 pieces.
+          */
+         if (inst->exec_size == 8) {
+            i[0] = brw_LINE(p, brw_null_reg(), interp, delta_x);
+            i[1] = brw_MAC(p, dst, suboffset(interp, 1), delta_y);
 
-      return false;
+            /* LINE writes the accumulator automatically on gen4-5.  On Sandy
+             * Bridge and later, we have to explicitly enable it.
+             */
+            if (devinfo->gen >= 6)
+               brw_inst_set_acc_wr_control(p->devinfo, i[0], true);
+
+            brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);
+
+            /* brw_set_default_saturate() is called before emitting
+             * instructions, so the saturate bit is set in each instruction,
+             * so we need to unset it on the first instruction.
+             */
+            brw_inst_set_saturate(p->devinfo, i[0], false);
+         } else {
+            brw_push_insn_state(p);
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
+
+            brw_set_default_group(p, inst->group);
+            i[0] = brw_LINE(p, brw_null_reg(), interp, offset(delta_x, 0));
+            i[1] = brw_MAC(p, offset(dst, 0),
+                           suboffset(interp, 1), offset(delta_x, 1));
+
+            brw_set_default_group(p, inst->group + 8);
+            i[2] = brw_LINE(p, brw_null_reg(), interp, offset(delta_y, 0));
+            i[3] = brw_MAC(p, offset(dst, 1),
+                           suboffset(interp, 1), offset(delta_y, 1));
+
+            brw_pop_insn_state(p);
+
+            /* LINE writes the accumulator automatically on gen4-5.  On Sandy
+             * Bridge and later, we have to explicitly enable it.
+             */
+            if (devinfo->gen >= 6) {
+               brw_inst_set_acc_wr_control(p->devinfo, i[0], true);
+               brw_inst_set_acc_wr_control(p->devinfo, i[2], true);
+            }
+
+            brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);
+            brw_inst_set_cond_modifier(p->devinfo, i[3], inst->conditional_mod);
+
+            /* brw_set_default_saturate() is called before emitting
+             * instructions, so the saturate bit is set in each instruction,
+             * so we need to unset it on the first instruction of each pair.
+             */
+            brw_inst_set_saturate(p->devinfo, i[0], false);
+            brw_inst_set_saturate(p->devinfo, i[2], false);
+         }
+         return true;
+      } else {
+         brw_PLN(p, dst, interp, delta_x);
+         return false;
+      }
    } else {
       i[0] = brw_LINE(p, brw_null_reg(), interp, delta_x);
       i[1] = brw_MAC(p, dst, suboffset(interp, 1), delta_y);
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index dfd2c5c..6d25d51 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -985,7 +985,8 @@ backend_instruction::writes_accumulator_implicitly(const struct gen_device_info
           (devinfo->gen < 6 &&
            ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
             (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) ||
-          (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln);
+          (opcode == FS_OPCODE_LINTERP &&
+           (!devinfo->has_pln || devinfo->gen <= 6));
 }
 
 bool
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list