[Mesa-dev] [PATCH 07/11] i965/fs: Add a scale factor to emit_fsign

Mon Sep 10 23:29:10 UTC 2018

From: Ian Romanick <ian.d.romanick at intel.com>

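Normally fsign generates -1, 0, or +1.  The new scale factor, S, causes
fsign to generate -S, 0, or +S.  S is the other operand of a nir_op_fmul
that has the fsign as one of its sources; fsign_src selects that source.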

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/intel/compiler/brw_fs.h       |  3 +-
 src/intel/compiler/brw_fs_nir.cpp | 61 +++++++++++++++++++++++++++++++--------
 2 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index c45f0f608fc..8169498bf5e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -186,7 +186,8 @@ public:
    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
    fs_reg resolve_source_modifiers(const fs_reg &src);
    void emit_discard_jump();
-   void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr);
+   void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
+                   unsigned fsign_src);
    bool opt_peephole_sel();
    bool opt_peephole_csel();
    bool opt_peephole_predicated_break();
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index d0f25e96c3e..ef4c41da132 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -670,14 +670,19 @@ brw_rnd_mode_from_nir_op (const nir_op op) {
 }
 
 /**
- * Emit code for nir_op_fsign
+ * Emit code for nir_op_fsign possibly fused with a nir_op_fmul
+ *
+ * If \c instr is a \c nir_op_fmul rather than a \c nir_op_fsign, \c fsign_src
+ * is the index of its \c nir_op_fsign source; the other source is the scale.
  */
 void
-fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr)
+fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
+                       unsigned fsign_src)
 {
    fs_inst *inst;
 
-   assert(instr->op == nir_op_fsign);
+   assert(instr->op == nir_op_fsign || instr->op == nir_op_fmul);
+   assert(fsign_src < nir_op_infos[instr->op].num_inputs);
 
    fs_reg result = get_nir_dest(instr->dest.dest);
    result.type = brw_type_for_nir_type(devinfo,
@@ -685,9 +690,18 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr)
                      nir_dest_bit_size(instr->dest.dest)));
 
    const nir_alu_src *sources[2] = { &instr->src[0], NULL };
-   const unsigned num_sources = 1;
+   const unsigned num_sources = instr->op == nir_op_fsign ? 1 : 2;
    fs_reg op[2];
 
+   if (instr->op != nir_op_fsign) {
+      const nir_alu_instr *const fsign_instr =
+         nir_src_as_alu_instr((nir_src *) &instr->src[fsign_src].src);
+
+      assert(!fsign_instr->dest.saturate);
+      sources[0] = &fsign_instr->src[0];
+      sources[1] = &instr->src[1 - fsign_src];
+   }
+
    for (unsigned i = 0; i < num_sources; i++) {
       op[i] = get_nir_src(sources[i]->src);
 
@@ -714,16 +728,20 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr)
    for (unsigned i = 0; i < num_sources; i++)
       op[i] = offset(op[i], bld, sources[i]->swizzle[channel]);
 
-   assert(!instr->dest.saturate);
    if (op[0].abs) {
       /* Straightforward since the source can be assumed to be either strictly
        * >= 0 or strictly <= 0 depending on the setting of the negate flag.
        */
       set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0]));
 
-      inst = (op[0].negate)
-         ? bld.MOV(result, brw_imm_f(-1.0f))
-         : bld.MOV(result, brw_imm_f(1.0f));
+      if (sources[1] == NULL) {
+         inst = (op[0].negate)
+            ? bld.MOV(result, brw_imm_f(-1.0f))
+            : bld.MOV(result, brw_imm_f(1.0f));
+      } else {
+         op[1].negate = (op[0].negate != op[1].negate);
+         inst = bld.MOV(result, op[1]);
+      }
 
       set_predicate(BRW_PREDICATE_NORMAL, inst);
    } else if (type_sz(op[0].type) < 8) {
@@ -739,7 +757,14 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr)
       result.type = BRW_REGISTER_TYPE_UD;
       bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
 
-      inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
+      if (sources[1] == NULL)
+         inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
+      else {
+         /* Use XOR here to get the result sign correct. */
+         inst = bld.XOR(result_int, result_int,
+                        retype(op[1], BRW_REGISTER_TYPE_UD));
+      }
+
       inst->predicate = BRW_PREDICATE_NORMAL;
    } else {
       /* For doubles we do the same but we need to consider:
@@ -759,8 +784,20 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr)
       bld.AND(r, subscript(op[0], BRW_REGISTER_TYPE_UD, 1),
               brw_imm_ud(0x80000000u));
 
-      set_predicate(BRW_PREDICATE_NORMAL,
-                    bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
+      if (sources[1] == NULL) {
+         set_predicate(BRW_PREDICATE_NORMAL,
+                       bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
+      } else {
+         /* This could be done better in some cases.  If the scale is an
+          * immediate with the low 32-bits all 0, emitting a separate XOR and
+          * OR would allow an algebraic optimization to remove the OR.  There
+          * are currently zero instances of fsign(double(x))*IMM in shader-db
+          * or any test suite, so it is hard to care at this time.
+          */
+         fs_reg result_int64 = retype(result, BRW_REGISTER_TYPE_UQ);
+         inst = bld.XOR(result_int64, result_int64,
+                        retype(op[1], BRW_REGISTER_TYPE_UQ));
+      }
    }
 }
 
@@ -768,7 +805,7 @@ void
 fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
 {
    if (instr->op == nir_op_fsign) {
-      emit_fsign(bld, instr);
+      emit_fsign(bld, instr, 0);
       return;
    }
 
-- 
2.14.4