[Mesa-dev] [RFC 4/5] intel/compiler: Implement unordered comparisons

Thu Nov 22 18:46:47 UTC 2018

The vec4 path has only been compile-tested as there's no easy way to
generate a vec4 shader with an unordered equality.
---
 src/intel/compiler/brw_compiler.c   |  3 +++
 src/intel/compiler/brw_fs_nir.cpp   | 20 +++++++++++++-------
 src/intel/compiler/brw_vec4_nir.cpp | 21 +++++++++++++++++++++
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index fe632c5badc..f9e8fa09a34 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -42,6 +42,9 @@
    .lower_fdiv = true,                                                        \
    .lower_flrp64 = true,                                                      \
    .lower_ldexp = true,                                                       \
+   .lower_fltu = true,                                                        \
+   .lower_fgeu = true,                                                        \
+   .lower_fne_to_fequ = true,                                                 \
    .lower_cs_local_id_from_index = true,                                      \
    .lower_device_index_to_zero = true,                                        \
    .native_integers = true,                                                   \
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index a62d521bb5d..eba3611e447 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1049,6 +1049,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_flt:
    case nir_op_fge:
    case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fequ:
    case nir_op_fneu: {
       fs_reg dest = result;
 
@@ -1056,26 +1058,30 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       if (bit_size != 32)
          dest = bld.vgrf(op[0].type, 1);
 
-      brw_conditional_mod cond;
       switch (instr->op) {
       case nir_op_flt:
-         cond = BRW_CONDITIONAL_L;
+         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_L);
          break;
       case nir_op_fge:
-         cond = BRW_CONDITIONAL_GE;
+         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_GE);
          break;
       case nir_op_feq:
-         cond = BRW_CONDITIONAL_Z;
+         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z);
+         break;
+      case nir_op_fequ:
+         bld.CMP(dest, op[0], op[0], BRW_CONDITIONAL_NZ);
+         set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */
+                           bld.CMP(dest, op[1], op[1], BRW_CONDITIONAL_NZ));
+         set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */
+                           bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z));
          break;
       case nir_op_fneu:
-         cond = BRW_CONDITIONAL_NZ;
+         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_NZ);
          break;
       default:
          unreachable("bad opcode");
       }
 
-      bld.CMP(dest, op[0], op[1], cond);
-
       if (bit_size > 32) {
          bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0));
       } else if(bit_size < 32) {
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index f7f46f5034c..32559e1aade 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1366,6 +1366,27 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
    }
 
+   case nir_op_fequ: {
+      dst_reg cmp_res = dst;
+      if (nir_src_bit_size(instr->src[0].src) == 64)
+         cmp_res = dst_reg(this, glsl_type::dvec4_type);
+
+      vec4_instruction *inst;
+      inst = emit(CMP(cmp_res, op[0], op[0], BRW_CONDITIONAL_NZ));
+      inst = emit(CMP(cmp_res, op[1], op[1], BRW_CONDITIONAL_NZ));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      inst->predicate_inverse = true;
+      inst = emit(CMP(cmp_res, op[0], op[1], BRW_CONDITIONAL_Z));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      inst->predicate_inverse = true;
+
+      if (nir_src_bit_size(instr->src[0].src) == 64) {
+         dst_reg cmp_res32 = dst_reg(this, glsl_type::bvec4_type);
+         emit(VEC4_OPCODE_PICK_LOW_32BIT, cmp_res32, src_reg(cmp_res));
+         emit(MOV(dst, src_reg(cmp_res32)));
+      }
+   }
+
    case nir_op_ball_iequal2:
    case nir_op_ball_iequal3:
    case nir_op_ball_iequal4:
-- 
2.19.1