[Mesa-dev] [PATCH 39/51] intel/compiler/fs: Consider logic ops on 16-bit booleans

Fri Nov 24 12:27:06 UTC 2017

Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 70 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 2a32b1449a..aff592c354 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1662,7 +1662,75 @@ fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr)
     * one component per register.
     */
    const bool pad_components_to_full_register = true;
-   return get_nir_dest(instr->dest.dest, pad_components_to_full_register);
+
+   switch (instr->op) {
+   case nir_op_flt:
+   case nir_op_fge:
+   case nir_op_feq:
+   case nir_op_fne: {
+      assert(instr->dest.dest.is_ssa);
+       
+      if (nir_src_bit_size(instr->src[0].src) > 16)
+         return get_nir_dest(instr->dest.dest);
+
+      assert(nir_src_bit_size(instr->src[0].src) == 16 &&
+             nir_src_bit_size(instr->src[1].src) == 16);
+
+      /* The destination type for comparison operations is boolean, which NIR
+       * treats as having 32-bit size. If, however, the sources are 16-bit,
+       * hardware will produce a 16-bit result (0xFFFF/0x0000). Therefore set
+       * the destination type accordingly.
+       */
+      nir_ssa_values[instr->dest.dest.ssa.index] =
+         bld.vgrf(BRW_REGISTER_TYPE_HF,
+                  instr->dest.dest.ssa.num_components,
+                  pad_components_to_full_register);
+      return nir_ssa_values[instr->dest.dest.ssa.index];
+   }
+   case nir_op_inot:
+   case nir_op_ixor:
+   case nir_op_ior:
+   case nir_op_iand: {
+      assert(instr->dest.dest.is_ssa);
+       
+      const fs_reg src0 = get_nir_src(instr->src[0].src);
+      const fs_reg src1 = get_nir_src(instr->src[0].src);
+
+      /* TODO: This specifically prepares for mixed precision operations which
+       *       in principle shouldn't happen. There is, however, a corner case
+       *       where this is possible. As NIR doesn't consider how booleans
+       *       are produced, we may end up here with one source operand
+       *       produced from an operation with 32-bit sources and another
+       *       produced from an operation with 16-bit sources.
+       *       This is handled by marking this operation as producing 16 bits
+       *       and relying on nir_emit_alu() to adjust the 32-bit source
+       *       operand to 16 bits with stride == 2. Recall that 32-bit
+       *       booleans are just 0xFFFFFFFF/0x00000000 and it suffices to read
+       *       only the lower 16 bits.
+       * WARN: This blindly assumes that mixed precision integer source
+       *       operands represent boolean values. There is no way of checking
+       *       if that holds.
+       */
+      if (brw_reg_type_to_size(src0.type) > 2 &&
+          brw_reg_type_to_size(src1.type) > 2)
+         return get_nir_dest(instr->dest.dest);
+
+      /* Translation from GLSL to NIR produces logical operations with
+       * integer operands even when the operands are booleans. See handling
+       * of ir_binop_bit_*.
+       * As hardware will produce 16-bit results when the sources are 16-bit,
+       * set the destination type accordingly.
+       */
+      nir_ssa_values[instr->dest.dest.ssa.index] =
+         bld.vgrf(BRW_REGISTER_TYPE_W,
+                  instr->dest.dest.ssa.num_components,
+                  pad_components_to_full_register);
+      return nir_ssa_values[instr->dest.dest.ssa.index];
+   }
+   default:
+      return get_nir_dest(instr->dest.dest,
+                          pad_components_to_full_register);
+   }
 }
 
 fs_reg
-- 
2.11.0