<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Tue, Nov 27, 2018 at 12:55 PM Ian Romanick <<a href="mailto:idr@freedesktop.org">idr@freedesktop.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On 11/22/2018 10:46 AM, Jason Ekstrand wrote:<br>
> The vec4 path has only been compile-tested as there's no easy way to<br>
> generate a vec4 shader with an unordered equality.<br>
> ---<br>
>  src/intel/compiler/brw_compiler.c   |  3 +++<br>
>  src/intel/compiler/brw_fs_nir.cpp   | 20 +++++++++++++-------<br>
>  src/intel/compiler/brw_vec4_nir.cpp | 21 +++++++++++++++++++++<br>
>  3 files changed, 37 insertions(+), 7 deletions(-)<br>
> <br>
> diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c<br>
> index fe632c5badc..f9e8fa09a34 100644<br>
> --- a/src/intel/compiler/brw_compiler.c<br>
> +++ b/src/intel/compiler/brw_compiler.c<br>
> @@ -42,6 +42,9 @@<br>
>     .lower_fdiv = true,                                                        \<br>
>     .lower_flrp64 = true,                                                      \<br>
>     .lower_ldexp = true,                                                       \<br>
> +   .lower_fltu = true,                                                        \<br>
> +   .lower_fgeu = true,                                                        \<br>
<br>
Can't we use cmpn.l and <a href="http://cmpn.ge" rel="noreferrer" target="_blank">cmpn.ge</a> for these?  On some platforms it has to<br>
be split into two SIMD8 instructions, but that seems better than the<br>
lowering... or maybe just use those on BDW+?<br></blockquote><div><br></div><div>I didn't know about CMPN!  I guess that would do the trick though I suppose we have a pile of back-end work to be able to optimize it.  I think the back-end usually gets rid of the inot by flipping the cmod but maybe not.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
> +   .lower_fne_to_fequ = true,                                                 \<br>
>     .lower_cs_local_id_from_index = true,                                      \<br>
>     .lower_device_index_to_zero = true,                                        \<br>
>     .native_integers = true,                                                   \<br>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp<br>
> index a62d521bb5d..eba3611e447 100644<br>
> --- a/src/intel/compiler/brw_fs_nir.cpp<br>
> +++ b/src/intel/compiler/brw_fs_nir.cpp<br>
> @@ -1049,6 +1049,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)<br>
>     case nir_op_flt:<br>
>     case nir_op_fge:<br>
>     case nir_op_feq:<br>
> +   case nir_op_fne:<br>
> +   case nir_op_fequ:<br>
>     case nir_op_fneu: {<br>
>        fs_reg dest = result;<br>
>  <br>
> @@ -1056,26 +1058,30 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)<br>
>        if (bit_size != 32)<br>
>           dest = bld.vgrf(op[0].type, 1);<br>
>  <br>
> -      brw_conditional_mod cond;<br>
>        switch (instr->op) {<br>
>        case nir_op_flt:<br>
> -         cond = BRW_CONDITIONAL_L;<br>
> +         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_L);<br>
>           break;<br>
>        case nir_op_fge:<br>
> -         cond = BRW_CONDITIONAL_GE;<br>
> +         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_GE);<br>
>           break;<br>
>        case nir_op_feq:<br>
> -         cond = BRW_CONDITIONAL_Z;<br>
> +         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z);<br>
> +         break;<br>
> +      case nir_op_fequ:<br>
> +         bld.CMP(dest, op[0], op[0], BRW_CONDITIONAL_NZ);<br>
> +         set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */<br>
> +                           bld.CMP(dest, op[1], op[1], BRW_CONDITIONAL_NZ));<br>
> +         set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */<br>
> +                           bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z));<br>
<br>
OpFUnordEqual is (isnan(a) || isnan(b) || a == b), and that's literally<br>
what this code does.  It seems like this could be implemented (perhaps<br>
as a lowering?) as (OpFUnordGreaterThanEqual(a,b) &&<br>
OpFUnordGreaterThanEqual(b, a)) via <a href="http://cmpn.ge" rel="noreferrer" target="_blank">cmpn.ge</a>.  Using this same technique,<br>
that would be 2 instructions (on BDW+) instead of 3.  HSW has to do cmpn<br>
as two SIMD8 instructions, so this may be better there.  Dunno.<br></blockquote><div><br></div><div>That's a good idea.  Unfortunately, CMPN dosn't modify Z or NZ.  We could also lower fequ(x, y) to inot(fne(x, y)) and implement fne(x, y) as (x < y) || (x > y) which wouldn't require CMPN.</div><div><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
>           break;<br>
>        case nir_op_fneu:<br>
> -         cond = BRW_CONDITIONAL_NZ;<br>
> +         bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_NZ);<br>
>           break;<br>
>        default:<br>
>           unreachable("bad opcode");<br>
>        }<br>
>  <br>
> -      bld.CMP(dest, op[0], op[1], cond);<br>
> -<br>
>        if (bit_size > 32) {<br>
>           bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0));<br>
>        } else if(bit_size < 32) {<br>
> diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp<br>
> index f7f46f5034c..32559e1aade 100644<br>
> --- a/src/intel/compiler/brw_vec4_nir.cpp<br>
> +++ b/src/intel/compiler/brw_vec4_nir.cpp<br>
> @@ -1366,6 +1366,27 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
>        break;<br>
>     }<br>
>  <br>
> +   case nir_op_fequ: {<br>
> +      dst_reg cmp_res = dst;<br>
> +      if (nir_src_bit_size(instr->src[0].src) == 64)<br>
> +         cmp_res = dst_reg(this, glsl_type::dvec4_type);<br>
> +<br>
> +      vec4_instruction *inst;<br>
> +      inst = emit(CMP(cmp_res, op[0], op[0], BRW_CONDITIONAL_NZ));<br>
> +      inst = emit(CMP(cmp_res, op[1], op[1], BRW_CONDITIONAL_NZ));<br>
> +      inst->predicate = BRW_PREDICATE_NORMAL;<br>
> +      inst->predicate_inverse = true;<br>
> +      inst = emit(CMP(cmp_res, op[0], op[1], BRW_CONDITIONAL_Z));<br>
> +      inst->predicate = BRW_PREDICATE_NORMAL;<br>
> +      inst->predicate_inverse = true;<br>
> +<br>
> +      if (nir_src_bit_size(instr->src[0].src) == 64) {<br>
> +         dst_reg cmp_res32 = dst_reg(this, glsl_type::bvec4_type);<br>
> +         emit(VEC4_OPCODE_PICK_LOW_32BIT, cmp_res32, src_reg(cmp_res));<br>
> +         emit(MOV(dst, src_reg(cmp_res32)));<br>
> +      }<br>
> +   }<br>
> +<br>
>     case nir_op_ball_iequal2:<br>
>     case nir_op_ball_iequal3:<br>
>     case nir_op_ball_iequal4:<br>
> <br>
<br>
</blockquote></div></div>