<p dir="ltr"><br>
On Oct 29, 2015 5:51 PM, "Matt Turner" &lt;<a href="mailto:mattst88@gmail.com">mattst88@gmail.com</a>&gt; wrote:<br>
><br>
> We've made a mistake in calling the Channel Enable bits "writemask",<br>
> because they do more than control which channels of the destination are<br>
> written -- they actually control which channels are enabled (surprise!<br>
> surprise!)<br>
><br>
> So, if we emit<br>
><br>
> cmp.z.f0(8) null.xy&lt;1&gt;D g10&lt;4,4,1&gt;.xyzzD g2&lt;0,4,1&gt;.xyzzD<br>
> mov(8) g12&lt;1&gt;.xUD 0x00000000UD<br>
> (+f0.all4h) mov(8) g12&lt;1&gt;.xUD 0xffffffffUD<br>
><br>
> where the CMP instruction has only .xy channel enables, it won't write<br>
> the .zw channels of the flag register, which are of course read by the<br>
> +f0.all4h predicate.<br>
><br>
> We need to always emit CMP instructions whose flag result might be read<br>
> by such a predicate with all channels enabled.</p>
<p dir="ltr">Makes sense.</p>
<p dir="ltr">Reviewed-by: Jason Ekstrand &lt;<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>&gt;</p>
<p dir="ltr">> ---<br>
> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 52 ++++++------------------------<br>
> 1 file changed, 10 insertions(+), 42 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
> index 0f04f65..33cc02e 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp<br>
> @@ -1146,26 +1146,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
> case nir_op_ball_iequal3:<br>
> case nir_op_ball_fequal4:<br>
> case nir_op_ball_iequal4: {<br>
> - dst_reg tmp = dst_reg(this, glsl_type::bool_type);<br>
> + unsigned swiz =<br>
> + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);<br>
><br>
> - switch (instr->op) {<br>
> - case nir_op_ball_fequal2:<br>
> - case nir_op_ball_iequal2:<br>
> - tmp.writemask = WRITEMASK_XY;<br>
> - break;<br>
> - case nir_op_ball_fequal3:<br>
> - case nir_op_ball_iequal3:<br>
> - tmp.writemask = WRITEMASK_XYZ;<br>
> - break;<br>
> - case nir_op_ball_fequal4:<br>
> - case nir_op_ball_iequal4:<br>
> - tmp.writemask = WRITEMASK_XYZW;<br>
> - break;<br>
> - default:<br>
> - unreachable("not reached");<br>
> - }<br>
> -<br>
> - emit(CMP(tmp, op[0], op[1],<br>
> + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),<br>
> brw_conditional_for_nir_comparison(instr->op)));<br>
> emit(MOV(dst, src_reg(0)));<br>
> inst = emit(MOV(dst, src_reg(~0)));<br>
> @@ -1179,26 +1163,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
> case nir_op_bany_inequal3:<br>
> case nir_op_bany_fnequal4:<br>
> case nir_op_bany_inequal4: {<br>
> - dst_reg tmp = dst_reg(this, glsl_type::bool_type);<br>
> + unsigned swiz =<br>
> + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);<br>
><br>
> - switch (instr->op) {<br>
> - case nir_op_bany_fnequal2:<br>
> - case nir_op_bany_inequal2:<br>
> - tmp.writemask = WRITEMASK_XY;<br>
> - break;<br>
> - case nir_op_bany_fnequal3:<br>
> - case nir_op_bany_inequal3:<br>
> - tmp.writemask = WRITEMASK_XYZ;<br>
> - break;<br>
> - case nir_op_bany_fnequal4:<br>
> - case nir_op_bany_inequal4:<br>
> - tmp.writemask = WRITEMASK_XYZW;<br>
> - break;<br>
> - default:<br>
> - unreachable("not reached");<br>
> - }<br>
> -<br>
> - emit(CMP(tmp, op[0], op[1],<br>
> + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),<br>
> brw_conditional_for_nir_comparison(instr->op)));<br>
><br>
> emit(MOV(dst, src_reg(0)));<br>
> @@ -1463,11 +1431,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
> case nir_op_bany2:<br>
> case nir_op_bany3:<br>
> case nir_op_bany4: {<br>
> - dst_reg tmp = dst_reg(this, glsl_type::bool_type);<br>
> - tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);<br>
> -<br>
> - emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));<br>
> + unsigned swiz =<br>
> + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);<br>
><br>
> + emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0),<br>
> + BRW_CONDITIONAL_NZ));<br>
> emit(MOV(dst, src_reg(0)));<br>
> inst = emit(MOV(dst, src_reg(~0)));<br>
> inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;<br>
> --<br>
> 2.4.9<br>
><br>
</p>