<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Feb 2, 2015 at 10:00 PM, Kenneth Graunke <span dir="ltr"><<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:<br>
> Originally, get_alu_src was supposed to handle resolving swizzles and<br>
> things like that. However, now that basically every instruction we have<br>
> only takes scalar sources, we don't really need it anymore. The only case<br>
> where it's still marginally useful is for the mov and vecN operations that<br>
> are left over from SSA form. We can handle those cases as a special case<br>
> easily enough. As a side-effect, we don't need the vec_to_movs pass<br>
> anymore.<br>
> ---<br>
> src/mesa/drivers/dri/i965/brw_fs.h | 1 -<br>
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 +++++++++++++++++++------------<br>
> 2 files changed, 95 insertions(+), 59 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
> index 25197cd..b95e2c0 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
> @@ -589,7 +589,6 @@ public:<br>
> void nir_emit_texture(nir_tex_instr *instr);<br>
> void nir_emit_jump(nir_jump_instr *instr);<br>
> fs_reg get_nir_src(nir_src src);<br>
> - fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);<br>
> fs_reg get_nir_dest(nir_dest dest);<br>
> void emit_percomp(fs_inst *inst, unsigned wr_mask);<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
> index 37ccd24..fbb1622 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
> @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()<br>
><br>
> nir_convert_from_ssa(nir);<br>
> nir_validate_shader(nir);<br>
> - nir_lower_vec_to_movs(nir);<br>
> - nir_validate_shader(nir);<br>
><br>
> /* emit the arrays used for inputs and outputs - load/store intrinsics will<br>
> * be converted to reads/writes of these arrays<br>
> @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)<br>
> void<br>
> fs_visitor::nir_emit_cf_list(exec_list *list)<br>
> {<br>
> + exec_list_validate(list);<br>
> foreach_list_typed(nir_cf_node, node, node, list) {<br>
> switch (node->type) {<br>
> case nir_cf_node_if:<br>
> @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
> struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;<br>
> fs_inst *inst;<br>
><br>
> - fs_reg op[3];<br>
> fs_reg result = get_nir_dest(instr->dest.dest);<br>
> result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);<br>
><br>
> - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)<br>
> - op[i] = get_nir_alu_src(instr, i);<br>
> + fs_reg op[4];<br>
> + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {<br>
> + op[i] = get_nir_src(instr->src[i].src);<br>
> + op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);<br>
> + op[i].abs = instr->src[i].abs;<br>
> + op[i].negate = instr->src[i].negate;<br>
> + }<br>
> +<br>
> + /* We get a bunch of mov's out of the from_ssa pass and they may still<br>
> + * be vectorized. We'll handle them as a special-case. We'll also<br>
> + * handle vecN here because it's basically the same thing.<br>
> + */<br>
> + bool need_extra_copy = false;<br>
> + switch (instr->op) {<br>
> + case nir_op_vec4:<br>
> + if (!instr->src[3].src.is_ssa &&<br>
> + instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)<br>
> + need_extra_copy = true;<br>
> + /* fall through */<br>
> + case nir_op_vec3:<br>
> + if (!instr->src[2].src.is_ssa &&<br>
> + instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)<br>
> + need_extra_copy = true;<br>
> + /* fall through */<br>
> + case nir_op_vec2:<br>
> + if (!instr->src[1].src.is_ssa &&<br>
> + instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)<br>
> + need_extra_copy = true;<br>
> + /* fall through */<br>
> + case nir_op_imov:<br>
> + case nir_op_fmov: {<br>
> + if (!instr->src[0].src.is_ssa &&<br>
> + instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)<br>
> + need_extra_copy = true;<br>
<br>
</div></div>How about:<br>
<br>
switch (instr->op) {<br>
case nir_op_vec4:<br>
case nir_op_vec3:<br>
case nir_op_vec2:<br>
case nir_op_imov:<br>
case nir_op_fmov: {<br>
fs_reg temp = result;<br>
bool need_extra_copy = false;<br>
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {<br>
if (!instr->src[i].src.is_ssa &&<br>
instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {<br>
need_extra_copy = true;<br>
temp = retype(vgrf(4), result.type);<br>
break;<br>
}<br>
}<br>
...<br>
}<br>
<br>
That duplicates less code.<br></blockquote><div><br></div><div>Yeah, the original is horrible. That's much better. I'll do that.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div><div class="h5"><br>
> +<br>
> + fs_reg temp;<br>
> + if (need_extra_copy) {<br>
> + temp = retype(vgrf(4), result.type);<br>
> + } else {<br>
> + temp = result;<br>
> + }<br>
> +<br>
> + if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {<br>
> + for (unsigned i = 0; i < 4; i++) {<br>
> + if (!(instr->dest.write_mask & (1 << i)))<br>
> + continue;<br>
> +<br>
> + inst = emit(MOV(offset(temp, i),<br>
> + offset(op[0], instr->src[0].swizzle[i])));<br>
> + inst->saturate = instr->dest.saturate;<br>
> + }<br>
> + } else {<br>
> + for (unsigned i = 0; i < 4; i++) {<br>
> + if (!(instr->dest.write_mask & (1 << i)))<br>
> + continue;<br>
> +<br>
> + inst = emit(MOV(offset(temp, i),<br>
> + offset(op[i], instr->src[i].swizzle[0])));<br>
> + inst->saturate = instr->dest.saturate;<br>
> + }<br>
> + }<br>
> +<br>
> + /* In this case the source and destination registers were the same,<br>
> + * so we need to insert an extra set of moves in order to deal with<br>
> + * any swizzling.<br>
> + */<br>
> + if (need_extra_copy) {<br>
> + for (unsigned i = 0; i < 4; i++) {<br>
> + if (!(instr->dest.write_mask & (1 << i)))<br>
> + continue;<br>
> +<br>
> + emit(MOV(offset(result, i), offset(temp, i)));<br>
> + }<br>
> + }<br>
<br>
</div></div>I might just combine all four loops, and move the if-conditions inside.<br>
It'd probably be less code. Your call.<br></blockquote><div><br></div><div>We can't. We have to fill temp and then dump to result. Otherwise, the swizzles will get messed up. We can probably combine the first two though.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<span class=""><br>
> + return;<br>
> + }<br>
> + default:<br>
> + break;<br>
> + }<br>
><br>
> + /*<br>
<br>
</span>Bonus blank line here - should be /* At this point<br></blockquote><div><br></div><div>Sure.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
Otherwise, this looks good to me.<br></blockquote><div><br></div><div>Do you want a v2?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="HOEnZb"><div class="h5"><br>
> + * At this point, we have dealt with any instruction that operates on<br>
> + * more than a single channel. Therefore, we can just adjust the source<br>
> + * and destination registers for that channel and emit the instruction.<br>
> + */<br>
> + unsigned channel = 0;<br>
> if (nir_op_infos[instr->op].output_size == 0) {<br>
> - /* We've already scalarized, so we know that we only have one<br>
> - * channel. The only question is which channel.<br>
> + /* Since NIR is doing the scalarizing for us, we should only ever see<br>
> + * vectorized operations with a single channel.<br>
> */<br>
> assert(_mesa_bitcount(instr->dest.write_mask) == 1);<br>
> - unsigned off = ffs(instr->dest.write_mask) - 1;<br>
> - result = offset(result, off);<br>
> + channel = ffs(instr->dest.write_mask) - 1;<br>
> +<br>
> + result = offset(result, channel);<br>
> + }<br>
><br>
> - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)<br>
> - op[i] = offset(op[i], off);<br>
> + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {<br>
> + assert(nir_op_infos[instr->op].input_sizes[i] < 2);<br>
> + op[i] = offset(op[i], instr->src[i].swizzle[channel]);<br>
> }<br>
><br>
> switch (instr->op) {<br>
> - case nir_op_fmov:<br>
> case nir_op_i2f:<br>
> case nir_op_u2f:<br>
> inst = emit(MOV(result, op[0]));<br>
> inst->saturate = instr->dest.saturate;<br>
> break;<br>
><br>
> - case nir_op_imov:<br>
> case nir_op_f2i:<br>
> case nir_op_f2u:<br>
> emit(MOV(result, op[0]));<br>
> @@ -823,11 +905,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)<br>
> case nir_op_fnoise4_4:<br>
> unreachable("not reached: should be handled by lower_noise");<br>
><br>
> - case nir_op_vec2:<br>
> - case nir_op_vec3:<br>
> - case nir_op_vec4:<br>
> - unreachable("not reached: should be handled by lower_quadop_vector");<br>
> -<br>
> case nir_op_ldexp:<br>
> unreachable("not reached: should be handled by ldexp_to_arith()");<br>
><br>
> @@ -1048,46 +1125,6 @@ fs_visitor::get_nir_src(nir_src src)<br>
> }<br>
><br>
> fs_reg<br>
> -fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)<br>
> -{<br>
> - fs_reg reg = get_nir_src(instr->src[src].src);<br>
> -<br>
> - reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);<br>
> - reg.abs = instr->src[src].abs;<br>
> - reg.negate = instr->src[src].negate;<br>
> -<br>
> - bool needs_swizzle = false;<br>
> - unsigned num_components = 0;<br>
> - for (unsigned i = 0; i < 4; i++) {<br>
> - if (!nir_alu_instr_channel_used(instr, src, i))<br>
> - continue;<br>
> -<br>
> - if (instr->src[src].swizzle[i] != i)<br>
> - needs_swizzle = true;<br>
> -<br>
> - num_components = i + 1;<br>
> - }<br>
> -<br>
> - if (needs_swizzle) {<br>
> - /* resolve the swizzle through MOV's */<br>
> - fs_reg new_reg = vgrf(num_components);<br>
> - new_reg.type = reg.type;<br>
> -<br>
> - for (unsigned i = 0; i < 4; i++) {<br>
> - if (!nir_alu_instr_channel_used(instr, src, i))<br>
> - continue;<br>
> -<br>
> - emit(MOV(offset(new_reg, i),<br>
> - offset(reg, instr->src[src].swizzle[i])));<br>
> - }<br>
> -<br>
> - return new_reg;<br>
> - }<br>
> -<br>
> - return reg;<br>
> -}<br>
> -<br>
> -fs_reg<br>
> fs_visitor::get_nir_dest(nir_dest dest)<br>
> {<br>
> fs_reg reg;<br>
><br>
</div></div></blockquote></div><br></div></div>