[Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src
Kenneth Graunke
kenneth at whitecape.org
Mon Feb 2 22:00:37 PST 2015
On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:
> Originally, get_alu_src was supposed to handle resolving swizzles and
> things like that. However, now that basically every instruction we have
> only takes scalar sources, we don't really need it anymore. The only case
> where it's still marginally useful is for the mov and vecN operations that
> are left over from SSA form. We can handle those cases as a special case
> easily enough. As a side-effect, we don't need the vec_to_movs pass
> anymore.
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 1 -
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 +++++++++++++++++++------------
> 2 files changed, 95 insertions(+), 59 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 25197cd..b95e2c0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -589,7 +589,6 @@ public:
> void nir_emit_texture(nir_tex_instr *instr);
> void nir_emit_jump(nir_jump_instr *instr);
> fs_reg get_nir_src(nir_src src);
> - fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
> fs_reg get_nir_dest(nir_dest dest);
> void emit_percomp(fs_inst *inst, unsigned wr_mask);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 37ccd24..fbb1622 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
>
> nir_convert_from_ssa(nir);
> nir_validate_shader(nir);
> - nir_lower_vec_to_movs(nir);
> - nir_validate_shader(nir);
>
> /* emit the arrays used for inputs and outputs - load/store intrinsics will
> * be converted to reads/writes of these arrays
> @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
> void
> fs_visitor::nir_emit_cf_list(exec_list *list)
> {
> + exec_list_validate(list);
> foreach_list_typed(nir_cf_node, node, node, list) {
> switch (node->type) {
> case nir_cf_node_if:
> @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
> fs_inst *inst;
>
> - fs_reg op[3];
> fs_reg result = get_nir_dest(instr->dest.dest);
> result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
>
> - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> - op[i] = get_nir_alu_src(instr, i);
> + fs_reg op[4];
> + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> + op[i] = get_nir_src(instr->src[i].src);
> + op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
> + op[i].abs = instr->src[i].abs;
> + op[i].negate = instr->src[i].negate;
> + }
> +
> + /* We get a bunch of mov's out of the from_ssa pass and they may still
> + * be vectorized. We'll handle them as a special-case. We'll also
> + * handle vecN here because it's basically the same thing.
> + */
> + bool need_extra_copy = false;
> + switch (instr->op) {
> + case nir_op_vec4:
> + if (!instr->src[3].src.is_ssa &&
> + instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
> + need_extra_copy = true;
> + /* fall through */
> + case nir_op_vec3:
> + if (!instr->src[2].src.is_ssa &&
> + instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
> + need_extra_copy = true;
> + /* fall through */
> + case nir_op_vec2:
> + if (!instr->src[1].src.is_ssa &&
> + instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
> + need_extra_copy = true;
> + /* fall through */
> + case nir_op_imov:
> + case nir_op_fmov: {
> + if (!instr->src[0].src.is_ssa &&
> + instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
> + need_extra_copy = true;
How about:
switch (instr->op) {
case nir_op_vec4:
case nir_op_vec3:
case nir_op_vec2:
case nir_op_imov:
case nir_op_fmov: {
fs_reg temp = result;
bool need_extra_copy = false;
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa &&
instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
need_extra_copy = true;
temp = retype(vgrf(4), result.type);
break;
}
}
...
}
That duplicates less code.
> +
> + fs_reg temp;
> + if (need_extra_copy) {
> + temp = retype(vgrf(4), result.type);
> + } else {
> + temp = result;
> + }
> +
> + if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
> + for (unsigned i = 0; i < 4; i++) {
> + if (!(instr->dest.write_mask & (1 << i)))
> + continue;
> +
> + inst = emit(MOV(offset(temp, i),
> + offset(op[0], instr->src[0].swizzle[i])));
> + inst->saturate = instr->dest.saturate;
> + }
> + } else {
> + for (unsigned i = 0; i < 4; i++) {
> + if (!(instr->dest.write_mask & (1 << i)))
> + continue;
> +
> + inst = emit(MOV(offset(temp, i),
> + offset(op[i], instr->src[i].swizzle[0])));
> + inst->saturate = instr->dest.saturate;
> + }
> + }
> +
> + /* In this case the source and destination registers were the same,
> + * so we need to insert an extra set of moves in order to deal with
> + * any swizzling.
> + */
> + if (need_extra_copy) {
> + for (unsigned i = 0; i < 4; i++) {
> + if (!(instr->dest.write_mask & (1 << i)))
> + continue;
> +
> + emit(MOV(offset(result, i), offset(temp, i)));
> + }
> + }
I might just combine all four loops, and move the if-conditions inside.
It'd probably be less code. Your call.
> + return;
> + }
> + default:
> + break;
> + }
>
> + /*
Bonus blank line here - should be /* At this point
Otherwise, this looks good to me.
> + * At this point, we have dealt with any instruction that operates on
> + * more than a single channel. Therefore, we can just adjust the source
> + * and destination registers for that channel and emit the instruction.
> + */
> + unsigned channel = 0;
> if (nir_op_infos[instr->op].output_size == 0) {
> - /* We've already scalarized, so we know that we only have one
> - * channel. The only question is which channel.
> + /* Since NIR is doing the scalarizing for us, we should only ever see
> + * vectorized operations with a single channel.
> */
> assert(_mesa_bitcount(instr->dest.write_mask) == 1);
> - unsigned off = ffs(instr->dest.write_mask) - 1;
> - result = offset(result, off);
> + channel = ffs(instr->dest.write_mask) - 1;
> +
> + result = offset(result, channel);
> + }
>
> - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> - op[i] = offset(op[i], off);
> + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> + assert(nir_op_infos[instr->op].input_sizes[i] < 2);
> + op[i] = offset(op[i], instr->src[i].swizzle[channel]);
> }
>
> switch (instr->op) {
> - case nir_op_fmov:
> case nir_op_i2f:
> case nir_op_u2f:
> inst = emit(MOV(result, op[0]));
> inst->saturate = instr->dest.saturate;
> break;
>
> - case nir_op_imov:
> case nir_op_f2i:
> case nir_op_f2u:
> emit(MOV(result, op[0]));
> @@ -823,11 +905,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> case nir_op_fnoise4_4:
> unreachable("not reached: should be handled by lower_noise");
>
> - case nir_op_vec2:
> - case nir_op_vec3:
> - case nir_op_vec4:
> - unreachable("not reached: should be handled by lower_quadop_vector");
> -
> case nir_op_ldexp:
> unreachable("not reached: should be handled by ldexp_to_arith()");
>
> @@ -1048,46 +1125,6 @@ fs_visitor::get_nir_src(nir_src src)
> }
>
> fs_reg
> -fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
> -{
> - fs_reg reg = get_nir_src(instr->src[src].src);
> -
> - reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
> - reg.abs = instr->src[src].abs;
> - reg.negate = instr->src[src].negate;
> -
> - bool needs_swizzle = false;
> - unsigned num_components = 0;
> - for (unsigned i = 0; i < 4; i++) {
> - if (!nir_alu_instr_channel_used(instr, src, i))
> - continue;
> -
> - if (instr->src[src].swizzle[i] != i)
> - needs_swizzle = true;
> -
> - num_components = i + 1;
> - }
> -
> - if (needs_swizzle) {
> - /* resolve the swizzle through MOV's */
> - fs_reg new_reg = vgrf(num_components);
> - new_reg.type = reg.type;
> -
> - for (unsigned i = 0; i < 4; i++) {
> - if (!nir_alu_instr_channel_used(instr, src, i))
> - continue;
> -
> - emit(MOV(offset(new_reg, i),
> - offset(reg, instr->src[src].swizzle[i])));
> - }
> -
> - return new_reg;
> - }
> -
> - return reg;
> -}
> -
> -fs_reg
> fs_visitor::get_nir_dest(nir_dest dest)
> {
> fs_reg reg;
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150202/46359bfd/attachment-0001.sig>
More information about the mesa-dev
mailing list