[Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src

Mon Feb 2 22:00:37 PST 2015

On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:
> Originally, get_alu_src was supposed to handle resolving swizzles and
> things like that.  However, now that basically every instruction we have
> only takes scalar sources, we don't really need it anymore.  The only case
> where it's still marginally useful is for the mov and vecN operations that
> are left over from SSA form.  We can handle those cases as a special case
> easily enough.  As a side-effect, we don't need the vec_to_movs pass
> anymore.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h       |   1 -
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153 +++++++++++++++++++------------
>  2 files changed, 95 insertions(+), 59 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 25197cd..b95e2c0 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -589,7 +589,6 @@ public:
>     void nir_emit_texture(nir_tex_instr *instr);
>     void nir_emit_jump(nir_jump_instr *instr);
>     fs_reg get_nir_src(nir_src src);
> -   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
>     fs_reg get_nir_dest(nir_dest dest);
>     void emit_percomp(fs_inst *inst, unsigned wr_mask);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 37ccd24..fbb1622 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
>  
>     nir_convert_from_ssa(nir);
>     nir_validate_shader(nir);
> -   nir_lower_vec_to_movs(nir);
> -   nir_validate_shader(nir);
>  
>     /* emit the arrays used for inputs and outputs - load/store intrinsics will
>      * be converted to reads/writes of these arrays
> @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
>  void
>  fs_visitor::nir_emit_cf_list(exec_list *list)
>  {
> +   exec_list_validate(list);
>     foreach_list_typed(nir_cf_node, node, node, list) {
>        switch (node->type) {
>        case nir_cf_node_if:
> @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
>     struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
>     fs_inst *inst;
>  
> -   fs_reg op[3];
>     fs_reg result = get_nir_dest(instr->dest.dest);
>     result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
>  
> -   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> -      op[i] = get_nir_alu_src(instr, i);
> +   fs_reg op[4];
> +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> +      op[i] = get_nir_src(instr->src[i].src);
> +      op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
> +      op[i].abs = instr->src[i].abs;
> +      op[i].negate = instr->src[i].negate;
> +   }
> +
> +   /* We get a bunch of mov's out of the from_ssa pass and they may still
> +    * be vectorized.  We'll handle them as a special-case.  We'll also
> +    * handle vecN here because it's basically the same thing.
> +    */
> +   bool need_extra_copy = false;
> +   switch (instr->op) {
> +   case nir_op_vec4:
> +      if (!instr->src[3].src.is_ssa &&
> +          instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
> +         need_extra_copy = true;
> +      /* fall through */
> +   case nir_op_vec3:
> +      if (!instr->src[2].src.is_ssa &&
> +          instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
> +         need_extra_copy = true;
> +      /* fall through */
> +   case nir_op_vec2:
> +      if (!instr->src[1].src.is_ssa &&
> +          instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
> +         need_extra_copy = true;
> +      /* fall through */
> +   case nir_op_imov:
> +   case nir_op_fmov: {
> +      if (!instr->src[0].src.is_ssa &&
> +          instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
> +         need_extra_copy = true;

How about:

switch (instr->op) {
case nir_op_vec4:
case nir_op_vec3:
case nir_op_vec2:
case nir_op_imov:
case nir_op_fmov: {
   fs_reg temp = result;
   bool need_extra_copy = false;
   for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      if (!instr->src[i].src.is_ssa &&
          instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
         need_extra_copy = true;
         temp = retype(vgrf(4), result.type);
         break;
      }
   }
   ...
}

That duplicates less code.

> +
> +      fs_reg temp;
> +      if (need_extra_copy) {
> +         temp = retype(vgrf(4), result.type);
> +      } else {
> +         temp = result;
> +      }
> +
> +      if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
> +         for (unsigned i = 0; i < 4; i++) {
> +            if (!(instr->dest.write_mask & (1 << i)))
> +               continue;
> +
> +            inst = emit(MOV(offset(temp, i),
> +                        offset(op[0], instr->src[0].swizzle[i])));
> +            inst->saturate = instr->dest.saturate;
> +         }
> +      } else {
> +         for (unsigned i = 0; i < 4; i++) {
> +            if (!(instr->dest.write_mask & (1 << i)))
> +               continue;
> +
> +            inst = emit(MOV(offset(temp, i),
> +                        offset(op[i], instr->src[i].swizzle[0])));
> +            inst->saturate = instr->dest.saturate;
> +         }
> +      }
> +
> +      /* In this case the source and destination registers were the same,
> +       * so we need to insert an extra set of moves in order to deal with
> +       * any swizzling.
> +       */
> +      if (need_extra_copy) {
> +         for (unsigned i = 0; i < 4; i++) {
> +            if (!(instr->dest.write_mask & (1 << i)))
> +               continue;
> +
> +            emit(MOV(offset(result, i), offset(temp, i)));
> +         }
> +      }

I might just combine all four loops, and move the if-conditions inside.
It'd probably be less code.  Your call.

> +      return;
> +   }
> +   default:
> +      break;
> +   }
>  
> +   /*

Bonus blank line here - should be /* At this point

Otherwise, this looks good to me.

> +    * At this point, we have dealt with any instruction that operates on
> +    * more than a single channel.  Therefore, we can just adjust the source
> +    * and destination registers for that channel and emit the instruction.
> +    */
> +   unsigned channel = 0;
>     if (nir_op_infos[instr->op].output_size == 0) {
> -      /* We've already scalarized, so we know that we only have one
> -       * channel.  The only question is which channel.
> +      /* Since NIR is doing the scalarizing for us, we should only ever see
> +       * vectorized operations with a single channel.
>         */
>        assert(_mesa_bitcount(instr->dest.write_mask) == 1);
> -      unsigned off = ffs(instr->dest.write_mask) - 1;
> -      result = offset(result, off);
> +      channel = ffs(instr->dest.write_mask) - 1;
> +
> +      result = offset(result, channel);
> +   }
>  
> -      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> -         op[i] = offset(op[i], off);
> +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> +      assert(nir_op_infos[instr->op].input_sizes[i] < 2);
> +      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
>     }
>  
>     switch (instr->op) {
> -   case nir_op_fmov:
>     case nir_op_i2f:
>     case nir_op_u2f:
>        inst = emit(MOV(result, op[0]));
>        inst->saturate = instr->dest.saturate;
>        break;
>  
> -   case nir_op_imov:
>     case nir_op_f2i:
>     case nir_op_f2u:
>        emit(MOV(result, op[0]));
> @@ -823,11 +905,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
>     case nir_op_fnoise4_4:
>        unreachable("not reached: should be handled by lower_noise");
>  
> -   case nir_op_vec2:
> -   case nir_op_vec3:
> -   case nir_op_vec4:
> -      unreachable("not reached: should be handled by lower_quadop_vector");
> -
>     case nir_op_ldexp:
>        unreachable("not reached: should be handled by ldexp_to_arith()");
>  
> @@ -1048,46 +1125,6 @@ fs_visitor::get_nir_src(nir_src src)
>  }
>  
>  fs_reg
> -fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
> -{
> -   fs_reg reg = get_nir_src(instr->src[src].src);
> -
> -   reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
> -   reg.abs = instr->src[src].abs;
> -   reg.negate = instr->src[src].negate;
> -
> -   bool needs_swizzle = false;
> -   unsigned num_components = 0;
> -   for (unsigned i = 0; i < 4; i++) {
> -      if (!nir_alu_instr_channel_used(instr, src, i))
> -         continue;
> -
> -      if (instr->src[src].swizzle[i] != i)
> -         needs_swizzle = true;
> -
> -      num_components = i + 1;
> -   }
> -
> -   if (needs_swizzle) {
> -      /* resolve the swizzle through MOV's */
> -      fs_reg new_reg = vgrf(num_components);
> -      new_reg.type = reg.type;
> -
> -      for (unsigned i = 0; i < 4; i++) {
> -         if (!nir_alu_instr_channel_used(instr, src, i))
> -            continue;
> -
> -         emit(MOV(offset(new_reg, i),
> -                  offset(reg, instr->src[src].swizzle[i])));
> -      }
> -
> -      return new_reg;
> -   }
> -
> -   return reg;
> -}
> -
> -fs_reg
>  fs_visitor::get_nir_dest(nir_dest dest)
>  {
>     fs_reg reg;
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150202/46359bfd/attachment-0001.sig>