[Mesa-dev] [PATCH v5 3/3] i965/fs_nir: Get rid of get_alu_src

Jason Ekstrand jason at jlekstrand.net
Mon Feb 2 22:20:48 PST 2015


On Mon, Feb 2, 2015 at 10:00 PM, Kenneth Graunke <kenneth at whitecape.org>
wrote:

> On Thursday, January 29, 2015 01:40:20 PM Jason Ekstrand wrote:
> > Originally, get_alu_src was supposed to handle resolving swizzles and
> > things like that.  However, now that basically every instruction we have
> > only takes scalar sources, we don't really need it anymore.  The only
> case
> > where it's still marginally useful is for the mov and vecN operations
> that
> > are left over from SSA form.  We can handle those cases as a special case
> > easily enough.  As a side-effect, we don't need the vec_to_movs pass
> > anymore.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.h       |   1 -
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 153
> +++++++++++++++++++------------
> >  2 files changed, 95 insertions(+), 59 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 25197cd..b95e2c0 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -589,7 +589,6 @@ public:
> >     void nir_emit_texture(nir_tex_instr *instr);
> >     void nir_emit_jump(nir_jump_instr *instr);
> >     fs_reg get_nir_src(nir_src src);
> > -   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
> >     fs_reg get_nir_dest(nir_dest dest);
> >     void emit_percomp(fs_inst *inst, unsigned wr_mask);
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 37ccd24..fbb1622 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -140,8 +140,6 @@ fs_visitor::emit_nir_code()
> >
> >     nir_convert_from_ssa(nir);
> >     nir_validate_shader(nir);
> > -   nir_lower_vec_to_movs(nir);
> > -   nir_validate_shader(nir);
> >
> >     /* emit the arrays used for inputs and outputs - load/store
> intrinsics will
> >      * be converted to reads/writes of these arrays
> > @@ -419,6 +417,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
> >  void
> >  fs_visitor::nir_emit_cf_list(exec_list *list)
> >  {
> > +   exec_list_validate(list);
> >     foreach_list_typed(nir_cf_node, node, node, list) {
> >        switch (node->type) {
> >        case nir_cf_node_if:
> > @@ -541,34 +540,117 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> >     struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *)
> this->key;
> >     fs_inst *inst;
> >
> > -   fs_reg op[3];
> >     fs_reg result = get_nir_dest(instr->dest.dest);
> >     result.type =
> brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
> >
> > -   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> > -      op[i] = get_nir_alu_src(instr, i);
> > +   fs_reg op[4];
> > +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> > +      op[i] = get_nir_src(instr->src[i].src);
> > +      op[i].type =
> brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
> > +      op[i].abs = instr->src[i].abs;
> > +      op[i].negate = instr->src[i].negate;
> > +   }
> > +
> > +   /* We get a bunch of mov's out of the from_ssa pass and they may
> still
> > +    * be vectorized.  We'll handle them as a special-case.  We'll also
> > +    * handle vecN here because it's basically the same thing.
> > +    */
> > +   bool need_extra_copy = false;
> > +   switch (instr->op) {
> > +   case nir_op_vec4:
> > +      if (!instr->src[3].src.is_ssa &&
> > +          instr->dest.dest.reg.reg == instr->src[3].src.reg.reg)
> > +         need_extra_copy = true;
> > +      /* fall through */
> > +   case nir_op_vec3:
> > +      if (!instr->src[2].src.is_ssa &&
> > +          instr->dest.dest.reg.reg == instr->src[2].src.reg.reg)
> > +         need_extra_copy = true;
> > +      /* fall through */
> > +   case nir_op_vec2:
> > +      if (!instr->src[1].src.is_ssa &&
> > +          instr->dest.dest.reg.reg == instr->src[1].src.reg.reg)
> > +         need_extra_copy = true;
> > +      /* fall through */
> > +   case nir_op_imov:
> > +   case nir_op_fmov: {
> > +      if (!instr->src[0].src.is_ssa &&
> > +          instr->dest.dest.reg.reg == instr->src[0].src.reg.reg)
> > +         need_extra_copy = true;
>
> How about:
>
> switch (instr->op) {
> case nir_op_vec4:
> case nir_op_vec3:
> case nir_op_vec2:
> case nir_op_imov:
> case nir_op_fmov: {
>    fs_reg temp = result;
>    bool need_extra_copy = false;
>    for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
>       if (!instr->src[i].src.is_ssa &&
>           instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
>          need_extra_copy = true;
>          temp = retype(vgrf(4), result.type);
>          break;
>       }
>    }
>    ...
> }
>
> That duplicates less code.
>

Yeah, the original is horrible.  That's much better. I'll do that.


>
> > +
> > +      fs_reg temp;
> > +      if (need_extra_copy) {
> > +         temp = retype(vgrf(4), result.type);
> > +      } else {
> > +         temp = result;
> > +      }
> > +
> > +      if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
> > +         for (unsigned i = 0; i < 4; i++) {
> > +            if (!(instr->dest.write_mask & (1 << i)))
> > +               continue;
> > +
> > +            inst = emit(MOV(offset(temp, i),
> > +                        offset(op[0], instr->src[0].swizzle[i])));
> > +            inst->saturate = instr->dest.saturate;
> > +         }
> > +      } else {
> > +         for (unsigned i = 0; i < 4; i++) {
> > +            if (!(instr->dest.write_mask & (1 << i)))
> > +               continue;
> > +
> > +            inst = emit(MOV(offset(temp, i),
> > +                        offset(op[i], instr->src[i].swizzle[0])));
> > +            inst->saturate = instr->dest.saturate;
> > +         }
> > +      }
> > +
> > +      /* In this case the source and destination registers were the
> same,
> > +       * so we need to insert an extra set of moves in order to deal
> with
> > +       * any swizzling.
> > +       */
> > +      if (need_extra_copy) {
> > +         for (unsigned i = 0; i < 4; i++) {
> > +            if (!(instr->dest.write_mask & (1 << i)))
> > +               continue;
> > +
> > +            emit(MOV(offset(result, i), offset(temp, i)));
> > +         }
> > +      }
>
> I might just combine all four loops, and move the if-conditions inside.
> It'd probably be less code.  Your call.
>

We can't.  We have to fill temp and then dump to result.  Otherwise, the
swizzles will get messed up.  We can probably combine the first two though.


>
> > +      return;
> > +   }
> > +   default:
> > +      break;
> > +   }
> >
> > +   /*
>
> Bonus blank line here - should be /* At this point
>

Sure.


>
> Otherwise, this looks good to me.
>

Do you want a v2?


>
> > +    * At this point, we have dealt with any instruction that operates on
> > +    * more than a single channel.  Therefore, we can just adjust the
> source
> > +    * and destination registers for that channel and emit the
> instruction.
> > +    */
> > +   unsigned channel = 0;
> >     if (nir_op_infos[instr->op].output_size == 0) {
> > -      /* We've already scalarized, so we know that we only have one
> > -       * channel.  The only question is which channel.
> > +      /* Since NIR is doing the scalarizing for us, we should only ever
> see
> > +       * vectorized operations with a single channel.
> >         */
> >        assert(_mesa_bitcount(instr->dest.write_mask) == 1);
> > -      unsigned off = ffs(instr->dest.write_mask) - 1;
> > -      result = offset(result, off);
> > +      channel = ffs(instr->dest.write_mask) - 1;
> > +
> > +      result = offset(result, channel);
> > +   }
> >
> > -      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> > -         op[i] = offset(op[i], off);
> > +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
> > +      assert(nir_op_infos[instr->op].input_sizes[i] < 2);
> > +      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
> >     }
> >
> >     switch (instr->op) {
> > -   case nir_op_fmov:
> >     case nir_op_i2f:
> >     case nir_op_u2f:
> >        inst = emit(MOV(result, op[0]));
> >        inst->saturate = instr->dest.saturate;
> >        break;
> >
> > -   case nir_op_imov:
> >     case nir_op_f2i:
> >     case nir_op_f2u:
> >        emit(MOV(result, op[0]));
> > @@ -823,11 +905,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> >     case nir_op_fnoise4_4:
> >        unreachable("not reached: should be handled by lower_noise");
> >
> > -   case nir_op_vec2:
> > -   case nir_op_vec3:
> > -   case nir_op_vec4:
> > -      unreachable("not reached: should be handled by
> lower_quadop_vector");
> > -
> >     case nir_op_ldexp:
> >        unreachable("not reached: should be handled by ldexp_to_arith()");
> >
> > @@ -1048,46 +1125,6 @@ fs_visitor::get_nir_src(nir_src src)
> >  }
> >
> >  fs_reg
> > -fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
> > -{
> > -   fs_reg reg = get_nir_src(instr->src[src].src);
> > -
> > -   reg.type =
> brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
> > -   reg.abs = instr->src[src].abs;
> > -   reg.negate = instr->src[src].negate;
> > -
> > -   bool needs_swizzle = false;
> > -   unsigned num_components = 0;
> > -   for (unsigned i = 0; i < 4; i++) {
> > -      if (!nir_alu_instr_channel_used(instr, src, i))
> > -         continue;
> > -
> > -      if (instr->src[src].swizzle[i] != i)
> > -         needs_swizzle = true;
> > -
> > -      num_components = i + 1;
> > -   }
> > -
> > -   if (needs_swizzle) {
> > -      /* resolve the swizzle through MOV's */
> > -      fs_reg new_reg = vgrf(num_components);
> > -      new_reg.type = reg.type;
> > -
> > -      for (unsigned i = 0; i < 4; i++) {
> > -         if (!nir_alu_instr_channel_used(instr, src, i))
> > -            continue;
> > -
> > -         emit(MOV(offset(new_reg, i),
> > -                  offset(reg, instr->src[src].swizzle[i])));
> > -      }
> > -
> > -      return new_reg;
> > -   }
> > -
> > -   return reg;
> > -}
> > -
> > -fs_reg
> >  fs_visitor::get_nir_dest(nir_dest dest)
> >  {
> >     fs_reg reg;
> >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150202/ec525e60/attachment.html>


More information about the mesa-dev mailing list