[Mesa-dev] [PATCH 7/8] i965/vs: Map scalar VS input locations properly; avoid tons of MOVs.

Jason Ekstrand jason at jlekstrand.net
Mon Aug 24 19:36:49 PDT 2015


On Mon, Aug 24, 2015 at 5:03 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Monday, August 17, 2015 09:38:46 PM Jason Ekstrand wrote:
>> On Aug 17, 2015 4:08 PM, "Kenneth Graunke" <kenneth at whitecape.org> wrote:
>> >
>> > Previously, we used nir_lower_io with the scalar type_size function,
>> > which mapped VERT_ATTRIB_* locations to...some numbers.  Then, in
>> > fs_visitor::nir_setup_inputs(), we created temporaries indexed by
>> > those numbers, and emitted MOVs from the actual ATTR registers to
>> > those temporaries.
>> >
>> > This patch reworks our input lowering to produce NIR lower_input
>> > intrinsics that properly index into the ATTR file, so we can access
>> > it directly.
>> >
>> > No changes in shader-db.
>> >
>> > Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> > ---
>> >  src/mesa/drivers/dri/i965/brw_fs.cpp     |  2 +-
>> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 71
>> +++++++++-----------------------
>> >  src/mesa/drivers/dri/i965/brw_nir.c      | 25 ++++++++++-
>> >  3 files changed, 45 insertions(+), 53 deletions(-)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > index 93adbc6..efabb52 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> > @@ -4527,7 +4527,7 @@ fs_visitor::dump_instruction(backend_instruction
>> *be_inst, FILE *file)
>> >           fprintf(file, "***m%d***", inst->src[i].reg);
>> >           break;
>> >        case ATTR:
>> > -         fprintf(file, "attr%d", inst->src[i].reg +
>> inst->src[i].reg_offset);
>> > +         fprintf(file, "attr%d+%d", inst->src[i].reg,
>> inst->src[i].reg_offset);
>>
>> This should probably be its own patch.
>
> Sure.  I will split that out.
>
> (By the way, can you do something about your mail client's word wrapping
> fail?  It's rather hard to find your review comments among the noise...)

Sorry... I think I replied from my phone.  It wraps things funny and
there's not much I can do about it besides not doing patch review in
bed.

>>
>> >           break;
>> >        case UNIFORM:
>> >           fprintf(file, "u%d", inst->src[i].reg +
>> inst->src[i].reg_offset);
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > index 51ba23b..397cdcb 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > @@ -56,61 +56,25 @@ fs_visitor::emit_nir_code()
>> >  void
>> >  fs_visitor::nir_setup_inputs(nir_shader *shader)
>> >  {
>> > +   if (stage != MESA_SHADER_FRAGMENT)
>> > +      return;
>> > +
>> >     nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
>> >
>> >     foreach_list_typed(nir_variable, var, node, &shader->inputs) {
>> > -      enum brw_reg_type type = brw_type_for_base_type(var->type);
>> >        fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
>> >
>> >        fs_reg reg;
>> > -      switch (stage) {
>> > -      case MESA_SHADER_VERTEX: {
>> > -         /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
>> > -          * stored in nir_variable::location.
>> > -          *
>> > -          * However, NIR's load_input intrinsics use a different index -
>> an
>> > -          * offset into a single contiguous array containing all inputs.
>> > -          * This index corresponds to the nir_variable::driver_location
>> field.
>> > -          *
>> > -          * So, we need to copy from fs_reg(ATTR, var->location) to
>> > -          * offset(nir_inputs, var->data.driver_location).
>> > -          */
>> > -         const glsl_type *const t = var->type->without_array();
>> > -         const unsigned components = t->components();
>> > -         const unsigned cols = t->matrix_columns;
>> > -         const unsigned elts = t->vector_elements;
>> > -         unsigned array_length = var->type->is_array() ?
>> var->type->length : 1;
>> > -         for (unsigned i = 0; i < array_length; i++) {
>> > -            for (unsigned j = 0; j < cols; j++) {
>> > -               for (unsigned k = 0; k < elts; k++) {
>> > -                  bld.MOV(offset(retype(input, type), bld,
>> > -                                 components * i + elts * j + k),
>> > -                          offset(fs_reg(ATTR, var->data.location + i,
>> type),
>> > -                                 bld, 4 * j + k));
>> > -               }
>> > -            }
>> > -         }
>> > -         break;
>> > -      }
>> > -      case MESA_SHADER_GEOMETRY:
>> > -      case MESA_SHADER_COMPUTE:
>> > -      case MESA_SHADER_TESS_CTRL:
>> > -      case MESA_SHADER_TESS_EVAL:
>> > -         unreachable("fs_visitor not used for these stages yet.");
>> > -         break;
>> > -      case MESA_SHADER_FRAGMENT:
>> > -         if (var->data.location == VARYING_SLOT_POS) {
>> > -            reg =
>> *emit_fragcoord_interpolation(var->data.pixel_center_integer,
>> > -
>> var->data.origin_upper_left);
>> > -            emit_percomp(bld, fs_inst(BRW_OPCODE_MOV,
>> bld.dispatch_width(),
>> > -                                      input, reg), 0xF);
>> > -         } else {
>> > -            emit_general_interpolation(input, var->name, var->type,
>> > -                                       (glsl_interp_qualifier)
>> var->data.interpolation,
>> > -                                       var->data.location,
>> var->data.centroid,
>> > -                                       var->data.sample);
>> > -         }
>> > -         break;
>> > +      if (var->data.location == VARYING_SLOT_POS) {
>> > +         reg =
>> *emit_fragcoord_interpolation(var->data.pixel_center_integer,
>> > +
>>  var->data.origin_upper_left);
>> > +         emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
>> > +                                   input, reg), 0xF);
>> > +      } else {
>> > +         emit_general_interpolation(input, var->name, var->type,
>> > +                                    (glsl_interp_qualifier)
>> var->data.interpolation,
>> > +                                    var->data.location,
>> var->data.centroid,
>> > +                                    var->data.sample);
>> >        }
>> >     }
>> >  }
>> > @@ -1557,8 +1521,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
>> &bld, nir_intrinsic_instr *instr
>> >     case nir_intrinsic_load_input: {
>> >        unsigned index = 0;
>> >        for (unsigned j = 0; j < instr->num_components; j++) {
>> > -         fs_reg src = offset(retype(nir_inputs, dest.type), bld,
>> > -                             instr->const_index[0] + index);
>> > +         fs_reg src;
>> > +         if (stage == MESA_SHADER_VERTEX) {
>>
>> == FRAGMENT should be the if case and ATTR should be the else to match the
>> logic in nir_lower_io.
>
> What do you mean?  I don't see any shader stage specific logic in
> nir_lower_io to mimic - plus that's a whole different file?  I was
> just defaulting to pipeline order (VS first).

In fs_visitor::nir_setup_inputs above, you do "if (stage !=
MESA_SHADER_FRAGMENT) return;" and then every other stage-specific
thing you do is "if (stage == MESA_SHADER_VERTEX) do sometiing;"  Not
a big deal, but it seemed strange to me.

--Jason


More information about the mesa-dev mailing list