[Mesa-dev] [PATCH 9/9] i965/vec4: Use NIR to do GS input remapping

Fri May 5 19:24:38 UTC 2017

On Fri, May 5, 2017 at 1:04 AM, Alejandro Piñeiro <apinheiro at igalia.com>
wrote:

> On 05/05/17 04:11, Jason Ekstrand wrote:
> > We're already doing this in the FS back-end.  This just does the same
> > thing in the vec4 back-end.
> > ---
> >  src/intel/compiler/brw_nir.c               |  3 --
> >  src/intel/compiler/brw_vec4.cpp            | 44 -------------------
> >  src/intel/compiler/brw_vec4.h              |  2 -
> >  src/intel/compiler/brw_vec4_gs_nir.cpp     | 10 ++---
> >  src/intel/compiler/brw_vec4_gs_visitor.cpp | 70
> +++++++++++++++++++++---------
> >  src/intel/compiler/brw_vec4_gs_visitor.h   |  4 +-
> >  src/intel/compiler/gen6_gs_visitor.cpp     |  4 +-
> >  7 files changed, 56 insertions(+), 81 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> > index d92e8fa..8a9f89f 100644
> > --- a/src/intel/compiler/brw_nir.c
> > +++ b/src/intel/compiler/brw_nir.c
> > @@ -342,9 +342,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool
> is_scalar,
> >     /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
> >     nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
> >
> > -   if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar)
> > -      return;
> > -
> >     /* This pass needs actual constants */
> >     nir_opt_constant_folding(nir);
> >
> > diff --git a/src/intel/compiler/brw_vec4.cpp
> b/src/intel/compiler/brw_vec4.cpp
> > index b387321..ad92951 100644
> > --- a/src/intel/compiler/brw_vec4.cpp
> > +++ b/src/intel/compiler/brw_vec4.cpp
> > @@ -1693,50 +1693,6 @@ attribute_to_hw_reg(int attr, brw_reg_type type,
> bool interleaved)
> >     return reg;
> >  }
> >
> > -
> > -/**
> > - * Replace each register of type ATTR in this->instructions with a
> reference
> > - * to a fixed HW register.
> > - *
> > - * If interleaved is true, then each attribute takes up half a
> register, with
> > - * register N containing attribute 2*N in its first half and attribute
> 2*N+1
> > - * in its second half (this corresponds to the payload setup used by
> geometry
> > - * shaders in "single" or "dual instanced" dispatch mode).  If
> interleaved is
> > - * false, then each attribute takes up a whole register, with register N
> > - * containing attribute N (this corresponds to the payload setup used by
> > - * vertex shaders, and by geometry shaders in "dual object" dispatch
> mode).
> > - */
> > -void
> > -vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
> > -                                          bool interleaved)
> > -{
> > -   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> > -      for (int i = 0; i < 3; i++) {
> > -         if (inst->src[i].file != ATTR)
> > -            continue;
> > -
> > -         int grf = attribute_map[inst->src[i].nr +
> > -                                 inst->src[i].offset / REG_SIZE];
> > -         assert(inst->src[i].offset % REG_SIZE == 0);
> > -
> > -         /* All attributes used in the shader need to have been
> assigned a
> > -          * hardware register by the caller
> > -          */
> > -         assert(grf != 0);
> > -
> > -         struct brw_reg reg =
> > -            attribute_to_hw_reg(grf, inst->src[i].type, interleaved);
> > -         reg.swizzle = inst->src[i].swizzle;
> > -         if (inst->src[i].abs)
> > -            reg = brw_abs(reg);
> > -         if (inst->src[i].negate)
> > -            reg = negate(reg);
> > -
> > -         inst->src[i] = reg;
> > -      }
> > -   }
> > -}
> > -
> >  int
> >  vec4_vs_visitor::setup_attributes(int payload_reg)
> >  {
> > diff --git a/src/intel/compiler/brw_vec4.h
> b/src/intel/compiler/brw_vec4.h
> > index 89adfaa..0f92f34 100644
> > --- a/src/intel/compiler/brw_vec4.h
> > +++ b/src/intel/compiler/brw_vec4.h
> > @@ -367,8 +367,6 @@ public:
> >
> >  protected:
> >     void emit_vertex();
> > -   void lower_attributes_to_hw_regs(const int *attribute_map,
> > -                                    bool interleaved);
> >     void setup_payload_interference(struct ra_graph *g, int
> first_payload_node,
> >                                     int reg_node_count);
> >     virtual void setup_payload() = 0;
> > diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp
> b/src/intel/compiler/brw_vec4_gs_nir.cpp
> > index ed8c03b..577f587 100644
> > --- a/src/intel/compiler/brw_vec4_gs_nir.cpp
> > +++ b/src/intel/compiler/brw_vec4_gs_nir.cpp
> > @@ -66,8 +66,10 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> >        nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
> >        nir_const_value *offset_reg = nir_src_as_const_value(instr->
> src[1]);
> >
> > +      const unsigned input_array_stride = prog_data->urb_read_length *
> 2;
>
> The change of using the input_array_stride instead of
> BRW_VARYING_SLOT_COUNT is not evident (at least to me). There is a
> comment explaining the stride purpose on
> vec4_gs_visitor::setup_varying_inputs, but perhaps a note here would be
> welcomed. You can drop this suggestion if you prefer.
>

We were using BRW_VARYING_SLOT_COUNT before because we were doing a generic
thing here that we would later remap using input_array_stride.  Now we're
just using input_array_stride directly.  I think if this weren't a change,
you would probably not notice. :-)  How about:

All of the inputs for the first vertex come in first followed by all inputs
for the second vertex etc.  The stride between vertices is governed by the
URB read length.

--Jason

> Other than that:
> Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
>
> > +
> >        if (nir_dest_bit_size(instr->dest) == 64) {
> > -         src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
> > +         src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
> >                         instr->const_index[0] + offset_reg->u32[0],
> >                         glsl_type::dvec4_type);
> >
> > @@ -85,15 +87,11 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> >           /* Make up a type...we have no way of knowing... */
> >           const glsl_type *const type = glsl_type::ivec(instr->num_
> components);
> >
> > -         src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
> > +         src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
> >                         instr->const_index[0] + offset_reg->u32[0],
> >                         type);
> >           src.swizzle = BRW_SWZ_COMP_INPUT(nir_
> intrinsic_component(instr));
> >
> > -         /* gl_PointSize is passed in the .w component of the VUE
> header */
> > -         if (instr->const_index[0] == VARYING_SLOT_PSIZ)
> > -            src.swizzle = BRW_SWIZZLE_WWWW;
> > -
> >           dest = get_nir_dest(instr->dest, src.type);
> >           dest.writemask = brw_writemask_for_size(instr->
> num_components);
> >           emit(MOV(dest, src));
> > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp
> b/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > index 4a8b5be..516ab0b 100644
> > --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
> > @@ -29,6 +29,7 @@
> >
> >  #include "brw_vec4_gs_visitor.h"
> >  #include "gen6_gs_visitor.h"
> > +#include "brw_cfg.h"
> >  #include "brw_fs.h"
> >  #include "brw_nir.h"
> >  #include "common/gen_debug.h"
> > @@ -72,9 +73,36 @@ vec4_gs_visitor::make_reg_for_system_value(int
> location)
> >     return reg;
> >  }
> >
> > +static inline struct brw_reg
> > +attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
> > +{
> > +   struct brw_reg reg;
> >
> > +   unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
> > +   if (interleaved) {
> > +      reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0,
> width, 1);
> > +   } else {
> > +      reg = brw_vecn_grf(width, attr, 0);
> > +   }
> > +
> > +   reg.type = type;
> > +   return reg;
> > +}
> > +
> > +/**
> > + * Replace each register of type ATTR in this->instructions with a
> reference
> > + * to a fixed HW register.
> > + *
> > + * If interleaved is true, then each attribute takes up half a
> register, with
> > + * register N containing attribute 2*N in its first half and attribute
> 2*N+1
> > + * in its second half (this corresponds to the payload setup used by
> geometry
> > + * shaders in "single" or "dual instanced" dispatch mode).  If
> interleaved is
> > + * false, then each attribute takes up a whole register, with register N
> > + * containing attribute N (this corresponds to the payload setup used by
> > + * vertex shaders, and by geometry shaders in "dual object" dispatch
> mode).
> > + */
> >  int
> > -vec4_gs_visitor::setup_varying_inputs(int payload_reg, int
> *attribute_map,
> > +vec4_gs_visitor::setup_varying_inputs(int payload_reg,
> >                                        int attributes_per_reg)
> >  {
> >     /* For geometry shaders there are N copies of the input attributes,
> where N
> > @@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int
> payload_reg, int *attribute_map,
> >     assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
> >     unsigned input_array_stride = prog_data->urb_read_length * 2;
> >
> > -   for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
> > -      int varying = c->input_vue_map.slot_to_varying[slot];
> > -      for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) {
> > -         attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
> > -            attributes_per_reg * payload_reg + input_array_stride *
> vertex +
> > -            slot;
> > +   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> > +      for (int i = 0; i < 3; i++) {
> > +         if (inst->src[i].file != ATTR)
> > +            continue;
> > +
> > +         assert(inst->src[i].offset % REG_SIZE == 0);
> > +         int grf = payload_reg * attributes_per_reg +
> > +                   inst->src[i].nr + inst->src[i].offset / REG_SIZE;
> > +
> > +         struct brw_reg reg =
> > +            attribute_to_hw_reg(grf, inst->src[i].type,
> attributes_per_reg > 1);
> > +         reg.swizzle = inst->src[i].swizzle;
> > +         if (inst->src[i].abs)
> > +            reg = brw_abs(reg);
> > +         if (inst->src[i].negate)
> > +            reg = negate(reg);
> > +
> > +         inst->src[i] = reg;
> >        }
> >     }
> >
> > @@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int
> payload_reg, int *attribute_map,
> >     return payload_reg + regs_used;
> >  }
> >
> > -
> >  void
> >  vec4_gs_visitor::setup_payload()
> >  {
> > -   int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
> > -
> >     /* If we are in dual instanced or single mode, then attributes are
> going
> >      * to be interleaved, so one register contains two attribute slots.
> >      */
> >     int attributes_per_reg =
> >        prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
> >
> > -   /* If a geometry shader tries to read from an input that wasn't
> written by
> > -    * the vertex shader, that produces undefined results, but it
> shouldn't
> > -    * crash anything.  So initialize attribute_map to zeros--that
> ensures that
> > -    * these undefined results are read from r0.
> > -    */
> > -   memset(attribute_map, 0, sizeof(attribute_map));
> > -
> >     int reg = 0;
> >
> >     /* The payload always contains important data in r0, which contains
> > @@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload()
> >
> >     /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
> >     if (gs_prog_data->include_primitive_id)
> > -      attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg *
> reg++;
> > +      reg++;
> >
> >     reg = setup_uniforms(reg);
> >
> > -   reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
> > -
> > -   lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1);
> > +   reg = setup_varying_inputs(reg, attributes_per_reg);
> >
> >     this->first_non_payload_grf = reg;
> >  }
> > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h
> b/src/intel/compiler/brw_vec4_gs_visitor.h
> > index 09221f9..6b21a33 100644
> > --- a/src/intel/compiler/brw_vec4_gs_visitor.h
> > +++ b/src/intel/compiler/brw_vec4_gs_visitor.h
> > @@ -33,6 +33,7 @@
> >  #include "brw_vec4.h"
> >
> >  #define MAX_GS_INPUT_VERTICES 6
> > +#define MAX_GS_INPUT_VARYINGS 32
> >
> >  #ifdef __cplusplus
> >  namespace brw {
> > @@ -64,8 +65,7 @@ protected:
> >     virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
> >
> >  protected:
> > -   int setup_varying_inputs(int payload_reg, int *attribute_map,
> > -                            int attributes_per_reg);
> > +   int setup_varying_inputs(int payload_reg, int attributes_per_reg);
> >     void emit_control_data_bits();
> >     void set_stream_control_data_bits(unsigned stream_id);
> >
> > diff --git a/src/intel/compiler/gen6_gs_visitor.cpp
> b/src/intel/compiler/gen6_gs_visitor.cpp
> > index 075bc4a..afc6056 100644
> > --- a/src/intel/compiler/gen6_gs_visitor.cpp
> > +++ b/src/intel/compiler/gen6_gs_visitor.cpp
> > @@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload()
> >
> >     reg = setup_uniforms(reg);
> >
> > -   reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
> > -
> > -   lower_attributes_to_hw_regs(attribute_map, true);
> > +   reg = setup_varying_inputs(reg, attributes_per_reg);
> >
> >     this->first_non_payload_grf = reg;
> >  }
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170505/956d7d67/attachment-0001.html>