[Mesa-dev] [PATCH 06/14] nir: Add a "compact array" flag and IO lowering code.

Thu Nov 17 04:36:46 UTC 2016

On Wednesday, November 16, 2016 8:20:44 PM PST Jason Ekstrand wrote:
> On Mon, Nov 14, 2016 at 5:41 PM, Kenneth Graunke <kenneth at whitecape.org>
> wrote:
> 
> > Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[],
> > gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar
> > arrays.  Normal scalar arrays are sparse - each array element usually
> > occupies a whole vec4 slot.  However, most hardware assumes these
> > built-in arrays are tightly packed.
> >
> > The new var->data.compact flag indicates that a scalar array should
> > be tightly packed, so a float[4] array would take up a single vec4
> > slot, and a float[8] array would take up two slots.
> >
> > They are still arrays, not vec4s, however.  nir_lower_io will generate
> > intrinsics using ARB_enhanced_layouts style component qualifiers.
> >
> > Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> > ---
> >  src/compiler/glsl/glsl_to_nir.cpp            |  1 +
> >  src/compiler/nir/nir.h                       |  7 +++++
> >  src/compiler/nir/nir_gather_info.c           |  9 ++++--
> >  src/compiler/nir/nir_lower_indirect_derefs.c |  8 +++--
> >  src/compiler/nir/nir_lower_io.c              | 44
> > ++++++++++++++++++++--------
> >  src/compiler/nir/nir_print.c                 |  4 ++-
> >  6 files changed, 55 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> > b/src/compiler/glsl/glsl_to_nir.cpp
> > index 6ca760b..ed1c739 100644
> > --- a/src/compiler/glsl/glsl_to_nir.cpp
> > +++ b/src/compiler/glsl/glsl_to_nir.cpp
> > @@ -331,6 +331,7 @@ nir_visitor::visit(ir_variable *ir)
> >     var->data.explicit_index = ir->data.explicit_index;
> >     var->data.explicit_binding = ir->data.explicit_binding;
> >     var->data.has_initializer = ir->data.has_initializer;
> > +   var->data.compact = false;
> >     var->data.location_frac = ir->data.location_frac;
> >
> >     switch (ir->data.depth_layout) {
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 3d46384..0b78242 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -230,6 +230,13 @@ typedef struct nir_variable {
> >        unsigned location_frac:2;
> >
> >        /**
> > +       * If true, this variable represents an array of scalars that should
> > +       * be tightly packed.  In other words, consecutive array elements
> > +       * should be stored one component apart, rather than one slot apart.
> > +       */
> > +      unsigned compact:1;
> >
> 
> Should this 1-bit integer be a bool?

Yeah.  For whatever reason, {ir,nir}_variable don't do that.
I'm happy to change it here, and we should probably change them
all at some point...

> > +
> > +      /**
> >         * Whether this is a fragment shader output implicitly initialized
> > with
> >         * the previous contents of the specified render target at the
> >         * framebuffer location corresponding to this shader invocation.
> > diff --git a/src/compiler/nir/nir_gather_info.c
> > b/src/compiler/nir/nir_gather_info.c
> > index 63c8a42..4d07dda 100644
> > --- a/src/compiler/nir/nir_gather_info.c
> > +++ b/src/compiler/nir/nir_gather_info.c
> > @@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable
> > *var)
> >         var->data.mode == nir_var_shader_in)
> >        is_vertex_input = true;
> >
> > -   set_io_mask(shader, var, 0,
> > -               glsl_count_attribute_slots(type, is_vertex_input));
> > +   const unsigned slots =
> > +      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> > +                        : glsl_count_attribute_slots(type,
> > is_vertex_input);
> >
> 
> By using glsl_get_length(), you're assuming that all compact things are 1-D
> arrays with no structs.  Is that your intention?  If so, we should probably
> assert so that we catch it if we ever change this in the future.

Yes, "compact" is intended to only be set for arrays of scalars.
For geometry/tessellation shaders they can be wrapped in a per-vertex
array, i.e. gl_ClipDistance[3][7].

Probably the right thing to do is to check this in nir_validate().
I'll add that.

> > +
> > +   set_io_mask(shader, var, 0, slots);
> >  }
> >
> >  static unsigned
> > @@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> > *deref)
> >      * here marking the entire variable as used.
> >      */
> >     if (!(glsl_type_is_matrix(type) ||
> > -         (glsl_type_is_array(type) &&
> > +         (glsl_type_is_array(type) && !var->data.compact &&
> >            (glsl_type_is_numeric(glsl_without_array(type)) ||
> >             glsl_type_is_boolean(glsl_without_array(type)))))) {
> >
> > diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c
> > b/src/compiler/nir/nir_lower_indirect_derefs.c
> > index 356373e..5c97dc8e 100644
> > --- a/src/compiler/nir/nir_lower_indirect_derefs.c
> > +++ b/src/compiler/nir/nir_lower_indirect_derefs.c
> > @@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b,
> >        if (!deref_has_indirect(intrin->variables[0]))
> >           continue;
> >
> > -      /* Only lower variables whose mode is in the mask */
> > -      if (!(modes & intrin->variables[0]->var->data.mode))
> > +      /* Only lower variables whose mode is in the mask, or compact
> > +       * array variables.  (We can't handle indirects on tightly packed
> > +       * scalar arrays, so we need to lower them regardless.)
> >
> +       */
> > +      if (!(modes & intrin->variables[0]->var->data.mode) &&
> > +          !intrin->variables[0]->var->data.compact)
> >           continue;
> >
> >        b->cursor = nir_before_instr(&intrin->instr);
> > diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_
> > io.c
> > index a7e7f14..6628947 100644
> > --- a/src/compiler/nir/nir_lower_io.c
> > +++ b/src/compiler/nir/nir_lower_io.c
> > @@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage
> > stage)
> >  static nir_ssa_def *
> >  get_io_offset(nir_builder *b, nir_deref_var *deref,
> >                nir_ssa_def **vertex_index,
> > -              int (*type_size)(const struct glsl_type *))
> > +              int (*type_size)(const struct glsl_type *),
> > +              unsigned *component)
> >  {
> >     nir_deref *tail = &deref->deref;
> >
> > @@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
> >        *vertex_index = vtx;
> >     }
> >
> > +   if (deref->var->data.compact) {
> > +      assert(tail->child->deref_type == nir_deref_type_array);
> > +      assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
> > +      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
> > +      /* We always lower indirect dereferences for "compact" array vars.
> > */
> > +      assert(deref_array->deref_array_type ==
> > nir_deref_array_type_direct);
> > +
> > +      const unsigned total_offset = *component + deref_array->base_offset;
> > +      const unsigned slot_offset = total_offset / 4;
> > +      *component = total_offset % 4;
> > +      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
> > +   }
> > +
> >     /* Just emit code and let constant-folding go to town */
> >     nir_ssa_def *offset = nir_imm_int(b, 0);
> >
> > @@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
> >
> >  static nir_intrinsic_instr *
> >  lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
> > -           nir_ssa_def *vertex_index, nir_ssa_def *offset)
> > +           nir_ssa_def *vertex_index, nir_ssa_def *offset,
> > +           unsigned component)
> >  {
> >     const nir_shader *nir = state->builder.shader;
> >     nir_variable *var = intrin->variables[0]->var;
> > @@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct
> > lower_io_state *state,
> >
> >     nir_intrinsic_set_base(load, var->data.driver_location);
> >     if (mode == nir_var_shader_in || mode == nir_var_shader_out)
> > -      nir_intrinsic_set_component(load, var->data.location_frac);
> > +      nir_intrinsic_set_component(load, component);
> >
> >     if (load->intrinsic == nir_intrinsic_load_uniform)
> >        nir_intrinsic_set_range(load, state->type_size(var->type));
> > @@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct
> > lower_io_state *state,
> >
> >  static nir_intrinsic_instr *
> >  lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
> > -            nir_ssa_def *vertex_index, nir_ssa_def *offset)
> > +            nir_ssa_def *vertex_index, nir_ssa_def *offset,
> > +            unsigned component)
> >  {
> >     nir_variable *var = intrin->variables[0]->var;
> >     nir_variable_mode mode = var->data.mode;
> > @@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct
> > lower_io_state *state,
> >     nir_intrinsic_set_base(store, var->data.driver_location);
> >
> >     if (mode == nir_var_shader_out)
> > -      nir_intrinsic_set_component(store, var->data.location_frac);
> > +      nir_intrinsic_set_component(store, component);
> >
> >     nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
> >
> > @@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct
> > lower_io_state *state,
> >
> >  static nir_intrinsic_instr *
> >  lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state
> > *state,
> > -                     nir_ssa_def *offset)
> > +                     nir_ssa_def *offset, unsigned component)
> >  {
> >     nir_variable *var = intrin->variables[0]->var;
> >
> > @@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin,
> > struct lower_io_state *state,
> >
> >     /* Ignore interpolateAt() for flat variables - flat is flat. */
> >     if (var->data.interpolation == INTERP_MODE_FLAT)
> > -      return lower_load(intrin, state, NULL, offset);
> > +      return lower_load(intrin, state, NULL, offset, component);
> >
> >     nir_intrinsic_op bary_op;
> >     switch (intrin->intrinsic) {
> > @@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin,
> > struct lower_io_state *state,
> >     load->num_components = intrin->num_components;
> >
> >     nir_intrinsic_set_base(load, var->data.driver_location);
> > -   nir_intrinsic_set_component(load, var->data.location_frac);
> > +   nir_intrinsic_set_component(load, component);
> >
> >     load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
> >     load->src[1] = nir_src_for_ssa(offset);
> > @@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block,
> >
> >        nir_ssa_def *offset;
> >        nir_ssa_def *vertex_index = NULL;
> > +      unsigned component_offset = var->data.location_frac;
> >
> >        offset = get_io_offset(b, intrin->variables[0],
> >                               per_vertex ? &vertex_index : NULL,
> > -                             state->type_size);
> > +                             state->type_size, &component_offset);
> >
> >        nir_intrinsic_instr *replacement;
> >
> >        switch (intrin->intrinsic) {
> >        case nir_intrinsic_load_var:
> > -         replacement = lower_load(intrin, state, vertex_index, offset);
> > +         replacement = lower_load(intrin, state, vertex_index, offset,
> > +                                  component_offset);
> >           break;
> >
> >        case nir_intrinsic_store_var:
> > -         replacement = lower_store(intrin, state, vertex_index, offset);
> > +         replacement = lower_store(intrin, state, vertex_index, offset,
> > +                                   component_offset);
> >           break;
> >
> >        case nir_intrinsic_var_atomic_add:
> > @@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block,
> >        case nir_intrinsic_interp_var_at_sample:
> >        case nir_intrinsic_interp_var_at_offset:
> >           assert(vertex_index == NULL);
> > -         replacement = lower_interpolate_at(intrin, state, offset);
> > +         replacement = lower_interpolate_at(intrin, state, offset,
> > +                                            component_offset);
> >           break;
> >
> >        default:
> > diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> > index 242bffb..475e3f2 100644
> > --- a/src/compiler/nir/nir_print.c
> > +++ b/src/compiler/nir/nir_print.c
> > @@ -432,9 +432,11 @@ print_var_decl(nir_variable *var, print_state *state)
> >           loc = buf;
> >        }
> >
> > -      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
> > +      fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location,
> > +              var->data.compact ? " compact" : "");
> >     }
> >
> > +
> >
> 
> Stray newline
> 
> 
> >     if (var->constant_initializer) {
> >        fprintf(fp, " = { ");
> >        print_constant(var->constant_initializer, var->type, state);
> > --
> > 2.10.2
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161116/f6212258/attachment.sig>