[Mesa-dev] [PATCH 2/7] nir: add varying component packing helpers

Sat Oct 28 15:21:36 UTC 2017

On October 28, 2017 07:15:10 Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:

> On Mon, Oct 23, 2017 at 2:10 AM, Timothy Arceri <tarceri at itsqueeze.com> wrote:
>> ---
>>  src/compiler/nir/nir.h                 |   2 +
>>  src/compiler/nir/nir_linking_helpers.c | 235 +++++++++++++++++++++++++++++++++
>>  2 files changed, 237 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index dd833cf183..6a761ab655 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2413,20 +2413,22 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
>>                                   nir_function_impl *entrypoint,
>>                                   bool outputs, bool inputs);
>>
>>  void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
>>
>>  void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
>>                                int (*type_size)(const struct glsl_type *));
>>
>>  /* Some helpers to do very simple linking */
>>  bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
>> +void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
>> +                          bool default_to_smooth_interp);
>>
>>  typedef enum {
>>     /* If set, this forces all non-flat fragment shader inputs to be
>>      * interpolated as if with the "sample" qualifier.  This requires
>>      * nir_shader_compiler_options::use_interpolated_input_intrinsics.
>>      */
>>     nir_lower_io_force_sample_interpolation = (1 << 1),
>>  } nir_lower_io_options;
>>  bool nir_lower_io(nir_shader *shader,
>>                    nir_variable_mode modes,
>> diff --git a/src/compiler/nir/nir_linking_helpers.c 
>> b/src/compiler/nir/nir_linking_helpers.c
>> index 54ba1c85e5..85da84fbd9 100644
>> --- a/src/compiler/nir/nir_linking_helpers.c
>> +++ b/src/compiler/nir/nir_linking_helpers.c
>> @@ -143,10 +143,245 @@ nir_remove_unused_varyings(nir_shader *producer, 
>> nir_shader *consumer)
>>        tcs_add_output_reads(producer, read);
>>
>>     bool progress = false;
>>     progress = remove_unused_io_vars(producer, &producer->outputs, read);
>>
>>     progress =
>>        remove_unused_io_vars(consumer, &consumer->inputs, written) || progress;
>>
>>     return progress;
>>  }
>> +
>> +static uint8_t
>> +get_interp_type(nir_variable *var, bool default_to_smooth_interp)
>> +{
>> +   return var->data.interpolation == INTERP_MODE_NONE &&
>> +          default_to_smooth_interp ?
>> +             INTERP_MODE_SMOOTH : var->data.interpolation;

How about:

if (var->data.interpolation != INTERP_MODE_NONE)
   return var->data.interpolation;
else if (default_to_smooth_interp)
   return INTERP_MODE_SMOOTH;
else
   return INTERP_MODE_NONE;

That seems much more straightforward.

That said, what does defaulting to smooth interpolation have to do with 
varying packing?

>> +}
>> +
>> +static void
>> +get_slot_component_masks_and_interp_types(struct exec_list *var_list,
>> +                                          uint8_t *comps,  uint8_t 
>> *interp_type,
>> +                                          gl_shader_stage stage,
>> +                                          bool default_to_smooth_interp)
>> +{
>> +   nir_foreach_variable_safe(var, var_list) {
>> +      assert(var->data.location >= 0);
>> +
>> +      /* Only remap things that aren't built-ins.
>> +       * TODO: add TES patch support.
>> +       */
>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>> +
>> +         const struct glsl_type *type = var->type;
>> +         if (nir_is_per_vertex_io(var, stage)) {
>> +            assert(glsl_type_is_array(type));
>> +            type = glsl_get_array_element(type);
>> +         }
>> +
>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>> +         unsigned elements =
>> +            glsl_get_vector_elements(glsl_without_array(type));
>> +
>> +         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
>> +         unsigned slots = glsl_count_attribute_slots(type, false);
>> +         for (unsigned i = 0; i < slots; i++) {
>> +            interp_type[location + i] =
>> +               get_interp_type(var, default_to_smooth_interp);
>> +
>> +            if (dual_slot) {
>> +               unsigned comps_slot2 = 0;
>
> I'd expect this to be declared outside the loop, otherwise it is
> always going to be 0 in the (i & 1) case below.
>
> With that fixed this patch is
>
> Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>
>
>> +               if (i & 1) {
>> +                  comps[location + i] |= ((1 << comps_slot2) - 1);
>> +               } else {
>> +                  unsigned num_comps = 4 - var->data.location_frac;
>> +                  comps_slot2 = (elements * 2) - num_comps;
>> +
>> +                  /* Assume ARB_enhanced_layouts packing rules for doubles */
>> +                  assert(var->data.location_frac == 0 ||
>> +                         var->data.location_frac == 2);
>> +                  assert(comps_slot2 <= 4);
>> +
>> +                  comps[location + i] |=
>> +                     ((1 << num_comps) - 1) << var->data.location_frac;
>> +               }
>> +            } else {
>> +               comps[location + i] |=
>> +                  ((1 << elements) - 1) << var->data.location_frac;
>> +            }
>> +         }
>> +      }
>> +   }
>> +}
>> +
>> +struct varying_loc
>> +{
>> +   uint8_t component;
>> +   uint32_t location;
>> +};
>> +
>> +static void
>> +remap_slots_and_components(struct exec_list *var_list,
>> +                           struct varying_loc (*remap)[4])
>> +{
>> +   nir_foreach_variable(var, var_list) {
>> +      assert(var->data.location >= 0);
>> +
>> +      /* Only remap things that aren't built-ins */
>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>> +         assert(var->data.location - VARYING_SLOT_VAR0 < 32);
>> +         assert(remap[var->data.location - VARYING_SLOT_VAR0] >= 0);
>> +
>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>> +         struct varying_loc *new_loc = 
>> &remap[location][var->data.location_frac];
>> +         if (new_loc->location) {
>> +            var->data.location = new_loc->location;
>> +            var->data.location_frac = new_loc->component;
>> +         }
>> +      }
>> +   }
>> +}
>> +
>> +/* If there are empty components in the slot compact the remaining components
>> + * as close to component 0 as possible. This will make it easier to fill the
>> + * empty components with components from a different slot in a following pass.
>> + */
>> +static void
>> +compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
>> +                   uint8_t *interp_type, bool default_to_smooth_interp)
>> +{
>> +   struct exec_list *input_list = &consumer->inputs;
>> +   struct exec_list *output_list = &producer->outputs;
>> +   struct varying_loc remap[32][4] = {{{0}, {0}}};
>> +
>> +   /* Create a cursor for each interpolation type */
>> +   unsigned cursor[4] = {0};
>> +
>> +   /* We only need to pass over one stage and we choose the consumer as it 
>> seems
>> +    * to cause a larger reduction in instruction counts (tested on i965).
>> +    */
>> +   nir_foreach_variable(var, input_list) {
>> +
>> +      /* Only remap things that aren't builtins.
>> +       * TODO: add TES patch support.
>> +       */
>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>> +
>> +         const struct glsl_type *type = var->type;
>> +         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
>> +            assert(glsl_type_is_array(type));
>> +            type = glsl_get_array_element(type);
>> +         }
>> +
>> +         /* Skip types that require more complex packing handling.
>> +          * TODO: add support for these types.
>> +          */
>> +         if (glsl_type_is_array(type) ||
>> +             glsl_type_is_dual_slot(type) ||
>> +             glsl_type_is_matrix(type) ||
>> +             glsl_type_is_struct(type) ||
>> +             glsl_type_is_64bit(type))
>> +            continue;
>> +
>> +         /* We ignore complex types above and all other vector types should
>> +          * have been split into scalar variables by the lower_io_to_scalar
>> +          * pass. The only exeption should by OpenGL xfb varyings.
>> +          */
>> +         if (glsl_get_vector_elements(type) != 1)
>> +            continue;
>> +
>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>> +         uint8_t used_comps = comps[location];
>> +
>> +         /* If there are no empty components there is nothing more for us 
>> to do.
>> +          */
>> +         if (used_comps == 0xf)
>> +            continue;
>> +
>> +         bool found_new_offset = false;
>> +         uint8_t interp = get_interp_type(var, default_to_smooth_interp);
>> +         for (; cursor[interp] < 32; cursor[interp]++) {
>> +
>> +            /* We couldn't find anywhere to pack the varying continue on. */
>> +            if (cursor[interp] == location)
>> +               break;
>> +
>> +            /* We can only pack varyings with matching interpolation types */
>> +            if (interp_type[cursor[interp]] != interp)
>> +               continue;
>> +
>> +            uint8_t cursor_used_comps = comps[cursor[interp]];
>> +
>> +            /* If the slot is empty just skip it for now, compact_var_list()
>> +             * can be called after this function to remove empty slots for us.
>> +             * TODO: finish implementing compact_var_list() requires array and
>> +             * matrix splitting.
>> +             */
>> +            if (!cursor_used_comps)
>> +               continue;
>> +
>> +            uint8_t unused_comps = ~cursor_used_comps;
>> +
>> +            for (unsigned i = 0; i < 4; i++) {
>> +               uint8_t new_var_comps = 1 << i;
>> +               if (unused_comps & new_var_comps) {
>> +                  remap[location][var->data.location_frac].component = i;
>> +                  remap[location][var->data.location_frac].location =
>> +                     cursor[interp] + VARYING_SLOT_VAR0;
>> +
>> +                  found_new_offset = true;
>> +
>> +                  /* Turn off the mask for the component we are remapping */
>> +                  if (comps[location] & 1 << var->data.location_frac) {
>> +                     comps[location] ^= 1 << var->data.location_frac;
>> +                     comps[cursor[interp]] |= new_var_comps;
>> +                  }
>> +                  break;
>> +               }
>> +            }
>> +
>> +            if (found_new_offset)
>> +               break;
>> +         }
>> +      }
>> +   }
>> +
>> +   remap_slots_and_components(input_list, remap);
>> +   remap_slots_and_components(output_list, remap);
>> +}
>> +
>> +/* We assume that this has been called more-or-less directly after
>> + * remove_unused_varyings.  At this point, all of the varyings that we
>> + * aren't going to be using have been completely removed and the
>> + * inputs_read and outputs_written fields in nir_shader_info reflect
>> + * this.  Therefore, the total set of valid slots is the OR of the two
>> + * sets of varyings;  this accounts for varyings which one side may need
>> + * to read/write even if the other doesn't.  This can happen if, for
>> + * instance, an array is used indirectly from one side causing it to be
>> + * unsplittable but directly from the other.
>> + */
>> +void
>> +nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
>> +                     bool default_to_smooth_interp)
>> +{
>> +   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
>> +   assert(consumer->info.stage != MESA_SHADER_VERTEX);
>> +
>> +   uint8_t comps[32] = {0};
>> +   uint8_t interp_type[32] = {0};
>> +
>> +   get_slot_component_masks_and_interp_types(&producer->outputs, comps,
>> +                                             interp_type,
>> +                                             producer->info.stage,
>> +                                             default_to_smooth_interp);
>> +   get_slot_component_masks_and_interp_types(&consumer->inputs, comps,
>> +                                             interp_type,
>> +                                             consumer->info.stage,
>> +                                             default_to_smooth_interp);
>> +
>> +   compact_components(producer, consumer, comps, interp_type,
>> +                      default_to_smooth_interp);
>> +}
>> --
>> 2.13.6
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev