[Mesa-dev] [PATCH 2/7] nir: add varying component packing helpers

Timothy Arceri tarceri at itsqueeze.com
Sat Oct 28 23:54:01 UTC 2017


On 29/10/17 02:21, Jason Ekstrand wrote:
> On October 28, 2017 07:15:10 Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> 
> wrote:
> 
>> On Mon, Oct 23, 2017 at 2:10 AM, Timothy Arceri 
>> <tarceri at itsqueeze.com> wrote:
>>> ---
>>>  src/compiler/nir/nir.h                 |   2 +
>>>  src/compiler/nir/nir_linking_helpers.c | 235 
>>> +++++++++++++++++++++++++++++++++
>>>  2 files changed, 237 insertions(+)
>>>
>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>> index dd833cf183..6a761ab655 100644
>>> --- a/src/compiler/nir/nir.h
>>> +++ b/src/compiler/nir/nir.h
>>> @@ -2413,20 +2413,22 @@ void nir_lower_io_to_temporaries(nir_shader 
>>> *shader,
>>>                                   nir_function_impl *entrypoint,
>>>                                   bool outputs, bool inputs);
>>>
>>>  void nir_shader_gather_info(nir_shader *shader, nir_function_impl 
>>> *entrypoint);
>>>
>>>  void nir_assign_var_locations(struct exec_list *var_list, unsigned 
>>> *size,
>>>                                int (*type_size)(const struct 
>>> glsl_type *));
>>>
>>>  /* Some helpers to do very simple linking */
>>>  bool nir_remove_unused_varyings(nir_shader *producer, nir_shader 
>>> *consumer);
>>> +void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
>>> +                          bool default_to_smooth_interp);
>>>
>>>  typedef enum {
>>>     /* If set, this forces all non-flat fragment shader inputs to be
>>>      * interpolated as if with the "sample" qualifier.  This requires
>>>      * nir_shader_compiler_options::use_interpolated_input_intrinsics.
>>>      */
>>>     nir_lower_io_force_sample_interpolation = (1 << 1),
>>>  } nir_lower_io_options;
>>>  bool nir_lower_io(nir_shader *shader,
>>>                    nir_variable_mode modes,
>>> diff --git a/src/compiler/nir/nir_linking_helpers.c 
>>> b/src/compiler/nir/nir_linking_helpers.c
>>> index 54ba1c85e5..85da84fbd9 100644
>>> --- a/src/compiler/nir/nir_linking_helpers.c
>>> +++ b/src/compiler/nir/nir_linking_helpers.c
>>> @@ -143,10 +143,245 @@ nir_remove_unused_varyings(nir_shader 
>>> *producer, nir_shader *consumer)
>>>        tcs_add_output_reads(producer, read);
>>>
>>>     bool progress = false;
>>>     progress = remove_unused_io_vars(producer, &producer->outputs, 
>>> read);
>>>
>>>     progress =
>>>        remove_unused_io_vars(consumer, &consumer->inputs, written) || 
>>> progress;
>>>
>>>     return progress;
>>>  }
>>> +
>>> +static uint8_t
>>> +get_interp_type(nir_variable *var, bool default_to_smooth_interp)
>>> +{
>>> +   return var->data.interpolation == INTERP_MODE_NONE &&
>>> +          default_to_smooth_interp ?
>>> +             INTERP_MODE_SMOOTH : var->data.interpolation;
> 
> How about:
> 
> if (var->data.interpolation != INTERP_MODE_NONE)
>    return var->data.interpolation;
> else if (default_to_smooth_interp)
>    return INTERP_MODE_SMOOTH;
> else
>    return INTERP_MODE_NONE;
> 
> That seems much more straightforward.

Sure why not.

> 
> That said, what does defaulting to smooth interpolation have to do with 
> varying packing?

In core we can pack INTERP_MODE_NONE with varyings explicitly marked as 
INTERP_MODE_SMOOTH, this is extra important in GL 4.4 where outputs no 
longer have to match inputs with regards to explicitly using the smooth 
qualifier and we don't want to end up packing each side of the interface 
differently. In compat we cannot pack smooth with none because 
glShadeModel() can be used to override the default.

> 
>>> +}
>>> +
>>> +static void
>>> +get_slot_component_masks_and_interp_types(struct exec_list *var_list,
>>> +                                          uint8_t *comps,  uint8_t 
>>> *interp_type,
>>> +                                          gl_shader_stage stage,
>>> +                                          bool 
>>> default_to_smooth_interp)
>>> +{
>>> +   nir_foreach_variable_safe(var, var_list) {
>>> +      assert(var->data.location >= 0);
>>> +
>>> +      /* Only remap things that aren't built-ins.
>>> +       * TODO: add TES patch support.
>>> +       */
>>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>>> +
>>> +         const struct glsl_type *type = var->type;
>>> +         if (nir_is_per_vertex_io(var, stage)) {
>>> +            assert(glsl_type_is_array(type));
>>> +            type = glsl_get_array_element(type);
>>> +         }
>>> +
>>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>>> +         unsigned elements =
>>> +            glsl_get_vector_elements(glsl_without_array(type));
>>> +
>>> +         bool dual_slot = 
>>> glsl_type_is_dual_slot(glsl_without_array(type));
>>> +         unsigned slots = glsl_count_attribute_slots(type, false);
>>> +         for (unsigned i = 0; i < slots; i++) {
>>> +            interp_type[location + i] =
>>> +               get_interp_type(var, default_to_smooth_interp);
>>> +
>>> +            if (dual_slot) {
>>> +               unsigned comps_slot2 = 0;
>>
>> I'd expect this to be declared outside the loop, otherwise it is
>> always going to be 0 in the (i & 1) case below.
>>
>> With that fixed this patch is
>>
>> Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>>
>>
>>> +               if (i & 1) {
>>> +                  comps[location + i] |= ((1 << comps_slot2) - 1);
>>> +               } else {
>>> +                  unsigned num_comps = 4 - var->data.location_frac;
>>> +                  comps_slot2 = (elements * 2) - num_comps;
>>> +
>>> +                  /* Assume ARB_enhanced_layouts packing rules for 
>>> doubles */
>>> +                  assert(var->data.location_frac == 0 ||
>>> +                         var->data.location_frac == 2);
>>> +                  assert(comps_slot2 <= 4);
>>> +
>>> +                  comps[location + i] |=
>>> +                     ((1 << num_comps) - 1) << var->data.location_frac;
>>> +               }
>>> +            } else {
>>> +               comps[location + i] |=
>>> +                  ((1 << elements) - 1) << var->data.location_frac;
>>> +            }
>>> +         }
>>> +      }
>>> +   }
>>> +}
>>> +
>>> +struct varying_loc
>>> +{
>>> +   uint8_t component;
>>> +   uint32_t location;
>>> +};
>>> +
>>> +static void
>>> +remap_slots_and_components(struct exec_list *var_list,
>>> +                           struct varying_loc (*remap)[4])
>>> +{
>>> +   nir_foreach_variable(var, var_list) {
>>> +      assert(var->data.location >= 0);
>>> +
>>> +      /* Only remap things that aren't built-ins */
>>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>>> +         assert(var->data.location - VARYING_SLOT_VAR0 < 32);
>>> +         assert(remap[var->data.location - VARYING_SLOT_VAR0] >= 0);
>>> +
>>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>>> +         struct varying_loc *new_loc = 
>>> &remap[location][var->data.location_frac];
>>> +         if (new_loc->location) {
>>> +            var->data.location = new_loc->location;
>>> +            var->data.location_frac = new_loc->component;
>>> +         }
>>> +      }
>>> +   }
>>> +}
>>> +
>>> +/* If there are empty components in the slot compact the remaining 
>>> components
>>> + * as close to component 0 as possible. This will make it easier to 
>>> fill the
>>> + * empty components with components from a different slot in a 
>>> following pass.
>>> + */
>>> +static void
>>> +compact_components(nir_shader *producer, nir_shader *consumer, 
>>> uint8_t *comps,
>>> +                   uint8_t *interp_type, bool default_to_smooth_interp)
>>> +{
>>> +   struct exec_list *input_list = &consumer->inputs;
>>> +   struct exec_list *output_list = &producer->outputs;
>>> +   struct varying_loc remap[32][4] = {{{0}, {0}}};
>>> +
>>> +   /* Create a cursor for each interpolation type */
>>> +   unsigned cursor[4] = {0};
>>> +
>>> +   /* We only need to pass over one stage and we choose the consumer 
>>> as it seems
>>> +    * to cause a larger reduction in instruction counts (tested on 
>>> i965).
>>> +    */
>>> +   nir_foreach_variable(var, input_list) {
>>> +
>>> +      /* Only remap things that aren't builtins.
>>> +       * TODO: add TES patch support.
>>> +       */
>>> +      if (var->data.location >= VARYING_SLOT_VAR0 &&
>>> +          var->data.location - VARYING_SLOT_VAR0 < 32) {
>>> +
>>> +         const struct glsl_type *type = var->type;
>>> +         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
>>> +            assert(glsl_type_is_array(type));
>>> +            type = glsl_get_array_element(type);
>>> +         }
>>> +
>>> +         /* Skip types that require more complex packing handling.
>>> +          * TODO: add support for these types.
>>> +          */
>>> +         if (glsl_type_is_array(type) ||
>>> +             glsl_type_is_dual_slot(type) ||
>>> +             glsl_type_is_matrix(type) ||
>>> +             glsl_type_is_struct(type) ||
>>> +             glsl_type_is_64bit(type))
>>> +            continue;
>>> +
>>> +         /* We ignore complex types above and all other vector types 
>>> should
>>> +          * have been split into scalar variables by the 
>>> lower_io_to_scalar
>>> +          * pass. The only exeption should by OpenGL xfb varyings.
>>> +          */
>>> +         if (glsl_get_vector_elements(type) != 1)
>>> +            continue;
>>> +
>>> +         unsigned location = var->data.location - VARYING_SLOT_VAR0;
>>> +         uint8_t used_comps = comps[location];
>>> +
>>> +         /* If there are no empty components there is nothing more 
>>> for us to do.
>>> +          */
>>> +         if (used_comps == 0xf)
>>> +            continue;
>>> +
>>> +         bool found_new_offset = false;
>>> +         uint8_t interp = get_interp_type(var, 
>>> default_to_smooth_interp);
>>> +         for (; cursor[interp] < 32; cursor[interp]++) {
>>> +
>>> +            /* We couldn't find anywhere to pack the varying 
>>> continue on. */
>>> +            if (cursor[interp] == location)
>>> +               break;
>>> +
>>> +            /* We can only pack varyings with matching interpolation 
>>> types */
>>> +            if (interp_type[cursor[interp]] != interp)
>>> +               continue;
>>> +
>>> +            uint8_t cursor_used_comps = comps[cursor[interp]];
>>> +
>>> +            /* If the slot is empty just skip it for now, 
>>> compact_var_list()
>>> +             * can be called after this function to remove empty 
>>> slots for us.
>>> +             * TODO: finish implementing compact_var_list() requires 
>>> array and
>>> +             * matrix splitting.
>>> +             */
>>> +            if (!cursor_used_comps)
>>> +               continue;
>>> +
>>> +            uint8_t unused_comps = ~cursor_used_comps;
>>> +
>>> +            for (unsigned i = 0; i < 4; i++) {
>>> +               uint8_t new_var_comps = 1 << i;
>>> +               if (unused_comps & new_var_comps) {
>>> +                  remap[location][var->data.location_frac].component 
>>> = i;
>>> +                  remap[location][var->data.location_frac].location =
>>> +                     cursor[interp] + VARYING_SLOT_VAR0;
>>> +
>>> +                  found_new_offset = true;
>>> +
>>> +                  /* Turn off the mask for the component we are 
>>> remapping */
>>> +                  if (comps[location] & 1 << var->data.location_frac) {
>>> +                     comps[location] ^= 1 << var->data.location_frac;
>>> +                     comps[cursor[interp]] |= new_var_comps;
>>> +                  }
>>> +                  break;
>>> +               }
>>> +            }
>>> +
>>> +            if (found_new_offset)
>>> +               break;
>>> +         }
>>> +      }
>>> +   }
>>> +
>>> +   remap_slots_and_components(input_list, remap);
>>> +   remap_slots_and_components(output_list, remap);
>>> +}
>>> +
>>> +/* We assume that this has been called more-or-less directly after
>>> + * remove_unused_varyings.  At this point, all of the varyings that we
>>> + * aren't going to be using have been completely removed and the
>>> + * inputs_read and outputs_written fields in nir_shader_info reflect
>>> + * this.  Therefore, the total set of valid slots is the OR of the two
>>> + * sets of varyings;  this accounts for varyings which one side may 
>>> need
>>> + * to read/write even if the other doesn't.  This can happen if, for
>>> + * instance, an array is used indirectly from one side causing it to be
>>> + * unsplittable but directly from the other.
>>> + */
>>> +void
>>> +nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
>>> +                     bool default_to_smooth_interp)
>>> +{
>>> +   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
>>> +   assert(consumer->info.stage != MESA_SHADER_VERTEX);
>>> +
>>> +   uint8_t comps[32] = {0};
>>> +   uint8_t interp_type[32] = {0};
>>> +
>>> +   get_slot_component_masks_and_interp_types(&producer->outputs, comps,
>>> +                                             interp_type,
>>> +                                             producer->info.stage,
>>> +                                             default_to_smooth_interp);
>>> +   get_slot_component_masks_and_interp_types(&consumer->inputs, comps,
>>> +                                             interp_type,
>>> +                                             consumer->info.stage,
>>> +                                             default_to_smooth_interp);
>>> +
>>> +   compact_components(producer, consumer, comps, interp_type,
>>> +                      default_to_smooth_interp);
>>> +}
>>> -- 
>>> 2.13.6
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 


More information about the mesa-dev mailing list