[Mesa-dev] [RFCv3 03/11] nir: add helper to calculate type size

Mon Feb 1 15:35:35 UTC 2016

A few things:

- How much work would it take to actually deduplicate this with the
other vec4 type_size implementations out there? I'm pretty sure no one
would object to it, and if we just leave this as a TODO it probably
won't actually get done.
- Having to include nir_types.h in something that has nothing to do
with NIR doesn't seem so great. It would probably be cleaner to
introduce this as a method in ir_types.h and then wrap it like we do
with everything else. This one isn't as important, though.

On Sun, Jan 31, 2016 at 3:16 PM, Rob Clark <robdclark at gmail.com> wrote:
> From: Rob Clark <robclark at freedesktop.org>
>
> Currently the vec4 version of this is duplicated in brw and mesa/st
> (although the mesa/st version handles 64b types).  There is also a
> scalar version floating around in brw.  We should consolidate all of
> these.
>
> Signed-off-by: Rob Clark <robclark at freedesktop.org>
> ---
>  src/compiler/nir_types.cpp                 |  75 ++++++++++++++++++++
>  src/compiler/nir_types.h                   |   3 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 107 +++++------------------------
>  3 files changed, 97 insertions(+), 88 deletions(-)
>
> diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
> index a87dcd8..3ff828a 100644
> --- a/src/compiler/nir_types.cpp
> +++ b/src/compiler/nir_types.cpp
> @@ -190,3 +190,78 @@ glsl_array_type(const glsl_type *base, unsigned elements)
>  {
>     return glsl_type::get_array_instance(base, elements);
>  }
> +
> +/* TODO dup'd from brw_vec4_vistor.cpp..  what should we do?
> + * TODO probably should copy the scalar version to, and de-dup
> + * equiv fxns from mesa/st and brw..
> + */
> +
> +int
> +glsl_attrib_type_size_vec4(const struct glsl_type *type, bool is_vs_input)
> +{
> +   unsigned int i;
> +   int size;
> +
> +   switch (type->base_type) {
> +   case GLSL_TYPE_UINT:
> +   case GLSL_TYPE_INT:
> +   case GLSL_TYPE_FLOAT:
> +   case GLSL_TYPE_BOOL:
> +      if (type->is_matrix()) {
> +         return type->matrix_columns;
> +      } else {
> +         /* Regardless of size of vector, it gets a vec4. This is bad
> +          * packing for things like floats, but otherwise arrays become a
> +          * mess.  Hopefully a later pass over the code can pack scalars
> +          * down if appropriate.
> +          */
> +         return 1;
> +      }
> +      break;
> +   case GLSL_TYPE_DOUBLE:
> +      if (type->is_matrix()) {
> +         if (type->vector_elements <= 2 || is_vs_input)
> +            return type->matrix_columns;
> +         else
> +            return type->matrix_columns * 2;
> +      } else {
> +         /* For doubles if we have a double or dvec2 they fit in one
> +          * vec4, else they need 2 vec4s.
> +          */
> +         if (type->vector_elements <= 2 || is_vs_input)
> +            return 1;
> +         else
> +            return 2;
> +      }
> +      break;
> +   case GLSL_TYPE_ARRAY:
> +      assert(type->length > 0);
> +      return glsl_attrib_type_size_vec4(type->fields.array, is_vs_input) * type->length;
> +   case GLSL_TYPE_STRUCT:
> +      size = 0;
> +      for (i = 0; i < type->length; i++) {
> +         size += glsl_attrib_type_size_vec4(type->fields.structure[i].type, is_vs_input);
> +      }
> +      return size;
> +   case GLSL_TYPE_SAMPLER:
> +   case GLSL_TYPE_IMAGE:
> +   case GLSL_TYPE_SUBROUTINE:
> +      /* Samplers take up one slot in UNIFORMS[], but they're baked in
> +       * at link time.
> +       */
> +      return 1;
> +   case GLSL_TYPE_ATOMIC_UINT:
> +   case GLSL_TYPE_INTERFACE:
> +   case GLSL_TYPE_VOID:
> +   case GLSL_TYPE_ERROR:
> +      assert(!"Invalid type in type_size");
> +      break;
> +   }
> +   return 0;
> +}
> +
> +int
> +glsl_type_size_vec4(const struct glsl_type *type)
> +{
> +   return glsl_attrib_type_size_vec4(type, false);
> +}
> diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
> index 32fc766..fbf458d 100644
> --- a/src/compiler/nir_types.h
> +++ b/src/compiler/nir_types.h
> @@ -82,6 +82,9 @@ const struct glsl_type *glsl_uint_type(void);
>  const struct glsl_type *glsl_array_type(const struct glsl_type *base,
>                                          unsigned elements);
>
> +int glsl_attrib_type_size_vec4(const struct glsl_type *type, bool is_vs_input);
> +int glsl_type_size_vec4(const struct glsl_type *type);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index baf3504..d6459e5 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -34,6 +34,7 @@
>
>  #include "compiler/glsl/glsl_parser_extras.h"
>  #include "compiler/glsl/ir_optimization.h"
> +#include "compiler/nir_types.h"
>
>  #include "main/errors.h"
>  #include "main/shaderobj.h"
> @@ -1142,76 +1143,6 @@ glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
>        return st_src_reg_for_float(val);
>  }
>
> -static int
> -attrib_type_size(const struct glsl_type *type, bool is_vs_input)
> -{
> -   unsigned int i;
> -   int size;
> -
> -   switch (type->base_type) {
> -   case GLSL_TYPE_UINT:
> -   case GLSL_TYPE_INT:
> -   case GLSL_TYPE_FLOAT:
> -   case GLSL_TYPE_BOOL:
> -      if (type->is_matrix()) {
> -         return type->matrix_columns;
> -      } else {
> -         /* Regardless of size of vector, it gets a vec4. This is bad
> -          * packing for things like floats, but otherwise arrays become a
> -          * mess.  Hopefully a later pass over the code can pack scalars
> -          * down if appropriate.
> -          */
> -         return 1;
> -      }
> -      break;
> -   case GLSL_TYPE_DOUBLE:
> -      if (type->is_matrix()) {
> -         if (type->vector_elements <= 2 || is_vs_input)
> -            return type->matrix_columns;
> -         else
> -            return type->matrix_columns * 2;
> -      } else {
> -         /* For doubles if we have a double or dvec2 they fit in one
> -          * vec4, else they need 2 vec4s.
> -          */
> -         if (type->vector_elements <= 2 || is_vs_input)
> -            return 1;
> -         else
> -            return 2;
> -      }
> -      break;
> -   case GLSL_TYPE_ARRAY:
> -      assert(type->length > 0);
> -      return attrib_type_size(type->fields.array, is_vs_input) * type->length;
> -   case GLSL_TYPE_STRUCT:
> -      size = 0;
> -      for (i = 0; i < type->length; i++) {
> -         size += attrib_type_size(type->fields.structure[i].type, is_vs_input);
> -      }
> -      return size;
> -   case GLSL_TYPE_SAMPLER:
> -   case GLSL_TYPE_IMAGE:
> -   case GLSL_TYPE_SUBROUTINE:
> -      /* Samplers take up one slot in UNIFORMS[], but they're baked in
> -       * at link time.
> -       */
> -      return 1;
> -   case GLSL_TYPE_ATOMIC_UINT:
> -   case GLSL_TYPE_INTERFACE:
> -   case GLSL_TYPE_VOID:
> -   case GLSL_TYPE_ERROR:
> -      assert(!"Invalid type in type_size");
> -      break;
> -   }
> -   return 0;
> -}
> -
> -static int
> -type_size(const struct glsl_type *type)
> -{
> -  return attrib_type_size(type, false);
> -}
> -
>  /**
>   * If the given GLSL type is an array or matrix or a structure containing
>   * an array/matrix member, return true.  Else return false.
> @@ -1262,13 +1193,13 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
>
>        src.file = PROGRAM_ARRAY;
>        src.index = next_array << 16 | 0x8000;
> -      array_sizes[next_array] = type_size(type);
> +      array_sizes[next_array] = glsl_type_size_vec4(type);
>        ++next_array;
>
>     } else {
>        src.file = PROGRAM_TEMPORARY;
>        src.index = next_temp;
> -      next_temp += type_size(type);
> +      next_temp += glsl_type_size_vec4(type);
>     }
>
>     if (type->is_array() || type->is_record()) {
> @@ -1332,7 +1263,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
>            * of the type.  However, this had better match the number of state
>            * elements that we're going to copy into the new temporary.
>            */
> -         assert((int) ir->get_num_state_slots() == type_size(ir->type));
> +         assert((int) ir->get_num_state_slots() == glsl_type_size_vec4(ir->type));
>
>           dst = st_dst_reg(get_temp(ir->type));
>
> @@ -1371,7 +1302,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
>           fail_link(this->shader_program,
>                    "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
>                    ir->name, dst.index - storage->index,
> -                  type_size(ir->type));
> +                  glsl_type_size_vec4(ir->type));
>        }
>     }
>  }
> @@ -2370,10 +2301,10 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
>              decl->mesa_index = var->data.location;
>              decl->array_id = num_input_arrays + 1;
>              if (is_2d) {
> -               decl->array_size = type_size(var->type->fields.array);
> +               decl->array_size = glsl_type_size_vec4(var->type->fields.array);
>                 decl->array_type = var->type->fields.array->without_array()->base_type;
>              } else {
> -               decl->array_size = type_size(var->type);
> +               decl->array_size = glsl_type_size_vec4(var->type);
>                 decl->array_type = var->type->without_array()->base_type;
>              }
>              num_input_arrays++;
> @@ -2399,10 +2330,10 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
>              decl->mesa_index = var->data.location;
>              decl->array_id = num_output_arrays + 1;
>              if (is_2d) {
> -               decl->array_size = type_size(var->type->fields.array);
> +               decl->array_size = glsl_type_size_vec4(var->type->fields.array);
>                 decl->array_type = var->type->fields.array->without_array()->base_type;
>              } else {
> -               decl->array_size = type_size(var->type);
> +               decl->array_size = glsl_type_size_vec4(var->type);
>                 decl->array_type = var->type->without_array()->base_type;
>              }
>              num_output_arrays++;
> @@ -2506,7 +2437,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
>  {
>     ir_constant *index;
>     st_src_reg src;
> -   int element_size = type_size(ir->type);
> +   int element_size = glsl_type_size_vec4(ir->type);
>     bool is_2D = false;
>
>     index = ir->array_index->constant_expression_value();
> @@ -2537,7 +2468,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
>
>        if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
>           src.file == PROGRAM_INPUT)
> -        element_size = attrib_type_size(ir->type, true);
> +        element_size = glsl_attrib_type_size_vec4(ir->type, true);
>        if (is_2D) {
>           src.index2D = index->value.i[0];
>           src.has_index2 = true;
> @@ -2610,7 +2541,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
>     for (i = 0; i < struct_type->length; i++) {
>        if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
>           break;
> -      offset += type_size(struct_type->fields.structure[i].type);
> +      offset += glsl_type_size_vec4(struct_type->fields.structure[i].type);
>     }
>
>     /* If the type is smaller than a vec4, replicate the last channel out. */
> @@ -2911,7 +2842,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>     } else if (ir->rhs->as_expression() &&
>                this->instructions.get_tail() &&
>                ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
> -              type_size(ir->lhs->type) == 1 &&
> +              glsl_type_size_vec4(ir->lhs->type) == 1 &&
>                l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
>        /* To avoid emitting an extra MOV when assigning an expression to a
>         * variable, emit the last instruction of the expression again, but
> @@ -2950,7 +2881,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
>        st_dst_reg temp = st_dst_reg(temp_base);
>
>        foreach_in_list(ir_constant, field_value, &ir->components) {
> -         int size = type_size(field_value->type);
> +         int size = glsl_type_size_vec4(field_value->type);
>
>           assert(size > 0);
>
> @@ -2971,7 +2902,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
>     if (ir->type->is_array()) {
>        st_src_reg temp_base = get_temp(ir->type);
>        st_dst_reg temp = st_dst_reg(temp_base);
> -      int size = type_size(ir->type->fields.array);
> +      int size = glsl_type_size_vec4(ir->type->fields.array);
>
>        assert(size > 0);
>        in_array++;
> @@ -3394,7 +3325,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>           l.writemask = WRITEMASK_XYZW;
>           l.cond_mask = COND_TR;
>
> -         for (i = 0; i < type_size(param->type); i++) {
> +         for (i = 0; i < glsl_type_size_vec4(param->type); i++) {
>              emit_asm(ir, TGSI_OPCODE_MOV, l, r);
>              l.index++;
>              r.index++;
> @@ -3427,7 +3358,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>           param_rval->accept(this);
>           st_dst_reg l = st_dst_reg(this->result);
>
> -         for (i = 0; i < type_size(param->type); i++) {
> +         for (i = 0; i < glsl_type_size_vec4(param->type); i++) {
>              emit_asm(ir, TGSI_OPCODE_MOV, l, r);
>              l.index++;
>              r.index++;
> @@ -3562,7 +3493,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>              const glsl_type *elt_type = ir->offset->type->fields.array;
>              for (i = 0; i < ir->offset->type->length; i++) {
>                 offset[i] = this->result;
> -               offset[i].index += i * type_size(elt_type);
> +               offset[i].index += i * glsl_type_size_vec4(elt_type);
>                 offset[i].type = elt_type->base_type;
>                 offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
>              }
> @@ -3778,7 +3709,7 @@ glsl_to_tgsi_visitor::visit(ir_return *ir)
>
>        l = st_dst_reg(current_function->return_reg);
>
> -      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
> +      for (i = 0; i < glsl_type_size_vec4(current_function->sig->return_type); i++) {
>           emit_asm(ir, TGSI_OPCODE_MOV, l, r);
>           l.index++;
>           r.index++;
> --
> 2.5.0
>