[Mesa-dev] [PATCH 10/18] i965: add support for packing arrays

Fri Jun 10 23:03:30 UTC 2016

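Add a num_packed_components parameter to the type_size callbacks so that
callers can pass in the size of a varying once packing is taken into
account. Passing 0 selects the unpacked size derived from the type alone.
For now only type_size_scalar_packed() uses the new value; the other
implementations accept the parameter but ignore it.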
---
 src/compiler/nir/nir.h                         |  6 +++--
 src/compiler/nir/nir_lower_io.c                | 37 +++++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_blorp.c          |  6 +++--
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 26 ++++++++++++++----
 src/mesa/drivers/dri/i965/brw_nir.c            | 16 +++++------
 src/mesa/drivers/dri/i965/brw_nir.h            |  6 +++--
 src/mesa/drivers/dri/i965/brw_shader.h         |  6 +++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 19 ++++++++++---
 src/mesa/state_tracker/st_glsl_to_nir.cpp      |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp     |  2 +-
 src/mesa/state_tracker/st_glsl_types.cpp       |  3 ++-
 src/mesa/state_tracker/st_glsl_types.h         |  2 +-
 12 files changed, 90 insertions(+), 41 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 514b455..5789f67 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2317,11 +2317,13 @@ void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                               unsigned base_offset,
-                              int (*type_size)(const struct glsl_type *));
+                              int (*type_size)(const struct glsl_type *,
+                                               unsigned num_packed_components));
 
 void nir_lower_io(nir_shader *shader,
                   nir_variable_mode modes,
-                  int (*type_size)(const struct glsl_type *));
+                  int (*type_size)(const struct glsl_type *,
+                                   unsigned num_packed_components));
 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
 nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
 
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 0a6e1a8..941aa2d 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -37,14 +37,16 @@
 struct lower_io_state {
    nir_builder builder;
    void *mem_ctx;
-   int (*type_size)(const struct glsl_type *type);
+   int (*type_size)(const struct glsl_type *type,
+                    unsigned num_packed_components);
    nir_variable_mode modes;
 };
 
 void
 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                          unsigned base_offset,
-                         int (*type_size)(const struct glsl_type *))
+                         int (*type_size)(const struct glsl_type *,
+                                          unsigned num_packed_components))
 {
    unsigned location = 0;
 
@@ -74,13 +76,13 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
          if (locations[idx][var->data.index] == -1) {
             var->data.driver_location = location;
             locations[idx][var->data.index] = location;
-            location += type_size(var->type);
+            location += type_size(var->type, var->data.num_packed_components);
          } else {
             var->data.driver_location = locations[idx][var->data.index];
          }
       } else {
          var->data.driver_location = location;
-         location += type_size(var->type);
+         location += type_size(var->type, var->data.num_packed_components);
       }
    }
 
@@ -113,7 +115,9 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
 static nir_ssa_def *
 get_io_offset(nir_builder *b, nir_deref_var *deref,
               nir_ssa_def **vertex_index,
-              int (*type_size)(const struct glsl_type *))
+              int (*type_size)(const struct glsl_type *,
+                               unsigned num_packed_components),
+              unsigned num_packed_components)
 {
    nir_deref *tail = &deref->deref;
 
@@ -141,7 +145,7 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
 
       if (tail->deref_type == nir_deref_type_array) {
          nir_deref_array *deref_array = nir_deref_as_array(tail);
-         unsigned size = type_size(tail->type);
+         unsigned size = type_size(tail->type, num_packed_components);
 
          offset = nir_iadd(b, offset,
                            nir_imm_int(b, size * deref_array->base_offset));
@@ -158,7 +162,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
 
          unsigned field_offset = 0;
          for (unsigned i = 0; i < deref_struct->index; i++) {
-            field_offset += type_size(glsl_get_struct_field(parent_type, i));
+            field_offset +=
+               type_size(glsl_get_struct_field(parent_type, i), 0);
          }
          offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
       }
@@ -289,7 +294,9 @@ nir_lower_io_block(nir_block *block,
 
          offset = get_io_offset(b, intrin->variables[0],
                                 per_vertex ? &vertex_index : NULL,
-                                state->type_size);
+                                state->type_size,
+                                intrin->variables[0]->var->
+                                   data.num_packed_components);
 
          nir_intrinsic_instr *load =
             nir_intrinsic_instr_create(state->mem_ctx,
@@ -305,7 +312,7 @@ nir_lower_io_block(nir_block *block,
 
          if (load->intrinsic == nir_intrinsic_load_uniform) {
             nir_intrinsic_set_range(load,
-               state->type_size(intrin->variables[0]->var->type));
+               state->type_size(intrin->variables[0]->var->type, 0));
          }
 
          if (per_vertex)
@@ -339,7 +346,9 @@ nir_lower_io_block(nir_block *block,
 
          offset = get_io_offset(b, intrin->variables[0],
                                 per_vertex ? &vertex_index : NULL,
-                                state->type_size);
+                                state->type_size,
+                                intrin->variables[0]->var->
+                                   data.num_packed_components);
 
          nir_intrinsic_instr *store =
             nir_intrinsic_instr_create(state->mem_ctx,
@@ -381,7 +390,7 @@ nir_lower_io_block(nir_block *block,
          nir_ssa_def *offset;
 
          offset = get_io_offset(b, intrin->variables[0],
-                                NULL, state->type_size);
+                                NULL, state->type_size, 0);
 
          nir_intrinsic_instr *atomic =
             nir_intrinsic_instr_create(state->mem_ctx,
@@ -424,7 +433,8 @@ nir_lower_io_block(nir_block *block,
 static void
 nir_lower_io_impl(nir_function_impl *impl,
                   nir_variable_mode modes,
-                  int (*type_size)(const struct glsl_type *))
+                  int (*type_size)(const struct glsl_type *,
+                                   unsigned num_packed_components))
 {
    struct lower_io_state state;
 
@@ -443,7 +453,8 @@ nir_lower_io_impl(nir_function_impl *impl,
 
 void
 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
-             int (*type_size)(const struct glsl_type *))
+             int (*type_size)(const struct glsl_type *,
+                              unsigned num_packed_components))
 {
    nir_foreach_function(function, shader) {
       if (function->impl)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 9590968..97ddfa9 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -157,8 +157,10 @@ brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key)
 }
 
 static int
-nir_uniform_type_size(const struct glsl_type *type)
+nir_uniform_type_size(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
+
    /* Only very basic types are allowed */
    assert(glsl_type_is_vector_or_scalar(type));
    assert(glsl_get_bit_size(type) == 32);
@@ -216,7 +218,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
    nir->num_uniforms = 0;
    nir_foreach_variable(var, &nir->uniforms) {
       var->data.driver_location = var->data.location;
-      unsigned end = var->data.location + nir_uniform_type_size(var->type);
+      unsigned end = var->data.location + nir_uniform_type_size(var->type, 0);
       nir->num_uniforms = MAX2(nir->num_uniforms, end);
    }
    nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8997e1a..22a48bc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -464,7 +464,8 @@ fs_reg::component_size(unsigned width) const
 }
 
 extern "C" int
-type_size_scalar(const struct glsl_type *type)
+type_size_scalar_packed(const struct glsl_type *type,
+                        unsigned num_packed_components)
 {
    unsigned int size, i;
 
@@ -473,11 +474,18 @@ type_size_scalar(const struct glsl_type *type)
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
-      return type->components();
+      if (num_packed_components)
+         return num_packed_components;
+      else
+         return type->components();
    case GLSL_TYPE_DOUBLE:
+      if (num_packed_components)
+         return num_packed_components * 2;
+      else
       return type->components() * 2;
    case GLSL_TYPE_ARRAY:
-      return type_size_scalar(type->fields.array) * type->length;
+      return type_size_scalar_packed(type->fields.array,
+                                     num_packed_components) * type->length;
    case GLSL_TYPE_STRUCT:
       size = 0;
       for (i = 0; i < type->length; i++) {
@@ -505,6 +513,12 @@ type_size_scalar(const struct glsl_type *type)
    return 0;
 }
 
+extern int
+type_size_scalar(const struct glsl_type *type)
+{
+   return type_size_scalar_packed(type, 0);
+}
+
 /**
  * Returns the number of scalar components needed to store type, assuming
  * that vectors are padded out to vec4.
@@ -513,8 +527,9 @@ type_size_scalar(const struct glsl_type *type)
  * similar to type_size_scalar().
  */
 extern "C" int
-type_size_vec4_times_4(const struct glsl_type *type)
+type_size_vec4_times_4(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
    return 4 * type_size_vec4(type);
 }
 
@@ -522,8 +537,9 @@ type_size_vec4_times_4(const struct glsl_type *type)
  * except for double-precision types, which are loaded as one dvec4.
  */
 extern "C" int
-type_size_vs_input(const struct glsl_type *type)
+type_size_vs_input(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
    if (type->is_double()) {
       return type_size_dvec4(type);
    } else {
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 5aac0d7..ef81722 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -236,7 +236,7 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
    }
 
    /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
-   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+   nir_lower_io(nir, nir_var_shader_in, type_size_vec4_packed);
 
    if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) {
       /* This pass needs actual constants */
@@ -261,7 +261,7 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
       var->data.driver_location = var->data.location;
    }
 
-   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+   nir_lower_io(nir, nir_var_shader_in, type_size_vec4_packed);
 
    /* This pass needs actual constants */
    nir_opt_constant_folding(nir);
@@ -283,8 +283,8 @@ void
 brw_nir_lower_fs_inputs(nir_shader *nir)
 {
    nir_assign_var_locations(&nir->inputs, &nir->num_inputs, VARYING_SLOT_VAR0,
-                            type_size_scalar);
-   nir_lower_io(nir, nir_var_shader_in, type_size_scalar);
+                            type_size_scalar_packed);
+   nir_lower_io(nir, nir_var_shader_in, type_size_scalar_packed);
 }
 
 void
@@ -299,7 +299,7 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
    } else {
       nir_foreach_variable(var, &nir->outputs)
          var->data.driver_location = var->data.location;
-      nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
+      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_packed);
    }
 }
 
@@ -310,7 +310,7 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
       var->data.driver_location = var->data.location;
    }
 
-   nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
+   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_packed);
 
    /* This pass needs actual constants */
    nir_opt_constant_folding(nir);
@@ -332,8 +332,8 @@ void
 brw_nir_lower_fs_outputs(nir_shader *nir)
 {
    nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                            FRAG_RESULT_DATA0, type_size_scalar);
-   nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
+                            FRAG_RESULT_DATA0, type_size_scalar_packed);
+   nir_lower_io(nir, nir_var_shader_out, type_size_scalar_packed);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 74c354f..780a9da 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -33,14 +33,16 @@ extern "C" {
 #endif
 
 static inline int
-type_size_scalar_bytes(const struct glsl_type *type)
+type_size_scalar_bytes(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
    return type_size_scalar(type) * 4;
 }
 
 static inline int
-type_size_vec4_bytes(const struct glsl_type *type)
+type_size_vec4_bytes(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
    return type_size_vec4(type) * 16;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 656dc89..2bb6838 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -294,8 +294,10 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ
 int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
 int type_size_dvec4(const struct glsl_type *type);
-int type_size_vec4_times_4(const struct glsl_type *type);
-int type_size_vs_input(const struct glsl_type *type);
+int type_size_vs_input(const struct glsl_type *type, unsigned x);
+int type_size_scalar_packed(const struct glsl_type *type, unsigned x);
+int type_size_vec4_packed(const struct glsl_type *type, unsigned x);
+int type_size_vec4_times_4(const struct glsl_type *type, unsigned x);
 
 unsigned tesslevel_outer_components(GLenum tes_primitive_mode);
 unsigned tesslevel_inner_components(GLenum tes_primitive_mode);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b392919..536f0ca 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -566,9 +566,15 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
    emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
 }
 
-/*
- * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
- * false) elements needed to pack a type.
+/**
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
  */
 static int
 type_size_xvec4(const struct glsl_type *type, bool as_vec4)
@@ -667,6 +673,13 @@ type_size_dvec4(const struct glsl_type *type)
    return type_size_xvec4(type, false);
 }
 
+extern "C" int
+type_size_vec4_packed(const struct glsl_type *type, unsigned x)
+{
+   (void) x;
+   return type_size_vec4(type);
+}
+
 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 3008411..73b7e8a 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -188,7 +188,7 @@ st_nir_assign_uniform_locations(struct gl_program *prog,
 
       uniform->data.driver_location = loc;
 
-      max = MAX2(max, loc + st_glsl_type_size(uniform->type));
+      max = MAX2(max, loc + st_glsl_type_size(uniform->type, 0));
    }
    *size = max;
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b7eaa13..e6bf467 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1171,7 +1171,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input)
 static int
 type_size(const struct glsl_type *type)
 {
-   return st_glsl_type_size(type);
+   return st_glsl_type_size(type, 0);
 }
 
 /**
diff --git a/src/mesa/state_tracker/st_glsl_types.cpp b/src/mesa/state_tracker/st_glsl_types.cpp
index 857e143..14ec5d8 100644
--- a/src/mesa/state_tracker/st_glsl_types.cpp
+++ b/src/mesa/state_tracker/st_glsl_types.cpp
@@ -95,7 +95,8 @@ st_glsl_attrib_type_size(const struct glsl_type *type, bool is_vs_input)
 }
 
 int
-st_glsl_type_size(const struct glsl_type *type)
+st_glsl_type_size(const struct glsl_type *type, unsigned x)
 {
+   (void) x;
    return st_glsl_attrib_type_size(type, false);
 }
diff --git a/src/mesa/state_tracker/st_glsl_types.h b/src/mesa/state_tracker/st_glsl_types.h
index 3a39cee..86d155a 100644
--- a/src/mesa/state_tracker/st_glsl_types.h
+++ b/src/mesa/state_tracker/st_glsl_types.h
@@ -34,7 +34,7 @@ extern "C" {
 #endif
 
 int st_glsl_attrib_type_size(const struct glsl_type *type, bool is_vs_input);
-int st_glsl_type_size(const struct glsl_type *type);
+int st_glsl_type_size(const struct glsl_type *type, unsigned x);
 
 
 #ifdef __cplusplus
-- 
2.5.5