[Mesa-dev] [PATCH 09/18] i965: add support for packing arrays

Wed Jun 15 05:38:41 UTC 2016

Here we add a new helper function calc_type_size_offset() to help
calculate the size of a varying once packing is taken into account.
---
 src/compiler/nir/nir_lower_io.c | 55 +++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index c25790a..b966348 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -41,6 +41,36 @@ struct lower_io_state {
    nir_variable_mode modes;
 };
 
+/**
+ * Returns the extra size (in type_size units) taken up by the components that
+ * are packed into the same location beyond this type's own; may be zero.
+ */
+static unsigned
+calc_type_size_offset(unsigned num_packed_components,
+                      const struct glsl_type *type,
+                      int (*type_size)(const struct glsl_type *))
+{
+   unsigned base_size;
+   const struct glsl_type *wa = glsl_without_array(type);
+   int comp_diff = num_packed_components - glsl_get_vector_elements(wa);
+
+   /* Nothing to add if the type already fills every packed component, or if
+    * type_size sizes float and double alike, i.e. treats everything as vec4.
+    */
+   if (comp_diff <= 0 ||
+       type_size(glsl_float_type()) == type_size(glsl_double_type()))
+      return 0;
+
+   if (glsl_get_base_type(wa) == GLSL_TYPE_DOUBLE) {
+      base_size = type_size(glsl_dvec_type(comp_diff));
+   } else {
+      base_size = type_size(glsl_vec_type(comp_diff));
+   }
+
+   return glsl_type_is_array(type) ? base_size * glsl_get_aoa_size(type) :
+      base_size;
+}
+
 void
 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                          unsigned base_offset,
@@ -74,13 +104,17 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
          if (locations[idx][var->data.index] == -1) {
             var->data.driver_location = location;
             locations[idx][var->data.index] = location;
-            location += type_size(var->type);
+            location += type_size(var->type) +
+               calc_type_size_offset(var->data.num_packed_components,
+                                     var->type, type_size);
          } else {
             var->data.driver_location = locations[idx][var->data.index];
          }
       } else {
          var->data.driver_location = location;
-         location += type_size(var->type);
+         location += type_size(var->type) +
+            calc_type_size_offset(var->data.num_packed_components, var->type,
+                                  type_size);
       }
    }
 
@@ -113,7 +147,8 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
 static nir_ssa_def *
 get_io_offset(nir_builder *b, nir_deref_var *deref,
               nir_ssa_def **vertex_index,
-              int (*type_size)(const struct glsl_type *))
+              int (*type_size)(const struct glsl_type *),
+              unsigned num_packed_components)
 {
    nir_deref *tail = &deref->deref;
 
@@ -141,7 +176,9 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
 
       if (tail->deref_type == nir_deref_type_array) {
          nir_deref_array *deref_array = nir_deref_as_array(tail);
-         unsigned size = type_size(tail->type);
+         unsigned size = type_size(tail->type) +
+            calc_type_size_offset(num_packed_components, tail->type,
+                                  type_size);
 
          offset = nir_iadd(b, offset,
                            nir_imm_int(b, size * deref_array->base_offset));
@@ -289,7 +326,9 @@ nir_lower_io_block(nir_block *block,
 
          offset = get_io_offset(b, intrin->variables[0],
                                 per_vertex ? &vertex_index : NULL,
-                                state->type_size);
+                                state->type_size,
+                                intrin->variables[0]->var->
+                                   data.num_packed_components);
 
          nir_intrinsic_instr *load =
             nir_intrinsic_instr_create(state->mem_ctx,
@@ -339,7 +378,9 @@ nir_lower_io_block(nir_block *block,
 
          offset = get_io_offset(b, intrin->variables[0],
                                 per_vertex ? &vertex_index : NULL,
-                                state->type_size);
+                                state->type_size,
+                                intrin->variables[0]->var->
+                                   data.num_packed_components);
 
          nir_intrinsic_instr *store =
             nir_intrinsic_instr_create(state->mem_ctx,
@@ -381,7 +422,7 @@ nir_lower_io_block(nir_block *block,
          nir_ssa_def *offset;
 
          offset = get_io_offset(b, intrin->variables[0],
-                                NULL, state->type_size);
+                                NULL, state->type_size, 0);
 
          nir_intrinsic_instr *atomic =
             nir_intrinsic_instr_create(state->mem_ctx,
-- 
2.5.5