<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Mon, Nov 14, 2016 at 5:41 PM, Kenneth Graunke <span dir="ltr">&lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[],<br>
gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar<br>
arrays.  Normal scalar arrays are sparse - each array element usually<br>
occupies a whole vec4 slot.  However, most hardware assumes these<br>
built-in arrays are tightly packed.<br>
<br>
The new var->data.compact flag indicates that a scalar array should<br>
be tightly packed, so a float[4] array would take up a single vec4<br>
slot, and a float[8] array would take up two slots.<br>
<br>
They are still arrays, not vec4s, however.  nir_lower_io will generate<br>
intrinsics using ARB_enhanced_layouts style component qualifiers.<br>
<br>
Signed-off-by: Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>&gt;<br>
---<br>
 src/compiler/glsl/glsl_to_nir.<wbr>cpp            |  1 +<br>
 src/compiler/nir/nir.h                       |  7 +++++<br>
 src/compiler/nir/nir_gather_<wbr>info.c           |  9 ++++--<br>
 src/compiler/nir/nir_lower_<wbr>indirect_derefs.c |  8 +++--<br>
 src/compiler/nir/nir_lower_io.<wbr>c              | 44 ++++++++++++++++++++--------<br>
 src/compiler/nir/nir_print.c                 |  4 ++-<br>
 6 files changed, 55 insertions(+), 18 deletions(-)<br>
<br>
diff --git a/src/compiler/glsl/glsl_to_<wbr>nir.cpp b/src/compiler/glsl/glsl_to_<wbr>nir.cpp<br>
index 6ca760b..ed1c739 100644<br>
--- a/src/compiler/glsl/glsl_to_<wbr>nir.cpp<br>
+++ b/src/compiler/glsl/glsl_to_<wbr>nir.cpp<br>
@@ -331,6 +331,7 @@ nir_visitor::visit(ir_variable *ir)<br>
    var->data.explicit_index = ir->data.explicit_index;<br>
    var->data.explicit_binding = ir->data.explicit_binding;<br>
    var->data.has_initializer = ir->data.has_initializer;<br>
+   var->data.compact = false;<br>
    var->data.location_frac = ir->data.location_frac;<br>
<br>
    switch (ir->data.depth_layout) {<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index 3d46384..0b78242 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -230,6 +230,13 @@ typedef struct nir_variable {<br>
       unsigned location_frac:2;<br>
<br>
       /**<br>
+       * If true, this variable represents an array of scalars that should<br>
+       * be tightly packed.  In other words, consecutive array elements<br>
+       * should be stored one component apart, rather than one slot apart.<br>
+       */<br>
+      unsigned compact:1;<br></blockquote><div><br></div><div>Should this 1-bit integer be a bool?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+      /**<br>
        * Whether this is a fragment shader output implicitly initialized with<br>
        * the previous contents of the specified render target at the<br>
        * framebuffer location corresponding to this shader invocation.<br>
diff --git a/src/compiler/nir/nir_gather_<wbr>info.c b/src/compiler/nir/nir_gather_<wbr>info.c<br>
index 63c8a42..4d07dda 100644<br>
--- a/src/compiler/nir/nir_gather_<wbr>info.c<br>
+++ b/src/compiler/nir/nir_gather_<wbr>info.c<br>
@@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable *var)<br>
        var->data.mode == nir_var_shader_in)<br>
       is_vertex_input = true;<br>
<br>
-   set_io_mask(shader, var, 0,<br>
-               glsl_count_attribute_slots(<wbr>type, is_vertex_input));<br>
+   const unsigned slots =<br>
+      var->data.compact ? DIV_ROUND_UP(glsl_get_length(<wbr>type), 4)<br>
+                        : glsl_count_attribute_slots(<wbr>type, is_vertex_input);<br></blockquote><div><br></div><div>By using glsl_get_length(), you're assuming that all compact things are 1-D arrays with no structs.  Is that your intention?  If so, we should probably assert so that we catch it if we ever change this in the future.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+   set_io_mask(shader, var, 0, slots);<br>
 }<br>
<br>
 static unsigned<br>
@@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)<br>
     * here marking the entire variable as used.<br>
     */<br>
    if (!(glsl_type_is_matrix(type) ||<br>
-         (glsl_type_is_array(type) &&<br>
+         (glsl_type_is_array(type) && !var->data.compact &&<br>
           (glsl_type_is_numeric(glsl_<wbr>without_array(type)) ||<br>
            glsl_type_is_boolean(glsl_<wbr>without_array(type)))))) {<br>
<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>indirect_derefs.c b/src/compiler/nir/nir_lower_<wbr>indirect_derefs.c<br>
index 356373e..5c97dc8e 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>indirect_derefs.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>indirect_derefs.c<br>
@@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b,<br>
       if (!deref_has_indirect(intrin-><wbr>variables[0]))<br>
          continue;<br>
<br>
-      /* Only lower variables whose mode is in the mask */<br>
-      if (!(modes & intrin->variables[0]->var-><wbr>data.mode))<br>
+      /* Only lower variables whose mode is in the mask, or compact<br>
+       * array variables.  (We can't handle indirects on tightly packed<br>
+       * scalar arrays, so we need to lower them regardless.)<br>
+       */<br>
+      if (!(modes & intrin->variables[0]->var-><wbr>data.mode) &&<br>
+          !intrin->variables[0]->var-><wbr>data.compact)<br>
          continue;<br>
<br>
       b->cursor = nir_before_instr(&intrin-><wbr>instr);<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>io.c b/src/compiler/nir/nir_lower_<wbr>io.c<br>
index a7e7f14..6628947 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>io.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>io.c<br>
@@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_<wbr>variable *var, gl_shader_stage stage)<br>
 static nir_ssa_def *<br>
 get_io_offset(nir_builder *b, nir_deref_var *deref,<br>
               nir_ssa_def **vertex_index,<br>
-              int (*type_size)(const struct glsl_type *))<br>
+              int (*type_size)(const struct glsl_type *),<br>
+              unsigned *component)<br>
 {<br>
    nir_deref *tail = &deref->deref;<br>
<br>
@@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,<br>
       *vertex_index = vtx;<br>
    }<br>
<br>
+   if (deref->var->data.compact) {<br>
+      assert(tail->child->deref_type == nir_deref_type_array);<br>
+      assert(glsl_type_is_scalar(<wbr>glsl_without_array(deref->var-<wbr>>type)));<br>
+      nir_deref_array *deref_array = nir_deref_as_array(tail-><wbr>child);<br>
+      /* We always lower indirect dereferences for "compact" array vars. */<br>
+      assert(deref_array->deref_<wbr>array_type == nir_deref_array_type_direct);<br>
+<br>
+      const unsigned total_offset = *component + deref_array->base_offset;<br>
+      const unsigned slot_offset = total_offset / 4;<br>
+      *component = total_offset % 4;<br>
+      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);<br>
+   }<br>
+<br>
    /* Just emit code and let constant-folding go to town */<br>
    nir_ssa_def *offset = nir_imm_int(b, 0);<br>
<br>
@@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,<br>
<br>
 static nir_intrinsic_instr *<br>
 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,<br>
-           nir_ssa_def *vertex_index, nir_ssa_def *offset)<br>
+           nir_ssa_def *vertex_index, nir_ssa_def *offset,<br>
+           unsigned component)<br>
 {<br>
    const nir_shader *nir = state->builder.shader;<br>
    nir_variable *var = intrin->variables[0]->var;<br>
@@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,<br>
<br>
    nir_intrinsic_set_base(load, var->data.driver_location);<br>
    if (mode == nir_var_shader_in || mode == nir_var_shader_out)<br>
-      nir_intrinsic_set_component(<wbr>load, var->data.location_frac);<br>
+      nir_intrinsic_set_component(<wbr>load, component);<br>
<br>
    if (load->intrinsic == nir_intrinsic_load_uniform)<br>
       nir_intrinsic_set_range(load, state->type_size(var->type));<br>
@@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,<br>
<br>
 static nir_intrinsic_instr *<br>
 lower_store(nir_intrinsic_<wbr>instr *intrin, struct lower_io_state *state,<br>
-            nir_ssa_def *vertex_index, nir_ssa_def *offset)<br>
+            nir_ssa_def *vertex_index, nir_ssa_def *offset,<br>
+            unsigned component)<br>
 {<br>
    nir_variable *var = intrin->variables[0]->var;<br>
    nir_variable_mode mode = var->data.mode;<br>
@@ -236,7 +252,7 @@ lower_store(nir_intrinsic_<wbr>instr *intrin, struct lower_io_state *state,<br>
    nir_intrinsic_set_base(store, var->data.driver_location);<br>
<br>
    if (mode == nir_var_shader_out)<br>
-      nir_intrinsic_set_component(<wbr>store, var->data.location_frac);<br>
+      nir_intrinsic_set_component(<wbr>store, component);<br>
<br>
    nir_intrinsic_set_write_mask(<wbr>store, nir_intrinsic_write_mask(<wbr>intrin));<br>
<br>
@@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_<wbr>instr *intrin, struct lower_io_state *state,<br>
<br>
 static nir_intrinsic_instr *<br>
 lower_interpolate_at(nir_<wbr>intrinsic_instr *intrin, struct lower_io_state *state,<br>
-                     nir_ssa_def *offset)<br>
+                     nir_ssa_def *offset, unsigned component)<br>
 {<br>
    nir_variable *var = intrin->variables[0]->var;<br>
<br>
@@ -297,7 +313,7 @@ lower_interpolate_at(nir_<wbr>intrinsic_instr *intrin, struct lower_io_state *state,<br>
<br>
    /* Ignore interpolateAt() for flat variables - flat is flat. */<br>
    if (var->data.interpolation == INTERP_MODE_FLAT)<br>
-      return lower_load(intrin, state, NULL, offset);<br>
+      return lower_load(intrin, state, NULL, offset, component);<br>
<br>
    nir_intrinsic_op bary_op;<br>
    switch (intrin->intrinsic) {<br>
@@ -333,7 +349,7 @@ lower_interpolate_at(nir_<wbr>intrinsic_instr *intrin, struct lower_io_state *state,<br>
    load->num_components = intrin->num_components;<br>
<br>
    nir_intrinsic_set_base(load, var->data.driver_location);<br>
-   nir_intrinsic_set_component(<wbr>load, var->data.location_frac);<br>
+   nir_intrinsic_set_component(<wbr>load, component);<br>
<br>
    load->src[0] = nir_src_for_ssa(&bary_setup-><wbr>dest.ssa);<br>
    load->src[1] = nir_src_for_ssa(offset);<br>
@@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block,<br>
<br>
       nir_ssa_def *offset;<br>
       nir_ssa_def *vertex_index = NULL;<br>
+      unsigned component_offset = var->data.location_frac;<br>
<br>
       offset = get_io_offset(b, intrin->variables[0],<br>
                              per_vertex ? &vertex_index : NULL,<br>
-                             state->type_size);<br>
+                             state->type_size, &component_offset);<br>
<br>
       nir_intrinsic_instr *replacement;<br>
<br>
       switch (intrin->intrinsic) {<br>
       case nir_intrinsic_load_var:<br>
-         replacement = lower_load(intrin, state, vertex_index, offset);<br>
+         replacement = lower_load(intrin, state, vertex_index, offset,<br>
+                                  component_offset);<br>
          break;<br>
<br>
       case nir_intrinsic_store_var:<br>
-         replacement = lower_store(intrin, state, vertex_index, offset);<br>
+         replacement = lower_store(intrin, state, vertex_index, offset,<br>
+                                   component_offset);<br>
          break;<br>
<br>
       case nir_intrinsic_var_atomic_add:<br>
@@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block,<br>
       case nir_intrinsic_interp_var_at_<wbr>sample:<br>
       case nir_intrinsic_interp_var_at_<wbr>offset:<br>
          assert(vertex_index == NULL);<br>
-         replacement = lower_interpolate_at(intrin, state, offset);<br>
+         replacement = lower_interpolate_at(intrin, state, offset,<br>
+                                            component_offset);<br>
          break;<br>
<br>
       default:<br>
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c<br>
index 242bffb..475e3f2 100644<br>
--- a/src/compiler/nir/nir_print.c<br>
+++ b/src/compiler/nir/nir_print.c<br>
@@ -432,9 +432,11 @@ print_var_decl(nir_variable *var, print_state *state)<br>
          loc = buf;<br>
       }<br>
<br>
-      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);<br>
+      fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location,<br>
+              var->data.compact ? " compact" : "");<br>
    }<br>
<br>
+<br></blockquote><div><br></div><div>Stray newline<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
    if (var->constant_initializer) {<br>
       fprintf(fp, " = { ");<br>
       print_constant(var->constant_<wbr>initializer, var->type, state);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.10.2<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>