[Mesa-dev] [PATCH] i965: Separate base offset/constant offset combining from remapping.

Wed Dec 9 02:51:24 PST 2015

My tessellation branch has two additional remap functions.  I don't want
to replicate this logic there.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 78 ++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 28 deletions(-)

Hey Jason,

If you like this patch, and haven't yet merged your NIR input reworks,
feel free to just squash it into your changes.  Or, we can land it
separately after your changes.  It's up to you.

Separating this out allows me to reuse this in my new tessellation input
and output remapping functions, and also means we don't need to add structs
for the remap functions...we can just pass the builder, or inputs_read, or
the VUE map...and not have to pack multiple things together.

 --Ken

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 14ad172..105a175 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,15 +27,19 @@
 #include "glsl/nir/nir_builder.h"
 #include "program/prog_to_nir.h"
 
-struct remap_vs_attrs_state {
-   nir_builder b;
-   uint64_t inputs_read;
-};
-
+/**
+ * In many cases, we just add the base and offset together, so there's no
+ * reason to keep them separate.  Sometimes, combining them is essential:
+ * if a shader only accesses part of a compound variable (such as a matrix
+ * or array), the variable's base may not actually exist in the VUE map.
+ *
+ * This pass adds constant offsets to instr->const_index[0], and resets
+ * the offset source to 0.  Non-constant offsets remain unchanged.
+ */
 static bool
-remap_vs_attrs(nir_block *block, void *void_state)
+add_const_offset_to_base(nir_block *block, void *closure)
 {
-   struct remap_vs_attrs_state *state = void_state;
+   nir_builder *b = closure;
 
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_intrinsic)
@@ -43,30 +47,48 @@ remap_vs_attrs(nir_block *block, void *void_state)
 
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
+      if (intrin->intrinsic == nir_intrinsic_load_input ||
+          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
+          intrin->intrinsic == nir_intrinsic_load_output ||
+          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
+          intrin->intrinsic == nir_intrinsic_store_output ||
+          intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
+         nir_src *offset = nir_get_io_offset_src(intrin);
+         nir_const_value *const_offset = nir_src_as_const_value(*offset);
+
+         if (const_offset) {
+            intrin->const_index[0] += const_offset->u[0];
+            b->cursor = nir_before_instr(&intrin->instr);
+            nir_instr_rewrite_src(&intrin->instr, offset,
+                                  nir_src_for_ssa(nir_imm_int(b, 0)));
+         }
+      }
+   }
+   return true;
+
+}
+
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{
+   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
       if (intrin->intrinsic == nir_intrinsic_load_input) {
          /* Attributes come in a contiguous block, ordered by their
           * gl_vert_attrib value.  That means we can compute the slot
           * number for an attribute by masking out the enabled attributes
           * before it and counting the bits.
           */
-         nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]);
-
-         /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
-         assert(const_offset);
-
-         int attr = intrin->const_index[0] + const_offset->u[0];
-         int slot = _mesa_bitcount_64(state->inputs_read &
-                                      BITFIELD64_MASK(attr));
+         int attr = intrin->const_index[0];
+         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
 
-         /* The NIR -> FS pass will just add the base and offset together, so
-          * there's no reason to keep them separate.  Just put it all in
-          * const_index[0] and set the offset src[0] to load_const(0).
-          */
          intrin->const_index[0] = 4 * slot;
-
-         state->b.cursor = nir_before_instr(&intrin->instr);
-         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
-                               nir_src_for_ssa(nir_imm_int(&state->b, 0)));
       }
    }
    return true;
@@ -97,17 +119,17 @@ brw_nir_lower_inputs(nir_shader *nir,
           * key->inputs_read since the two are identical aside from Gen4-5
           * edge flag differences.
           */
-         struct remap_vs_attrs_state remap_state = {
-            .inputs_read = nir->info.inputs_read,
-         };
+         GLbitfield64 inputs_read = nir->info.inputs_read;
 
          /* This pass needs actual constants */
          nir_opt_constant_folding(nir);
 
          nir_foreach_overload(nir, overload) {
             if (overload->impl) {
-               nir_builder_init(&remap_state.b, overload->impl);
-               nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state);
+               nir_builder b;
+               nir_builder_init(&b, overload->impl);
+               nir_foreach_block(overload->impl, add_const_offset_to_base, &b);
+               nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
             }
          }
       }
-- 
2.6.3