[Mesa-dev] [RFC 2/7] nir: Add a pass for selectively lowering variables to scratch space

Jason Ekstrand jason at jlekstrand.net
Mon Dec 5 19:59:53 UTC 2016


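This adds a pass, nir_lower_vars_to_scratch(), which flags any variable
that is in one of the requested modes, is accessed indirectly, and is at
least size_threshold units large (as measured by the given type_size
callback).  Each flagged variable is removed from its variable list,
assigned a location in the shader's scratch space (the total is tracked
in the new nir_shader::num_scratch field), and every load_var/store_var
of it is rewritten as a load_scratch/store_scratch intrinsic with an
explicitly computed byte offset.

A rough sketch of how a back-end might call it (the threshold value and
the driver_type_size callback are placeholders, not part of this patch):

   /* Move large, indirectly-addressed locals to scratch, splitting
    * vector accesses into scalar scratch ops.
    */
   bool progress =
      nir_lower_vars_to_scratch(nir, nir_var_local, 64, true,
                                driver_type_size);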
---
 src/compiler/Makefile.sources        |   1 +
 src/compiler/nir/nir.h               |   8 +-
 src/compiler/nir/nir_clone.c         |   1 +
 src/compiler/nir/nir_lower_scratch.c | 258 +++++++++++++++++++++++++++++++++++
 4 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/nir/nir_lower_scratch.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 6a7dcd8..2a1594d 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -216,6 +216,7 @@ NIR_FILES = \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_returns.c \
 	nir/nir_lower_samplers.c \
+	nir/nir_lower_scratch.c \
 	nir/nir_lower_system_values.c \
 	nir/nir_lower_tex.c \
 	nir/nir_lower_to_source_mods.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9e8ed2c..61be6aa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1822,7 +1822,7 @@ typedef struct nir_shader {
     * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
     * access plus one
     */
-   unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+   unsigned num_inputs, num_uniforms, num_outputs, num_shared, num_scratch;
 
    /** The shader stage, such as MESA_SHADER_VERTEX. */
    gl_shader_stage stage;
@@ -2315,6 +2315,12 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
                                  bool outputs, bool inputs);
 
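+/* Lowers load_var/store_var of indirectly addressed variables in the given
+ * modes, whose size per type_size is at least size_threshold, into
+ * load_scratch/store_scratch intrinsics.
+ */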
+bool nir_lower_vars_to_scratch(nir_shader *shader,
+                               nir_variable_mode modes,
+                               int size_threshold,
+                               bool use_scalar_ops,
+                               int (*type_size)(const struct glsl_type *));
+
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index be89426..01314ad 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -719,6 +719,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
    ns->num_uniforms = s->num_uniforms;
    ns->num_outputs = s->num_outputs;
    ns->num_shared = s->num_shared;
+   ns->num_scratch = s->num_scratch;
 
    free_clone_state(&state);
 
diff --git a/src/compiler/nir/nir_lower_scratch.c b/src/compiler/nir/nir_lower_scratch.c
new file mode 100644
index 0000000..a1d5590
--- /dev/null
+++ b/src/compiler/nir/nir_lower_scratch.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason at jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts load_var/store_var intrinsics that reference
+ * selected variables into load_scratch/store_scratch intrinsics.  The
+ * selection is controlled by a few parameters: the variable modes to
+ * consider, a minimum variable size, whether to emit scalar or vector
+ * scratch ops, and a callback giving the size of each type in scratch
+ * space.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
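+/* Returns true if any array deref in the chain uses an indirect
+ * (non-constant) index.
+ */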
+static bool
+deref_has_indirect(nir_deref_var *deref)
+{
+   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+      if (tail->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(tail);
+      if (arr->deref_array_type == nir_deref_array_type_indirect)
+         return true;
+   }
+
+   return false;
+}
+
+static void
+lower_load_store(nir_builder *b,
+                 nir_intrinsic_instr *intrin,
+                 int (*type_size)(const struct glsl_type *),
+                 bool scalar)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   /* Just emit code and let constant-folding go to town */
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
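+   /* Walk the deref chain, accumulating a byte offset.  For a deref such
+    * as s.arr[i], with hypothetical sizes of 8 bytes for the fields before
+    * arr and 16 bytes per array element (the real numbers come from the
+    * type_size callback), this emits roughly
+    *
+    *    offset = 0 + 8 + 16 * base_offset + 16 * i
+    *
+    * and relies on constant folding to clean up the adds of zero.
+    */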
+   nir_deref *tail = &intrin->variables[0]->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type == nir_deref_type_array) {
+         nir_deref_array *deref_array = nir_deref_as_array(tail);
+         unsigned size = type_size(tail->type);
+
+         offset = nir_iadd(b, offset,
+                           nir_imm_int(b, size * deref_array->base_offset));
+
+         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+            nir_ssa_def *mul =
+               nir_imul(b, nir_imm_int(b, size),
+                        nir_ssa_for_src(b, deref_array->indirect, 1));
+
+            offset = nir_iadd(b, offset, mul);
+         }
+      } else if (tail->deref_type == nir_deref_type_struct) {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+         unsigned field_offset = 0;
+         for (unsigned i = 0; i < deref_struct->index; i++) {
+            field_offset += type_size(glsl_get_struct_field(parent_type, i));
+         }
+         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
+      }
+   }
+
+   nir_variable *var = intrin->variables[0]->var;
+   const unsigned var_size = type_size(var->type);
+
+   const unsigned bit_size = glsl_get_bit_size(tail->type);
+   unsigned num_ops = 1;
+   unsigned comps_per_op = glsl_get_vector_elements(tail->type);
+   unsigned comp_size = 0; /* only used when splitting into scalar ops */
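+   /* With use_scalar_ops, a vecN access is split into num_ops = N
+    * single-component scratch ops spaced comp_size bytes apart; otherwise
+    * a single vecN op is emitted.
+    */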
+   if (scalar && comps_per_op > 1) {
+      num_ops = comps_per_op;
+      comps_per_op = 1;
+      comp_size = type_size(glsl_scalar_type(glsl_get_base_type(tail->type)));
+      assert(comp_size * num_ops == type_size(tail->type));
+   }
+   assert(num_ops == 1 || comps_per_op == 1);
+   assert(num_ops * comps_per_op == glsl_get_vector_elements(tail->type));
+
+   if (intrin->intrinsic == nir_intrinsic_load_var) {
+      nir_ssa_def *defs[4];
+      for (unsigned i = 0; i < num_ops; i++) {
+         nir_intrinsic_instr *load =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
+
+         load->num_components = comps_per_op;
+
+         load->src[0] =
+            nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i * comp_size)));
+         nir_intrinsic_set_base(load, var->data.location);
+         nir_intrinsic_set_range(load, var_size);
+
+         nir_ssa_dest_init(&load->instr, &load->dest,
+                           comps_per_op, bit_size, NULL);
+         defs[i] = &load->dest.ssa;
+
+         nir_builder_instr_insert(b, &load->instr);
+      }
+
+      if (num_ops > 1)
+         defs[0] = nir_vec(b, defs, num_ops);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(defs[0]));
+   } else {
+      assert(intrin->intrinsic == nir_intrinsic_store_var);
+
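+      /* When splitting into scalar stores, the original write mask picks
+       * which component stores to emit (op_mask) and each emitted store
+       * writes its one component (store_mask = 1).  A vector store is a
+       * single op carrying the original write mask.
+       */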
+      unsigned op_mask, store_mask;
+      if (num_ops > 1) {
+         store_mask = 1;
+         op_mask = nir_intrinsic_write_mask(intrin);
+      } else {
+         store_mask = nir_intrinsic_write_mask(intrin);
+         op_mask = 1;
+      }
+      for (unsigned i = 0; i < num_ops; i++) {
+         if (!(op_mask & (1 << i)))
+            continue;
+
+         nir_intrinsic_instr *store =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
+
+         store->num_components = comps_per_op;
+
+         if (scalar) {
+            assert(intrin->src[0].is_ssa);
+            store->src[0] =
+               nir_src_for_ssa(nir_channel(b, intrin->src[0].ssa, i));
+         } else {
+            nir_src_copy(&store->src[0], &intrin->src[0], store);
+         }
+         store->src[1] =
+            nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i * comp_size)));
+         nir_intrinsic_set_base(store, var->data.location);
+         nir_intrinsic_set_range(store, var_size);
+         nir_intrinsic_set_write_mask(store, store_mask);
+
+         nir_builder_instr_insert(b, &store->instr);
+      }
+   }
+
+   nir_instr_remove(&intrin->instr);
+}
+
+bool
+nir_lower_vars_to_scratch(nir_shader *shader,
+                          nir_variable_mode modes,
+                          int size_threshold,
+                          bool use_scalar_ops,
+                          int (*type_size)(const struct glsl_type *))
+{
+   /* First, we walk the instructions and flag any variables we want to lower
+    * by removing them from their respective list and setting the mode to 0.
+    */
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_var &&
+                intrin->intrinsic != nir_intrinsic_store_var)
+               continue;
+
+            nir_variable *var = intrin->variables[0]->var;
+
+            /* Only lower variables with one of the requested modes.  This
+             * also prevents the following code from executing more than once
+             * per variable since we set the mode to 0.
+             */
+            if (!(modes & var->data.mode))
+               continue;
+
+            if (!deref_has_indirect(intrin->variables[0]))
+               continue;
+
+            int var_size = type_size(var->type);
+            assert(var_size >= 0);
+            if (var_size < size_threshold)
+               continue;
+
+            /* Remove it from its list */
+            exec_node_remove(&var->node);
+            /* Invalid mode used to flag "moving to scratch" */
+            var->data.mode = 0;
+
+            var->data.location = shader->num_scratch;
+            shader->num_scratch += var_size;
+         }
+      }
+   }
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder build;
+      nir_builder_init(&build, function->impl);
+
+      bool impl_progress = false;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_var &&
+                intrin->intrinsic != nir_intrinsic_store_var)
+               continue;
+
+            nir_variable *var = intrin->variables[0]->var;
+            /* Variables flagged for lowering above have mode == 0 */
+            if (var->data.mode)
+               continue;
+
+            lower_load_store(&build, intrin, type_size, use_scalar_ops);
+            impl_progress = true;
+         }
+      }
+
+      if (impl_progress) {
+         progress = true;
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
-- 
2.5.0.400.gff86faf