<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <span dir="ltr">&lt;<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
 src/glsl/Makefile.sources               |   1 +<br>
 src/glsl/nir/nir.h                      |   2 +<br>
 src/glsl/nir/nir_lower_locals_to_regs.c | 313 ++++++++++++++++++++++++++++++++<br>
 3 files changed, 316 insertions(+)<br>
 create mode 100644 src/glsl/nir/nir_lower_locals_to_regs.c<br>
<br>
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources<br>
index 1d3b049..6230f49 100644<br>
--- a/src/glsl/Makefile.sources<br>
+++ b/src/glsl/Makefile.sources<br>
@@ -22,6 +22,7 @@ NIR_FILES = \<br>
        $(GLSL_SRCDIR)/nir/nir_intrinsics.h \<br>
        $(GLSL_SRCDIR)/nir/nir_live_variables.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \<br>
+       $(GLSL_SRCDIR)/nir/nir_lower_locals_to_regs.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_variables.c \<br>
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
index b3abfb9..7d7aec7 100644<br>
--- a/src/glsl/nir/nir.h<br>
+++ b/src/glsl/nir/nir.h<br>
@@ -1358,6 +1358,8 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);<br>
<br>
 void nir_split_var_copies(nir_shader *shader);<br>
<br>
+void nir_lower_locals_to_regs(nir_shader *shader);<br>
+<br>
 void nir_lower_variables(nir_shader *shader);<br>
<br>
 void nir_lower_variables_scalar(nir_shader *shader, bool lower_globals,<br>
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
new file mode 100644<br>
index 0000000..caf1c29<br>
--- /dev/null<br>
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
@@ -0,0 +1,313 @@<br>
+/*<br>
+ * Copyright © 2014 Intel Corporation<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+ * IN THE SOFTWARE.<br>
+ *<br>
+ * Authors:<br>
+ *    Jason Ekstrand (<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>)<br>
+ *<br>
+ */<br>
+<br>
+#include "nir.h"<br>
+<br>
+struct locals_to_regs_state {<br>
+   void *mem_ctx;<br>
+   nir_function_impl *impl;<br>
+<br>
+   /* A hash table mapping derefs to registers */<br>
+   struct hash_table *regs_table;<br>
+};<br>
+<br>
+/* The following two functions implement a hash and equality check for<br>
+ * variable dereferences.  When the hash or equality function encounters an<br>
+ * array, it ignores the offset and whether it is direct or indirect<br>
+ * entirely.<br>
+ */<br>
+static uint32_t<br>
+hash_deref(const void *void_deref)<br>
+{<br>
+   const nir_deref *deref = void_deref;<br>
+<br>
+   uint32_t hash;<br>
+   if (deref->child) {<br>
+      hash = hash_deref(deref->child);<br>
+   } else {<br>
+      hash = 2166136261ul;<br>
+   }<br>
+<br>
+   switch (deref->deref_type) {<br>
+   case nir_deref_type_var:<br>
+      hash ^= _mesa_hash_pointer(nir_deref_as_var(deref)->var);<br>
+      break;<br>
+   case nir_deref_type_array: {<br>
+      hash ^= 268435183;<br>
+      break;<br>
+   }<br>
+   case nir_deref_type_struct:<br>
+      hash ^= nir_deref_as_struct(deref)->index;<br>
+      break;<br>
+   }<br>
+<br>
+   return hash * 0x01000193;<br>
+}<br></blockquote><div><br></div><div>Same comment here about using FNV-1a instead.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static bool<br>
+derefs_equal(const void *void_a, const void *void_b)<br>
+{<br>
+   const nir_deref *a = void_a;<br>
+   const nir_deref *b = void_b;<br>
+<br>
+   if (a->deref_type != b->deref_type)<br>
+      return false;<br>
+<br>
+   switch (a->deref_type) {<br>
+   case nir_deref_type_var:<br>
+      if (nir_deref_as_var(a)->var != nir_deref_as_var(b)->var)<br>
+         return false;<br>
+      break;<br></blockquote><div><br></div><div>Again, we could split this out of the loop since it's only going to be used once.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+   case nir_deref_type_array:<br>
+      /* Do nothing.  All array derefs are the same */<br>
+      break;<br>
+   case nir_deref_type_struct:<br>
+      if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)<br>
+         return false;<br>
+      break;<br>
+   default:<br>
+      unreachable("Invalid dreference type");<br>
+   }<br>
+<br>
+   assert((a->child == NULL) == (b->child == NULL));<br>
+   if (a->child)<br>
+      return derefs_equal(a->child, b->child);<br>
+   else<br>
+      return true;<br>
+}<br></blockquote><div><br></div><div>Same comment about using a for loop here.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static nir_register *<br>
+get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)<br>
+{<br>
+   uint32_t hash = hash_deref(deref);<br>
+<br>
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs_table,<br>
+                                                      hash, deref);<br>
+   if (entry)<br>
+      return entry->data;<br>
+<br>
+   unsigned array_size = 1;<br>
+   nir_deref *tail = &deref->deref;<br>
+   while (tail->child) {<br>
+      if (tail->child->deref_type == nir_deref_type_array) {<br>
+         /* Multiply by the parent's type. */<br>
+         if (glsl_type_is_matrix(tail->type)) {<br>
+            array_size *= glsl_get_matrix_columns(tail->type);<br>
+         } else {<br>
+            assert(glsl_get_length(tail->type) > 0);<br>
+            array_size *= glsl_get_length(tail->type);<br>
+         }<br>
+      }<br>
+      tail = tail->child;<br>
+   }<br>
+<br>
+   assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));<br>
+<br>
+   nir_register *reg = nir_local_reg_create(state->impl);<br>
+   reg->num_components = glsl_get_vector_elements(tail->type);<br>
+   reg->num_array_elems = array_size > 1 ? array_size : 0;<br>
+<br>
+   _mesa_hash_table_insert(state->regs_table, hash, deref, reg);<br>
+<br>
+   return reg;<br>
+}<br>
+<br>
+static nir_src<br>
+get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,<br>
+                  struct locals_to_regs_state *state)<br>
+{<br>
+   nir_src src;<br>
+<br>
+   src.is_ssa = false;<br>
+   src.reg.reg = get_reg_for_deref(deref, state);<br>
+   src.reg.base_offset = 0;<br>
+   src.reg.indirect = NULL;<br>
+<br>
+   nir_deref *tail = &deref->deref;<br>
+   while (tail->child != NULL) {<br>
+      const struct glsl_type *parent_type = tail->type;<br>
+      tail = tail->child;<br>
+<br>
+      if (tail->deref_type != nir_deref_type_array)<br>
+         continue;<br>
+<br>
+      nir_deref_array *deref_array = nir_deref_as_array(tail);<br>
+<br>
+      src.reg.base_offset *= glsl_get_length(parent_type);<br>
+      src.reg.base_offset += deref_array->base_offset;<br>
+<br>
+      if (src.reg.indirect) {<br></blockquote><div><br></div><div>It doesn't matter too much, but since it's so easy I'd put "&& glsl_get_length(parent_type) != 1" here to save some CPU cycles.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         nir_load_const_instr *load_const =<br>
+            nir_load_const_instr_create(state->mem_ctx);<br>
+         load_const->num_components = 1;<br>
+         load_const->value.u[0] = glsl_get_length(parent_type);<br>
+         load_const->dest.is_ssa = true;<br>
+         nir_ssa_def_init(&load_const->instr, &load_const->dest.ssa, 1, NULL);<br>
+         nir_instr_insert_before(instr, &load_const->instr);<br>
+<br>
+         nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul);<br>
+         mul->src[0].src = *src.reg.indirect;<br>
+         mul->src[1].src.is_ssa = true;<br>
+         mul->src[1].src.ssa = &load_const->dest.ssa;<br>
+         mul->dest.write_mask = 1;<br>
+         mul->dest.dest.is_ssa = true;<br>
+         nir_ssa_def_init(&mul->instr, &mul->dest.dest.ssa, 1, NULL);<br>
+         nir_instr_insert_before(instr, &mul->instr);<br>
+<br>
+         src.reg.indirect->is_ssa = true;<br>
+         src.reg.indirect->ssa = &mul->dest.dest.ssa;<br>
+      }</blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {<br>
+         if (src.reg.indirect == NULL) {<br>
+            src.reg.indirect = ralloc(state->mem_ctx, nir_src);<br>
+            *src.reg.indirect = nir_src_copy(deref_array->indirect,<br>
+                                             state->mem_ctx);<br>
+         } else {<br>
+            nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,<br>
+                                                      nir_op_iadd);<br>
+            add->src[0].src = *src.reg.indirect;<br>
+            add->src[1].src = nir_src_copy(deref_array->indirect,<br>
+                                           state->mem_ctx);<br>
+            add->dest.write_mask = 1;<br>
+            add->dest.dest.is_ssa = true;<br>
+            nir_ssa_def_init(&add->instr, &add->dest.dest.ssa, 1, NULL);<br>
+            nir_instr_insert_before(instr, &add->instr);<br>
+<br>
+            src.reg.indirect->is_ssa = true;<br>
+            src.reg.indirect->ssa = &add->dest.dest.ssa;<br>
+         }<br>
+      }<br>
+   }<br>
+<br>
+   return src;<br>
+}<br>
+<br>
+static bool<br>
+lower_locals_to_regs_block(nir_block *block, void *void_state)<br>
+{<br>
+   struct locals_to_regs_state *state = void_state;<br>
+<br>
+   nir_foreach_instr_safe(block, instr) {<br>
+      if (instr->type != nir_instr_type_intrinsic)<br>
+         continue;<br>
+<br>
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
+<br>
+      switch (intrin->intrinsic) {<br>
+      case nir_intrinsic_load_var_vec1:<br>
+      case nir_intrinsic_load_var_vec2:<br>
+      case nir_intrinsic_load_var_vec3:<br>
+      case nir_intrinsic_load_var_vec4: {<br>
+         if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
+            continue;<br>
+<br>
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
+         mov->src[0].src = get_deref_reg_src(intrin->variables[0],<br>
+                                             &intrin->instr, state);<br>
+         unsigned num_components = mov->src[0].src.reg.reg->num_components;<br>
+         mov->dest.write_mask = (1 << num_components) - 1;<br>
+         if (intrin->dest.is_ssa) {<br>
+            mov->dest.dest.is_ssa = true;<br>
+            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,<br>
+                             num_components, NULL);<br>
+<br>
+            nir_src new_src = {<br>
+               .is_ssa = true,<br>
+               .ssa = &mov->dest.dest.ssa,<br>
+            };<br>
+<br>
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src,<br>
+                                     state->mem_ctx);<br>
+         } else {<br>
+            mov->dest.dest = nir_dest_copy(intrin->dest, state->mem_ctx);<br>
+         }<br>
+         nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
+<br>
+         nir_instr_remove(&intrin->instr);<br>
+         break;<br>
+      }<br>
+<br>
+      case nir_intrinsic_store_var_vec1:<br>
+      case nir_intrinsic_store_var_vec2:<br>
+      case nir_intrinsic_store_var_vec3:<br>
+      case nir_intrinsic_store_var_vec4: {<br>
+         if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
+            continue;<br>
+<br>
+         nir_src reg_src = get_deref_reg_src(intrin->variables[0],<br>
+                                             &intrin->instr, state);<br>
+         unsigned num_components = reg_src.reg.reg->num_components;<br>
+<br>
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
+         mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);<br>
+         mov->dest.write_mask = (1 << num_components) - 1;<br>
+         mov->dest.dest.is_ssa = false;<br>
+         mov->dest.dest.reg.reg = reg_src.reg.reg;<br>
+         mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;<br>
+         mov->dest.dest.reg.indirect = reg_src.reg.indirect;<br>
+<br>
+         nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
+<br>
+         nir_instr_remove(&intrin->instr);<br>
+         break;<br>
+      }<br>
+<br>
+      case nir_intrinsic_copy_var:<br>
+         unreachable("There should be no copies whatsoever at this point");<br>
+         break;<br></blockquote><div><br></div><div>Are you sure about this? My impression is that lower_variables will lower copies involving things that aren't indirectly referenced, but if you have something like:</div><div><br></div><div>foo[i] = ...</div><div>bar[*] = foo[*];</div><div>... = bar[i];</div><div><br></div><div>then the copy in the middle won't get lowered, unless there's something else I'm missing that will lower it. If we always lowered these copies (ignoring that it hurts packing for vec4 backends), then we wouldn't need wildcards in the first place...</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+      default:<br>
+         continue;<br>
+      }<br>
+   }<br>
+<br>
+   return true;<br>
+}<br>
+<br>
+static void<br>
+nir_lower_locals_to_regs_impl(nir_function_impl *impl)<br>
+{<br>
+   struct locals_to_regs_state state;<br>
+<br>
+   state.mem_ctx = ralloc_parent(impl);<br>
+   state.impl = impl;<br>
+   state.regs_table = _mesa_hash_table_create(NULL, derefs_equal);<br>
+<br>
+   nir_foreach_block(impl, lower_locals_to_regs_block, &state);<br>
+<br>
+   _mesa_hash_table_destroy(state.regs_table, NULL);<br>
+}<br>
+<br>
+void<br>
+nir_lower_locals_to_regs(nir_shader *shader)<br>
+{<br>
+   nir_foreach_overload(shader, overload) {<br>
+      if (overload->impl)<br>
+         nir_lower_locals_to_regs_impl(overload->impl);<br>
+   }<br>
+}<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.2.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>