<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 6, 2015 at 5:05 PM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><div><div class="h5">On Sun, Jan 4, 2015 at 7:52 PM, Connor Abbott <span dir="ltr"><<a href="mailto:cwabbott0@gmail.com" target="_blank">cwabbott0@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><div><div>On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
 src/glsl/Makefile.sources               |   1 +<br>
 src/glsl/nir/nir.h                      |   2 +<br>
 src/glsl/nir/nir_lower_locals_to_regs.c | 313 ++++++++++++++++++++++++++++++++<br>
 3 files changed, 316 insertions(+)<br>
 create mode 100644 src/glsl/nir/nir_lower_locals_to_regs.c<br>
<br>
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources<br>
index 1d3b049..6230f49 100644<br>
--- a/src/glsl/Makefile.sources<br>
+++ b/src/glsl/Makefile.sources<br>
@@ -22,6 +22,7 @@ NIR_FILES = \<br>
        $(GLSL_SRCDIR)/nir/nir_intrinsics.h \<br>
        $(GLSL_SRCDIR)/nir/nir_live_variables.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \<br>
+       $(GLSL_SRCDIR)/nir/nir_lower_locals_to_regs.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \<br>
        $(GLSL_SRCDIR)/nir/nir_lower_variables.c \<br>
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
index b3abfb9..7d7aec7 100644<br>
--- a/src/glsl/nir/nir.h<br>
+++ b/src/glsl/nir/nir.h<br>
@@ -1358,6 +1358,8 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);<br>
<br>
 void nir_split_var_copies(nir_shader *shader);<br>
<br>
+void nir_lower_locals_to_regs(nir_shader *shader);<br>
+<br>
 void nir_lower_variables(nir_shader *shader);<br>
<br>
 void nir_lower_variables_scalar(nir_shader *shader, bool lower_globals,<br>
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
new file mode 100644<br>
index 0000000..caf1c29<br>
--- /dev/null<br>
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
@@ -0,0 +1,313 @@<br>
+/*<br>
+ * Copyright © 2014 Intel Corporation<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+ * IN THE SOFTWARE.<br>
+ *<br>
+ * Authors:<br>
+ *    Jason Ekstrand (<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>)<br>
+ *<br>
+ */<br>
+<br>
+#include "nir.h"<br>
+<br>
+struct locals_to_regs_state {<br>
+   void *mem_ctx;<br>
+   nir_function_impl *impl;<br>
+<br>
+   /* A hash table mapping derefs to registers */<br>
+   struct hash_table *regs_table;<br>
+};<br>
+<br>
+/* The following two functions implement a hash and equality check for<br>
+ * variable dereferences.  When the hash or equality function encounters an<br>
+ * array, it ignores the offset and whether it is direct or indirect<br>
+ * entirely.<br>
+ */<br>
+static uint32_t<br>
+hash_deref(const void *void_deref)<br>
+{<br>
+   const nir_deref *deref = void_deref;<br>
+<br>
+   uint32_t hash;<br>
+   if (deref->child) {<br>
+      hash = hash_deref(deref->child);<br>
+   } else {<br>
+      hash = 2166136261ul;<br>
+   }<br>
+<br>
+   switch (deref->deref_type) {<br>
+   case nir_deref_type_var:<br>
+      hash ^= _mesa_hash_pointer(nir_deref_as_var(deref)->var);<br>
+      break;<br>
+   case nir_deref_type_array: {<br>
+      hash ^= 268435183;<br>
+      break;<br>
+   }<br>
+   case nir_deref_type_struct:<br>
+      hash ^= nir_deref_as_struct(deref)->index;<br>
+      break;<br>
+   }<br>
+<br>
+   return hash * 0x01000193;<br>
+}<br></blockquote><div><br></div></div></div><div>Same comment here about using FNV-1a instead.</div><span><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static bool<br>
+derefs_equal(const void *void_a, const void *void_b)<br>
+{<br>
+   const nir_deref *a = void_a;<br>
+   const nir_deref *b = void_b;<br>
+<br>
+   if (a->deref_type != b->deref_type)<br>
+      return false;<br>
+<br>
+   switch (a->deref_type) {<br>
+   case nir_deref_type_var:<br>
+      if (nir_deref_as_var(a)->var != nir_deref_as_var(b)->var)<br>
+         return false;<br>
+      break;<br></blockquote><div><br></div></span><div>Again, we could split this out of the loop since it's only going to be used once.</div><span><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+   case nir_deref_type_array:<br>
+      /* Do nothing.  All array derefs are the same */<br>
+      break;<br>
+   case nir_deref_type_struct:<br>
+      if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)<br>
+         return false;<br>
+      break;<br>
+   default:<br>
+      unreachable("Invalid dreference type");<br>
+   }<br>
+<br>
+   assert((a->child == NULL) == (b->child == NULL));<br>
+   if (a->child)<br>
+      return derefs_equal(a->child, b->child);<br>
+   else<br>
+      return true;<br>
+}<br></blockquote><div><br></div></span><div>Same comment about using a for loop here.</div></div></div></div></blockquote><div><br></div></div></div><div>Hashing and loopifying are done in 150/133<br></div><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div><div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+static nir_register *<br>
+get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)<br>
+{<br>
+   uint32_t hash = hash_deref(deref);<br>
+<br>
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs_table,<br>
+                                                      hash, deref);<br>
+   if (entry)<br>
+      return entry->data;<br>
+<br>
+   unsigned array_size = 1;<br>
+   nir_deref *tail = &deref->deref;<br>
+   while (tail->child) {<br>
+      if (tail->child->deref_type == nir_deref_type_array) {<br>
+         /* Multiply by the parent's type. */<br>
+         if (glsl_type_is_matrix(tail->type)) {<br>
+            array_size *= glsl_get_matrix_columns(tail->type);<br>
+         } else {<br>
+            assert(glsl_get_length(tail->type) > 0);<br>
+            array_size *= glsl_get_length(tail->type);<br>
+         }<br>
+      }<br>
+      tail = tail->child;<br>
+   }<br>
+<br>
+   assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));<br>
+<br>
+   nir_register *reg = nir_local_reg_create(state->impl);<br>
+   reg->num_components = glsl_get_vector_elements(tail->type);<br>
+   reg->num_array_elems = array_size > 1 ? array_size : 0;<br>
+<br>
+   _mesa_hash_table_insert(state->regs_table, hash, deref, reg);<br>
+<br>
+   return reg;<br>
+}<br>
+<br>
+static nir_src<br>
+get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,<br>
+                  struct locals_to_regs_state *state)<br>
+{<br>
+   nir_src src;<br>
+<br>
+   src.is_ssa = false;<br>
+   src.reg.reg = get_reg_for_deref(deref, state);<br>
+   src.reg.base_offset = 0;<br>
+   src.reg.indirect = NULL;<br>
+<br>
+   nir_deref *tail = &deref->deref;<br>
+   while (tail->child != NULL) {<br>
+      const struct glsl_type *parent_type = tail->type;<br>
+      tail = tail->child;<br>
+<br>
+      if (tail->deref_type != nir_deref_type_array)<br>
+         continue;<br>
+<br>
+      nir_deref_array *deref_array = nir_deref_as_array(tail);<br>
+<br>
+      src.reg.base_offset *= glsl_get_length(parent_type);<br>
+      src.reg.base_offset += deref_array->base_offset;<br>
+<br>
+      if (src.reg.indirect) {<br></blockquote><div><br></div></div></div><div>It doesn't matter too much, but since it's so easy I'd put "&& glsl_get_length(parent_type) != 1" here to save some CPU cycles. <br></div></div></div></div></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div><div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         nir_load_const_instr *load_const =<br>
+            nir_load_const_instr_create(state->mem_ctx);<br>
+         load_const->num_components = 1;<br>
+         load_const->value.u[0] = glsl_get_length(parent_type);<br>
+         load_const->dest.is_ssa = true;<br>
+         nir_ssa_def_init(&load_const->instr, &load_const->dest.ssa, 1, NULL);<br>
+         nir_instr_insert_before(instr, &load_const->instr);<br>
+<br>
+         nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul);<br>
+         mul->src[0].src = *src.reg.indirect;<br>
+         mul->src[1].src.is_ssa = true;<br>
+         mul->src[1].src.ssa = &load_const->dest.ssa;<br>
+         mul->dest.write_mask = 1;<br>
+         mul->dest.dest.is_ssa = true;<br>
+         nir_ssa_def_init(&mul->instr, &mul->dest.dest.ssa, 1, NULL);<br>
+         nir_instr_insert_before(instr, &mul->instr);<br>
+<br>
+         src.reg.indirect->is_ssa = true;<br>
+         src.reg.indirect->ssa = &mul->dest.dest.ssa;<br>
+      }</blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {<br>
+         if (src.reg.indirect == NULL) {<br>
+            src.reg.indirect = ralloc(state->mem_ctx, nir_src);<br>
+            *src.reg.indirect = nir_src_copy(deref_array->indirect,<br>
+                                             state->mem_ctx);<br>
+         } else {<br>
+            nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,<br>
+                                                      nir_op_iadd);<br>
+            add->src[0].src = *src.reg.indirect;<br>
+            add->src[1].src = nir_src_copy(deref_array->indirect,<br>
+                                           state->mem_ctx);<br>
+            add->dest.write_mask = 1;<br>
+            add->dest.dest.is_ssa = true;<br>
+            nir_ssa_def_init(&add->instr, &add->dest.dest.ssa, 1, NULL);<br>
+            nir_instr_insert_before(instr, &add->instr);<br>
+<br>
+            src.reg.indirect->is_ssa = true;<br>
+            src.reg.indirect->ssa = &add->dest.dest.ssa;<br>
+         }<br>
+      }<br>
+   }<br>
+<br>
+   return src;<br>
+}<br>
+<br>
+static bool<br>
+lower_locals_to_regs_block(nir_block *block, void *void_state)<br>
+{<br>
+   struct locals_to_regs_state *state = void_state;<br>
+<br>
+   nir_foreach_instr_safe(block, instr) {<br>
+      if (instr->type != nir_instr_type_intrinsic)<br>
+         continue;<br>
+<br>
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
+<br>
+      switch (intrin->intrinsic) {<br>
+      case nir_intrinsic_load_var_vec1:<br>
+      case nir_intrinsic_load_var_vec2:<br>
+      case nir_intrinsic_load_var_vec3:<br>
+      case nir_intrinsic_load_var_vec4: {<br>
+         if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
+            continue;<br>
+<br>
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
+         mov->src[0].src = get_deref_reg_src(intrin->variables[0],<br>
+                                             &intrin->instr, state);<br>
+         unsigned num_components = mov->src[0].src.reg.reg->num_components;<br>
+         mov->dest.write_mask = (1 << num_components) - 1;<br>
+         if (intrin->dest.is_ssa) {<br>
+            mov->dest.dest.is_ssa = true;<br>
+            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,<br>
+                             num_components, NULL);<br>
+<br>
+            nir_src new_src = {<br>
+               .is_ssa = true,<br>
+               .ssa = &mov->dest.dest.ssa,<br>
+            };<br>
+<br>
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src,<br>
+                                     state->mem_ctx);<br>
+         } else {<br>
+            mov->dest.dest = nir_dest_copy(intrin->dest, state->mem_ctx);<br>
+         }<br>
+         nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
+<br>
+         nir_instr_remove(&intrin->instr);<br>
+         break;<br>
+      }<br>
+<br>
+      case nir_intrinsic_store_var_vec1:<br>
+      case nir_intrinsic_store_var_vec2:<br>
+      case nir_intrinsic_store_var_vec3:<br>
+      case nir_intrinsic_store_var_vec4: {<br>
+         if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
+            continue;<br>
+<br>
+         nir_src reg_src = get_deref_reg_src(intrin->variables[0],<br>
+                                             &intrin->instr, state);<br>
+         unsigned num_components = reg_src.reg.reg->num_components;<br>
+<br>
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
+         mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);<br>
+         mov->dest.write_mask = (1 << num_components) - 1;<br>
+         mov->dest.dest.is_ssa = false;<br>
+         mov->dest.dest.reg.reg = reg_src.reg.reg;<br>
+         mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;<br>
+         mov->dest.dest.reg.indirect = reg_src.reg.indirect;<br>
+<br>
+         nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
+<br>
+         nir_instr_remove(&intrin->instr);<br>
+         break;<br>
+      }<br>
+<br>
+      case nir_intrinsic_copy_var:<br>
+         unreachable("There should be no copies whatsoever at this point");<br>
+         break;<br></blockquote><div><br></div></div></div><div>Are you sure about this? My impression is that lower_variables will lower copies involving things that aren't indirectly referenced, but if you have something like:</div><div><br></div><div>foo[i] = ...</div><div>bar[*] = foo[*];</div><div>... = bar[i];</div><div><br></div><div>then the copy in the middle won't get lowered, unless there's something else I'm missing that will lower it.</div></div></div></div></blockquote><div><br></div></div></div><div>Yeah, there may be something missing there.  I have a pass lying around somewhere that lowers all copies.  Unfortunately, I've never actually seen this happen in the wild so it's untested.  I'll try and cook something up that I think is reliable.<br></div></div></div></div></blockquote><div><br></div><div>Ok, more info.  Right now, GLSL IR is lowering all truly indirect accesses to if-ladders, so we can never hit this.  Once we can handle indirects in the backends or generate the if-ladders in NIR, we will need this.  Until then, let's leave it as-is to reduce the amount of untested code.<br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div></div><span class=""><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div> If we always lowered these copies (ignoring that it hurts packing for vec4 backends), then we wouldn't need wildcards in the first place...</div></div></div></div></blockquote><div><br></div></span><div>Yes, but I have an evil plot to do copy propagation of variables that properly handles arrays.  
>:-]<br><br></div><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div>
+<br>
+      default:<br>
+         continue;<br>
+      }<br>
+   }<br>
+<br>
+   return true;<br>
+}<br>
+<br>
+static void<br>
+nir_lower_locals_to_regs_impl(nir_function_impl *impl)<br>
+{<br>
+   struct locals_to_regs_state state;<br>
+<br>
+   state.mem_ctx = ralloc_parent(impl);<br>
+   state.impl = impl;<br>
+   state.regs_table = _mesa_hash_table_create(NULL, derefs_equal);<br>
+<br>
+   nir_foreach_block(impl, lower_locals_to_regs_block, &state);<br>
+<br>
+   _mesa_hash_table_destroy(state.regs_table, NULL);<br>
+}<br>
+<br>
+void<br>
+nir_lower_locals_to_regs(nir_shader *shader)<br>
+{<br>
+   nir_foreach_overload(shader, overload) {<br>
+      if (overload->impl)<br>
+         nir_lower_locals_to_regs_impl(overload->impl);<br>
+   }<br>
+}<br>
</div></div><span><font color="#888888"><span><font color="#888888">--<br>
2.2.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></font></span></blockquote></div><br></div></div>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div></div>