<p dir="ltr">Two comments at the moment.</p>
<p dir="ltr">1) s/sweep/steal/<br>
2) I think I'll have more comments and would like you to wait for me to get a chance to look through it before pushing.<br>
--Jason</p>
<div class="gmail_quote">On Mar 28, 2015 4:29 PM, "Kenneth Graunke" &lt;<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>&gt; wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This pass performs a mark and sweep pass over a nir_shader's associated<br>
memory - anything still connected to the program will be kept, and any<br>
dead memory we dropped on the floor will be freed.<br>
<br>
The expectation is that this will be called when finished building and<br>
optimizing the shader. However, it's also fine to call it earlier, and<br>
many times, to free up memory earlier.<br>
<br>
Signed-off-by: Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>&gt;<br>
---<br>
src/glsl/Makefile.sources | 1 +<br>
src/glsl/nir/nir.h | 2 +<br>
src/glsl/nir/nir_sweep.c | 299 ++++++++++++++++++++++++++++++++++++++++++++++<br>
3 files changed, 302 insertions(+)<br>
create mode 100644 src/glsl/nir/nir_sweep.c<br>
<br>
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources<br>
index 8d29c55..7046407 100644<br>
--- a/src/glsl/Makefile.sources<br>
+++ b/src/glsl/Makefile.sources<br>
@@ -54,6 +54,7 @@ NIR_FILES = \<br>
nir/nir_search.c \<br>
nir/nir_search.h \<br>
nir/nir_split_var_copies.c \<br>
+ nir/nir_sweep.c \<br>
nir/nir_to_ssa.c \<br>
nir/nir_types.h \<br>
nir/nir_validate.c \<br>
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
index 7b886e3..946f895 100644<br>
--- a/src/glsl/nir/nir.h<br>
+++ b/src/glsl/nir/nir.h<br>
@@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);<br>
<br>
bool nir_opt_remove_phis(nir_shader *shader);<br>
<br>
+void nir_sweep(nir_shader *shader);<br>
+<br>
#ifdef __cplusplus<br>
} /* extern "C" */<br>
#endif<br>
diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c<br>
new file mode 100644<br>
index 0000000..cba5be7<br>
--- /dev/null<br>
+++ b/src/glsl/nir/nir_sweep.c<br>
@@ -0,0 +1,299 @@<br>
+/*<br>
+ * Copyright © 2015 Intel Corporation<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS<br>
+ * IN THE SOFTWARE.<br>
+ */<br>
+<br>
+#include "nir.h"<br>
+<br>
+/**<br>
+ * \file nir_sweep.c<br>
+ *<br>
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated<br>
+ * memory - anything still connected to the program will be kept, and any dead memory<br>
+ * we dropped on the floor will be freed.<br>
+ *<br>
+ * The expectation is that drivers should call this when finished compiling the shader<br>
+ * (after any optimization, lowering, and so on). However, it's also fine to call it<br>
+ * earlier, and even many times, trading CPU cycles for memory savings.<br>
+ */<br>
+<br>
+#define steal_list(mem_ctx, type, list) \<br>
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }<br>
+<br>
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);<br>
+<br>
+static void<br>
+sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa)<br>
+{<br>
+ ralloc_steal(nir, ssa->uses);<br>
+ ralloc_steal(nir, ssa->if_uses);<br>
+}<br>
+<br>
+static void<br>
+sweep_src(nir_shader *nir, nir_src *src)<br>
+{<br>
+ if (!src)<br>
+ return;<br>
+<br>
+ if (src->is_ssa) {<br>
+ sweep_ssa_def(nir, src->ssa);<br>
+ } else {<br>
+ sweep_src(nir, src->reg.indirect);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_dest(nir_shader *nir, nir_dest *dest)<br>
+{<br>
+ if (dest->is_ssa) {<br>
+ sweep_ssa_def(nir, &dest->ssa);<br>
+ } else {<br>
+ sweep_src(nir, dest->reg.indirect);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_deref_chain(nir_shader *nir, nir_deref *deref)<br>
+{<br>
+ for (; deref; deref = deref->child) {<br>
+ ralloc_steal(nir, deref);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu)<br>
+{<br>
+ for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {<br>
+ sweep_src(nir, &alu->src[i].src);<br>
+ }<br>
+<br>
+ sweep_dest(nir, &alu->dest.dest);<br>
+}<br>
+<br>
+static void<br>
+sweep_call_instr(nir_shader *nir, nir_call_instr *call)<br>
+{<br>
+ ralloc_steal(nir, call->params);<br>
+ for (int i = 0; i < call->num_params; i++) {<br>
+ sweep_deref_chain(nir, &call->params[i]->deref);<br>
+ }<br>
+ if (call->return_deref)<br>
+ sweep_deref_chain(nir, &call->return_deref->deref);<br>
+}<br>
+<br>
+static void<br>
+sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex)<br>
+{<br>
+ if (tex->sampler)<br>
+ sweep_deref_chain(nir, &tex->sampler->deref);<br>
+<br>
+ ralloc_steal(nir, tex->src);<br>
+ for (int i = 0; i < tex->num_srcs; i++) {<br>
+ sweep_src(nir, &tex->src[i].src);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin)<br>
+{<br>
+ for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {<br>
+ sweep_src(nir, &intrin->src[i]);<br>
+ }<br>
+<br>
+ for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {<br>
+ sweep_deref_chain(nir, &intrin->variables[i]->deref);<br>
+ }<br>
+<br>
+ sweep_dest(nir, &intrin->dest);<br>
+}<br>
+<br>
+static void<br>
+sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const)<br>
+{<br>
+ sweep_ssa_def(nir, &load_const->def);<br>
+}<br>
+<br>
+static void<br>
+sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef)<br>
+{<br>
+ sweep_ssa_def(nir, &ssa_undef->def);<br>
+}<br>
+<br>
+static void<br>
+sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi)<br>
+{<br>
+ foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) {<br>
+ ralloc_steal(nir, phi_src);<br>
+ /* skip pred */<br>
+ sweep_src(nir, &phi_src->src);<br>
+ }<br>
+<br>
+ sweep_dest(nir, &phi->dest);<br>
+}<br>
+<br>
+static void<br>
+sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr *parallel_copy)<br>
+{<br>
+ /* Nothing to do here? It looks like nir_from_ssa already tidies up after itself. */<br>
+}<br>
+<br>
+<br>
+static void<br>
+sweep_block(nir_shader *nir, nir_block *block)<br>
+{<br>
+ ralloc_steal(nir, block);<br>
+<br>
+ nir_foreach_instr(block, instr) {<br>
+ ralloc_steal(nir, instr);<br>
+<br>
+ switch (instr->type) {<br>
+ case nir_instr_type_alu:<br>
+ sweep_alu_instr(nir, nir_instr_as_alu(instr));<br>
+ break;<br>
+ case nir_instr_type_call:<br>
+ sweep_call_instr(nir, nir_instr_as_call(instr));<br>
+ break;<br>
+ case nir_instr_type_tex:<br>
+ sweep_tex_instr(nir, nir_instr_as_tex(instr));<br>
+ break;<br>
+ case nir_instr_type_intrinsic:<br>
+ sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr));<br>
+ break;<br>
+ case nir_instr_type_load_const:<br>
+ sweep_load_const_instr(nir, nir_instr_as_load_const(instr));<br>
+ break;<br>
+ case nir_instr_type_ssa_undef:<br>
+ sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr));<br>
+ break;<br>
+ case nir_instr_type_jump:<br>
+ /* Nothing to do */<br>
+ break;<br>
+ case nir_instr_type_phi:<br>
+ sweep_phi_instr(nir, nir_instr_as_phi(instr));<br>
+ break;<br>
+ case nir_instr_type_parallel_copy:<br>
+ sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr));<br>
+ break;<br>
+ default:<br>
+ unreachable("Invalid instruction type");<br>
+ }<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_if(nir_shader *nir, nir_if *iff)<br>
+{<br>
+ ralloc_steal(nir, iff);<br>
+<br>
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {<br>
+ sweep_cf_node(nir, cf_node);<br>
+ }<br>
+<br>
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {<br>
+ sweep_cf_node(nir, cf_node);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_loop(nir_shader *nir, nir_loop *loop)<br>
+{<br>
+ ralloc_steal(nir, loop);<br>
+<br>
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {<br>
+ sweep_cf_node(nir, cf_node);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)<br>
+{<br>
+ switch (cf_node->type) {<br>
+ case nir_cf_node_block:<br>
+ sweep_block(nir, nir_cf_node_as_block(cf_node));<br>
+ break;<br>
+ case nir_cf_node_if:<br>
+ sweep_if(nir, nir_cf_node_as_if(cf_node));<br>
+ break;<br>
+ case nir_cf_node_loop:<br>
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));<br>
+ break;<br>
+ default:<br>
+ unreachable("Invalid CF node type");<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+sweep_impl(nir_shader *nir, nir_function_impl *impl)<br>
+{<br>
+ ralloc_steal(nir, impl);<br>
+<br>
+ ralloc_steal(nir, impl->params);<br>
+ ralloc_steal(nir, impl->return_var);<br>
+ steal_list(nir, nir_variable, &impl->locals);<br>
+ steal_list(nir, nir_register, &impl->registers);<br>
+ sweep_block(nir, impl->start_block);<br>
+ sweep_block(nir, impl->end_block);<br>
+<br>
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {<br>
+ sweep_cf_node(nir, cf_node);<br>
+ }<br>
+<br>
+ /* Wipe out all the metadata, if any. */<br>
+ nir_metadata_preserve(impl, nir_metadata_none);<br>
+}<br>
+<br>
+static void<br>
+sweep_function(nir_shader *nir, nir_function *f)<br>
+{<br>
+ ralloc_steal(nir, f);<br>
+<br>
+ foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {<br>
+ ralloc_steal(nir, overload);<br>
+ ralloc_steal(nir, overload->params);<br>
+ if (overload->impl)<br>
+ sweep_impl(nir, overload->impl);<br>
+ }<br>
+}<br>
+<br>
+void<br>
+nir_sweep(nir_shader *nir)<br>
+{<br>
+ void *rubbish = ralloc_context(NULL);<br>
+<br>
+ /* First, move ownership of all the memory to a temporary context; assume dead. */<br>
+ ralloc_adopt(rubbish, nir);<br>
+<br>
+ /* Variables and registers are not dead. Steal them back. */<br>
+ steal_list(nir, nir_variable, &nir->uniforms);<br>
+ steal_list(nir, nir_variable, &nir->inputs);<br>
+ steal_list(nir, nir_variable, &nir->outputs);<br>
+ steal_list(nir, nir_variable, &nir->globals);<br>
+ steal_list(nir, nir_variable, &nir->system_values);<br>
+ steal_list(nir, nir_register, &nir->registers);<br>
+<br>
+ /* Recurse into functions, stealing their contents back. */<br>
+ foreach_list_typed(nir_function, func, node, &nir->functions) {<br>
+ sweep_function(nir, func);<br>
+ }<br>
+<br>
+ /* Free everything we didn't steal back. */<br>
+ ralloc_free(rubbish);<br>
+}<br>
--<br>
2.3.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>