[Mesa-dev] [PATCH 8/9] nir: Implement a nir_sweep() pass.

Kenneth Graunke kenneth at whitecape.org
Wed Apr 1 22:51:57 PDT 2015


On Wednesday, April 01, 2015 05:54:29 PM Jason Ekstrand wrote:
> On Sat, Mar 28, 2015 at 2:28 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> > This pass performs a mark and sweep pass over a nir_shader's associated
> > memory - anything still connected to the program will be kept, and any
> > dead memory we dropped on the floor will be freed.
> >
> > The expectation is that this will be called when finished building and
> > optimizing the shader.  However, it's also fine to call it earlier, and
> > many times, to free up memory earlier.
> >
> > Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> > ---
> >  src/glsl/Makefile.sources |   1 +
> >  src/glsl/nir/nir.h        |   2 +
> >  src/glsl/nir/nir_sweep.c  | 299 ++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 302 insertions(+)
> >  create mode 100644 src/glsl/nir/nir_sweep.c
> >
> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> > index 8d29c55..7046407 100644
> > --- a/src/glsl/Makefile.sources
> > +++ b/src/glsl/Makefile.sources
> > @@ -54,6 +54,7 @@ NIR_FILES = \
> >         nir/nir_search.c \
> >         nir/nir_search.h \
> >         nir/nir_split_var_copies.c \
> > +       nir/nir_sweep.c \
> >         nir/nir_to_ssa.c \
> >         nir/nir_types.h \
> >         nir/nir_validate.c \
> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> > index 7b886e3..946f895 100644
> > --- a/src/glsl/nir/nir.h
> > +++ b/src/glsl/nir/nir.h
> > @@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
> >
> >  bool nir_opt_remove_phis(nir_shader *shader);
> >
> > +void nir_sweep(nir_shader *shader);
> > +
> >  #ifdef __cplusplus
> >  } /* extern "C" */
> >  #endif
> > diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
> > new file mode 100644
> > index 0000000..cba5be7
> > --- /dev/null
> > +++ b/src/glsl/nir/nir_sweep.c
> > @@ -0,0 +1,299 @@
> > +/*
> > + * Copyright © 2015 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir.h"
> > +
> > +/**
> > + * \file nir_sweep.c
> > + *
> > + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
> > + * memory - anything still connected to the program will be kept, and any dead memory
> > + * we dropped on the floor will be freed.
> > + *
> > + * The expectation is that drivers should call this when finished compiling the shader
> > + * (after any optimization, lowering, and so on).  However, it's also fine to call it
> > + * earlier, and even many times, trading CPU cycles for memory savings.
> > + */
> > +
> > +#define steal_list(mem_ctx, type, list) \
> > +   foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
> > +
> > +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
> > +
> > +static void
> > +sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa)
> > +{
> > +   ralloc_steal(nir, ssa->uses);
> > +   ralloc_steal(nir, ssa->if_uses);
> > +}
> > +
> > +static void
> > +sweep_src(nir_shader *nir, nir_src *src)
> > +{
> > +   if (!src)
> > +      return;
> > +
> > +   if (src->is_ssa) {
> > +      sweep_ssa_def(nir, src->ssa);
> > +   } else {
> > +      sweep_src(nir, src->reg.indirect);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_dest(nir_shader *nir, nir_dest *dest)
> > +{
> > +   if (dest->is_ssa) {
> > +      sweep_ssa_def(nir, &dest->ssa);
> > +   } else {
> > +      sweep_src(nir, dest->reg.indirect);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_deref_chain(nir_shader *nir, nir_deref *deref)
> > +{
> > +   for (; deref; deref = deref->child) {
> > +      ralloc_steal(nir, deref);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu)
> > +{
> > +   for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
> > +      sweep_src(nir, &alu->src[i].src);
> > +   }
> > +
> > +   sweep_dest(nir, &alu->dest.dest);
> > +}
> > +
> > +static void
> > +sweep_call_instr(nir_shader *nir, nir_call_instr *call)
> > +{
> > +   ralloc_steal(nir, call->params);
> > +   for (int i = 0; i < call->num_params; i++) {
> > +      sweep_deref_chain(nir, &call->params[i]->deref);
> > +   }
> > +   if (call->return_deref)
> > +      sweep_deref_chain(nir, &call->return_deref->deref);
> > +}
> > +
> > +static void
> > +sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex)
> > +{
> > +   if (tex->sampler)
> > +      sweep_deref_chain(nir, &tex->sampler->deref);
> > +
> > +   ralloc_steal(nir, tex->src);
> > +   for (int i = 0; i < tex->num_srcs; i++) {
> > +      sweep_src(nir, &tex->src[i].src);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin)
> > +{
> > +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
> > +      sweep_src(nir, &intrin->src[i]);
> > +   }
> > +
> > +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
> > +      sweep_deref_chain(nir, &intrin->variables[i]->deref);
> > +   }
> > +
> > +   sweep_dest(nir, &intrin->dest);
> > +}
> > +
> > +static void
> > +sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const)
> > +{
> > +   sweep_ssa_def(nir, &load_const->def);
> > +}
> > +
> > +static void
> > +sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef)
> > +{
> > +   sweep_ssa_def(nir, &ssa_undef->def);
> > +}
> > +
> > +static void
> > +sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi)
> > +{
> > +   foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) {
> > +      ralloc_steal(nir, phi_src);
> > +      /* skip pred */
> > +      sweep_src(nir, &phi_src->src);
> > +   }
> > +
> > +   sweep_dest(nir, &phi->dest);
> > +}
> > +
> > +static void
> > +sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr *parallel_copy)
> > +{
> > +   /* Nothing to do here?  It looks like nir_from_ssa already tidies up after itself. */
> > +}
> > +
> > +
> > +static void
> > +sweep_block(nir_shader *nir, nir_block *block)
> > +{
> > +   ralloc_steal(nir, block);
> > +
> > +   nir_foreach_instr(block, instr) {
> > +      ralloc_steal(nir, instr);
> > +
> > +      switch (instr->type) {
> > +      case nir_instr_type_alu:
> > +         sweep_alu_instr(nir, nir_instr_as_alu(instr));
> > +         break;
> > +      case nir_instr_type_call:
> > +         sweep_call_instr(nir, nir_instr_as_call(instr));
> > +         break;
> > +      case nir_instr_type_tex:
> > +         sweep_tex_instr(nir, nir_instr_as_tex(instr));
> > +         break;
> > +      case nir_instr_type_intrinsic:
> > +         sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr));
> > +         break;
> > +      case nir_instr_type_load_const:
> > +         sweep_load_const_instr(nir, nir_instr_as_load_const(instr));
> > +         break;
> > +      case nir_instr_type_ssa_undef:
> > +         sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr));
> > +         break;
> > +      case nir_instr_type_jump:
> > +         /* Nothing to do */
> > +         break;
> > +      case nir_instr_type_phi:
> > +         sweep_phi_instr(nir, nir_instr_as_phi(instr));
> > +         break;
> > +      case nir_instr_type_parallel_copy:
> > +         sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr));
> > +         break;
> > +      default:
> > +         unreachable("Invalid instruction type");
> > +      }
> > +   }
> > +}
> 
> I really don't like having to independantly sweep instructions.  I
> think we should be able to just have everything inside the instruction
> parented to the instruction and ralloc_steal the instruction.  That
> would make sweeping *much* cleaner and would also mean that deleting
> an instruction actually cleans it up.

OK, I really don't know what to do with that feedback.

In many cases, I tried to make fields that are intrinsically part of
their parent structure allocated off of that structure itself (see the
previous few patches).  It was trivial to do and makes the lifetimes
match up nicely.

Having instructions own their sources doesn't sound crazy, but I don't
see how to reasonably accomplish it.  All kinds of code generates
instructions and sets their sources and destinations.  All of those
would have to ralloc_steal() the value to the parent.  You've moved
a bit of complexity from here (one place that walks the IR) to every
place that generates or alters IR.  Sounds like a mess.

I guess we could make setter functions to encapsulate that (and enforce
their use somehow?)...or try and make the nir_instr_create() functions
require sources and destinations to exist and be passed as parameters,
so the create() functions steal them.  Still, lots of churn.

GLSL IR works this way too - subexpressions aren't parented to their
parent expression - everything's allocated out of one big pool and the
memory sweeping pass takes care of it.  It's worked well.

I guess I am being a bit overzealous here though - I shouldn't have to
descend into SSA sources - only SSA values generated by an instruction.

I also wondered whether nir_foreach_src and nir_foreach_dest would be
sufficient, and avoid this switch statement.  There are a few things
not covered by that, but it's pretty close...

> > +static void
> > +sweep_if(nir_shader *nir, nir_if *iff)
> > +{
> > +   ralloc_steal(nir, iff);
> 
> This isn't sufficient.  The if can have a source with a relative
> offset (yes, that's a crazy case) and that will have a ralloc'd source
> that doesn't get swept.

Thanks, I missed that.  I'll add sweep_src(nir, &iff->condition);

> > +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
> > +      sweep_cf_node(nir, cf_node);
> > +   }
> > +
> > +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
> > +      sweep_cf_node(nir, cf_node);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_loop(nir_shader *nir, nir_loop *loop)
> > +{
> > +   ralloc_steal(nir, loop);
> > +
> > +   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
> > +      sweep_cf_node(nir, cf_node);
> > +   }
> > +}
> > +
> > +static void
> > +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
> > +{
> > +   switch (cf_node->type) {
> > +   case nir_cf_node_block:
> > +      sweep_block(nir, nir_cf_node_as_block(cf_node));
> > +      break;
> > +   case nir_cf_node_if:
> > +      sweep_if(nir, nir_cf_node_as_if(cf_node));
> > +      break;
> > +   case nir_cf_node_loop:
> > +      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
> > +      break;
> > +   default:
> > +      unreachable("Invalid CF node type");
> > +   }
> > +}
> > +
> > +static void
> > +sweep_impl(nir_shader *nir, nir_function_impl *impl)
> > +{
> > +   ralloc_steal(nir, impl);
> > +
> > +   ralloc_steal(nir, impl->params);
> > +   ralloc_steal(nir, impl->return_var);
> > +   steal_list(nir, nir_variable, &impl->locals);
> > +   steal_list(nir, nir_register, &impl->registers);
> > +   sweep_block(nir, impl->start_block);
> 
> The start block is in the cf_node list so you don't need to sweep it separately
> 

You're right, thanks - will remove.

> > +   sweep_block(nir, impl->end_block);
> 
> Put this below sweeping the cf_node list.

OK.

> > +
> > +   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
> > +      sweep_cf_node(nir, cf_node);
> > +   }
> > +
> > +   /* Wipe out all the metadata, if any. */
> > +   nir_metadata_preserve(impl, nir_metadata_none);
> 
> Is this really needed?  We shouldn't be changing any metadata.  In
> particular, we don't actually change any pointers.

Metadata is currently ralloc'd using nir_shader as the context, so
this pass is going to free it.  Marking it invalid means that code
will assume it doesn't exist, and recompute it when necessary.

We do leave dangling pointers in place, which is lame.  Of course,
nir_metadata_preserve leaves pointers to bunk metadata in place...
It might make sense to try and have nir_metadata_preserve free any
invalid metadata.

I could also teach this pass how to walk said metadata to try and
preserve it.  But it didn't seem worth the hassle.

> > +}
> > +
> > +static void
> > +sweep_function(nir_shader *nir, nir_function *f)
> > +{
> > +   ralloc_steal(nir, f);
> > +
> > +   foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
> > +      ralloc_steal(nir, overload);
> > +      ralloc_steal(nir, overload->params);
> > +      if (overload->impl)
> > +         sweep_impl(nir, overload->impl);
> > +   }
> > +}
> > +
> > +void
> > +nir_sweep(nir_shader *nir)
> 
> I made the s/sweap/steal/ comment earlier.  I'm still not a fan of
> "sweep", but I don't know that I like "steal" either.  If this is a
> "mark and sweep" pass then the "sweep" functions are actually
> performing the "mark" step and ralloc_free is performing the sweep.

Yeah, I realize that's awkward.  I originally called the pass
nir_sweep() because it would go sweep up the place, throwing out all the
crumbs people left on the floor.  "sweep" = "tidy", of sorts.

So when I needed helper functions, I called them sweep_this and
sweep_that.

The analogy to mark and sweep garbage collectors came later, when I was
writing the comments.

I'm happy to rename things or adjust comments.  I like nir_sweep() as a
top-level name, but renaming the helpers to steal_* makes sense.

> > +{
> > +   void *rubbish = ralloc_context(NULL);
> > +
> > +   /* First, move ownership of all the memory to a temporary context; assume dead. */
> > +   ralloc_adopt(rubbish, nir);
> 
> Yes, we could do it this way.  Or we can simply have a dedicated
> context for each shader that isn't the nir_shader itself.  Then we
> would make a new context, move everything from old to new and delete
> the old.  I don't know for sure which I prefer.
> --Jason

They're basically equivalent.  This way adds a single trivial function
to ralloc (which seems useful), and doesn't require any real changes to
the NIR codebase.  We could instead add a separate context, and make
everything use that, but I'm not sure that it buys us anything.

GLSL IR has these notions of being 'finished' - CompileShader allocates
everything out of _mesa_glsl_parse_state, then reparents it to gl_shader
when finished.  LinkShader copies everything to a temporary linker
context, allocates everything out of that, and reparents it to
prog->_LinkedShaders[i]->ir.  i965 then completely botches it (thanks
for making me think about this).

NIR seems to be more fluid - it doesn't have a "go compile", "go
optimize" step - drivers make a shader via glsl->nir, prog->nir, or
tgsi->nir, then call various passes on that.  So there's not this
"temporary context", then "we're done, so steal it to the permanent
thing" idea.  Allocating out of the shader and sweeping periodically
seems pretty convenient.  It makes it easier to sweep more often, too.

> > +   /* Variables and registers are not dead.  Steal them back. */
> > +   steal_list(nir, nir_variable, &nir->uniforms);
> > +   steal_list(nir, nir_variable, &nir->inputs);
> > +   steal_list(nir, nir_variable, &nir->outputs);
> > +   steal_list(nir, nir_variable, &nir->globals);
> > +   steal_list(nir, nir_variable, &nir->system_values);
> > +   steal_list(nir, nir_register, &nir->registers);
> > +
> > +   /* Recurse into functions, stealing their contents back. */
> > +   foreach_list_typed(nir_function, func, node, &nir->functions) {
> > +      sweep_function(nir, func);
> > +   }
> > +
> > +   /* Free everything we didn't steal back. */
> > +   ralloc_free(rubbish);
> > +}
> > --
> > 2.3.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150401/691e3e77/attachment-0001.sig>


More information about the mesa-dev mailing list