[Mesa-dev] [PATCH 8/9] nir: Implement a nir_sweep() pass.

Jason Ekstrand jason at jlekstrand.net
Thu Apr 2 07:28:10 PDT 2015


On Wed, Apr 1, 2015 at 10:51 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Wednesday, April 01, 2015 05:54:29 PM Jason Ekstrand wrote:
>> On Sat, Mar 28, 2015 at 2:28 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
>> > This pass performs a mark and sweep pass over a nir_shader's associated
>> > memory - anything still connected to the program will be kept, and any
>> > dead memory we dropped on the floor will be freed.
>> >
>> > The expectation is that this will be called when finished building and
>> > optimizing the shader.  However, it's also fine to call it earlier, and
>> > many times, to free up memory earlier.
>> >
>> > Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> > ---
>> >  src/glsl/Makefile.sources |   1 +
>> >  src/glsl/nir/nir.h        |   2 +
>> >  src/glsl/nir/nir_sweep.c  | 299 ++++++++++++++++++++++++++++++++++++++++++++++
>> >  3 files changed, 302 insertions(+)
>> >  create mode 100644 src/glsl/nir/nir_sweep.c
>> >
>> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> > index 8d29c55..7046407 100644
>> > --- a/src/glsl/Makefile.sources
>> > +++ b/src/glsl/Makefile.sources
>> > @@ -54,6 +54,7 @@ NIR_FILES = \
>> >         nir/nir_search.c \
>> >         nir/nir_search.h \
>> >         nir/nir_split_var_copies.c \
>> > +       nir/nir_sweep.c \
>> >         nir/nir_to_ssa.c \
>> >         nir/nir_types.h \
>> >         nir/nir_validate.c \
>> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> > index 7b886e3..946f895 100644
>> > --- a/src/glsl/nir/nir.h
>> > +++ b/src/glsl/nir/nir.h
>> > @@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
>> >
>> >  bool nir_opt_remove_phis(nir_shader *shader);
>> >
>> > +void nir_sweep(nir_shader *shader);
>> > +
>> >  #ifdef __cplusplus
>> >  } /* extern "C" */
>> >  #endif
>> > diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
>> > new file mode 100644
>> > index 0000000..cba5be7
>> > --- /dev/null
>> > +++ b/src/glsl/nir/nir_sweep.c
>> > @@ -0,0 +1,299 @@
>> > +/*
>> > + * Copyright © 2015 Intel Corporation
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person obtaining a
>> > + * copy of this software and associated documentation files (the "Software"),
>> > + * to deal in the Software without restriction, including without limitation
>> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> > + * and/or sell copies of the Software, and to permit persons to whom the
>> > + * Software is furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice (including the next
>> > + * paragraph) shall be included in all copies or substantial portions of the
>> > + * Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> > + * IN THE SOFTWARE.
>> > + */
>> > +
>> > +#include "nir.h"
>> > +
>> > +/**
>> > + * \file nir_sweep.c
>> > + *
>> > + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
>> > + * memory - anything still connected to the program will be kept, and any dead memory
>> > + * we dropped on the floor will be freed.
>> > + *
>> > + * The expectation is that drivers should call this when finished compiling the shader
>> > + * (after any optimization, lowering, and so on).  However, it's also fine to call it
>> > + * earlier, and even many times, trading CPU cycles for memory savings.
>> > + */
>> > +
>> > +#define steal_list(mem_ctx, type, list) \
>> > +   foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
>> > +
>> > +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
>> > +
>> > +static void
>> > +sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa)
>> > +{
>> > +   ralloc_steal(nir, ssa->uses);
>> > +   ralloc_steal(nir, ssa->if_uses);
>> > +}
>> > +
>> > +static void
>> > +sweep_src(nir_shader *nir, nir_src *src)
>> > +{
>> > +   if (!src)
>> > +      return;
>> > +
>> > +   if (src->is_ssa) {
>> > +      sweep_ssa_def(nir, src->ssa);
>> > +   } else {
>> > +      sweep_src(nir, src->reg.indirect);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_dest(nir_shader *nir, nir_dest *dest)
>> > +{
>> > +   if (dest->is_ssa) {
>> > +      sweep_ssa_def(nir, &dest->ssa);
>> > +   } else {
>> > +      sweep_src(nir, dest->reg.indirect);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_deref_chain(nir_shader *nir, nir_deref *deref)
>> > +{
>> > +   for (; deref; deref = deref->child) {
>> > +      ralloc_steal(nir, deref);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu)
>> > +{
>> > +   for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
>> > +      sweep_src(nir, &alu->src[i].src);
>> > +   }
>> > +
>> > +   sweep_dest(nir, &alu->dest.dest);
>> > +}
>> > +
>> > +static void
>> > +sweep_call_instr(nir_shader *nir, nir_call_instr *call)
>> > +{
>> > +   ralloc_steal(nir, call->params);
>> > +   for (int i = 0; i < call->num_params; i++) {
>> > +      sweep_deref_chain(nir, &call->params[i]->deref);
>> > +   }
>> > +   if (call->return_deref)
>> > +      sweep_deref_chain(nir, &call->return_deref->deref);
>> > +}
>> > +
>> > +static void
>> > +sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex)
>> > +{
>> > +   if (tex->sampler)
>> > +      sweep_deref_chain(nir, &tex->sampler->deref);
>> > +
>> > +   ralloc_steal(nir, tex->src);
>> > +   for (int i = 0; i < tex->num_srcs; i++) {
>> > +      sweep_src(nir, &tex->src[i].src);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin)
>> > +{
>> > +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
>> > +      sweep_src(nir, &intrin->src[i]);
>> > +   }
>> > +
>> > +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
>> > +      sweep_deref_chain(nir, &intrin->variables[i]->deref);
>> > +   }
>> > +
>> > +   sweep_dest(nir, &intrin->dest);
>> > +}
>> > +
>> > +static void
>> > +sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const)
>> > +{
>> > +   sweep_ssa_def(nir, &load_const->def);
>> > +}
>> > +
>> > +static void
>> > +sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef)
>> > +{
>> > +   sweep_ssa_def(nir, &ssa_undef->def);
>> > +}
>> > +
>> > +static void
>> > +sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi)
>> > +{
>> > +   foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) {
>> > +      ralloc_steal(nir, phi_src);
>> > +      /* skip pred */
>> > +      sweep_src(nir, &phi_src->src);
>> > +   }
>> > +
>> > +   sweep_dest(nir, &phi->dest);
>> > +}
>> > +
>> > +static void
>> > +sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr *parallel_copy)
>> > +{
>> > +   /* Nothing to do here?  It looks like nir_from_ssa already tidies up after itself. */
>> > +}
>> > +
>> > +
>> > +static void
>> > +sweep_block(nir_shader *nir, nir_block *block)
>> > +{
>> > +   ralloc_steal(nir, block);
>> > +
>> > +   nir_foreach_instr(block, instr) {
>> > +      ralloc_steal(nir, instr);
>> > +
>> > +      switch (instr->type) {
>> > +      case nir_instr_type_alu:
>> > +         sweep_alu_instr(nir, nir_instr_as_alu(instr));
>> > +         break;
>> > +      case nir_instr_type_call:
>> > +         sweep_call_instr(nir, nir_instr_as_call(instr));
>> > +         break;
>> > +      case nir_instr_type_tex:
>> > +         sweep_tex_instr(nir, nir_instr_as_tex(instr));
>> > +         break;
>> > +      case nir_instr_type_intrinsic:
>> > +         sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr));
>> > +         break;
>> > +      case nir_instr_type_load_const:
>> > +         sweep_load_const_instr(nir, nir_instr_as_load_const(instr));
>> > +         break;
>> > +      case nir_instr_type_ssa_undef:
>> > +         sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr));
>> > +         break;
>> > +      case nir_instr_type_jump:
>> > +         /* Nothing to do */
>> > +         break;
>> > +      case nir_instr_type_phi:
>> > +         sweep_phi_instr(nir, nir_instr_as_phi(instr));
>> > +         break;
>> > +      case nir_instr_type_parallel_copy:
>> > +         sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr));
>> > +         break;
>> > +      default:
>> > +         unreachable("Invalid instruction type");
>> > +      }
>> > +   }
>> > +}
>>
>> I really don't like having to independantly sweep instructions.  I
>> think we should be able to just have everything inside the instruction
>> parented to the instruction and ralloc_steal the instruction.  That
>> would make sweeping *much* cleaner and would also mean that deleting
>> an instruction actually cleans it up.
>
> OK, I really don't know what to do with that feedback.
>
> In many cases, I tried to make fields that are intrinsically part of
> their parent structure allocated off of that structure itself (see the
> previous few patches).  It was trivial to do and makes the lifetimes
> match up nicely.
>
> Having instructions own their sources doesn't sound crazy, but I don't
> see how to reasonably accomplish it.  All kinds of code generates
> instructions and sets their sources and destinations.  All of those
> would have to ralloc_steal() the value to the parent.  You've moved
> a bit of complexity from here (one place that walks the IR) to every
> place that generates or alters IR.  Sounds like a mess.

I don't think it's as much of a mess as that.  There are really only a
few things I think we would have to worry about:

1) SSA defs.  The use/def sets and optional name would have to be
allocated out of the instruction.  We can do this in nir_ssa_def_init.

2) deref chains.  nir_deref_chain_copy already takes a mem_ctx, we can
just make that an intrinsic_instr.

3) phi node sources.  We just have to change the few places that make
phi nodes to do the right thing here.

4) source indirects.  This is the one that will take work.  Most of
the time when a source is set the nir_src_copy instruction is used so
we could make that take a nir_instr instead of a void * for the mem
context.  However... that's a problem for nir_if.  Connor and I have
already talked about disallowing indirects for if sources because it
makes little sense.  However, we don't at the moment.  If we switch if
to being an instruction this goes away.

Number 4 in particular is most of the reason I've been punting on this
one.  It will take work to get all that cleaned up but I do think it's
possible and probably something we want to do eventually.  In any
case, the above things can all be done incrementally and the sweep
pass updated as we go.  It's probably not a prerequisite for landing
the pass.  We just need to leave the bug open so that we remember to
finish.

> I guess we could make setter functions to encapsulate that (and enforce
> their use somehow?)...or try and make the nir_instr_create() functions
> require sources and destinations to exist and be passed as parameters,
> so the create() functions steal them.  Still, lots of churn.
>
> GLSL IR works this way too - subexpressions aren't parented to their
> parent expression - everything's allocated out of one big pool and the
> memory sweeping pass takes care of it.  It's worked well.

We don't really have "subexpressions" here.  Each instruction just has
some stuff in it.

> I guess I am being a bit overzealous here though - I shouldn't have to
> descend into SSA sources - only SSA values generated by an instruction.
>
> I also wondered whether nir_foreach_src and nir_foreach_dest would be
> sufficient, and avoid this switch statement.  There are a few things
> not covered by that, but it's pretty close...
>
>> > +static void
>> > +sweep_if(nir_shader *nir, nir_if *iff)
>> > +{
>> > +   ralloc_steal(nir, iff);
>>
>> This isn't sufficient.  The if can have a source with a relative
>> offset (yes, that's a crazy case) and that will have a ralloc'd source
>> that doesn't get swept.
>
> Thanks, I missed that.  I'll add sweep_src(nir, &iff->condition);
>
>> > +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
>> > +      sweep_cf_node(nir, cf_node);
>> > +   }
>> > +
>> > +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
>> > +      sweep_cf_node(nir, cf_node);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_loop(nir_shader *nir, nir_loop *loop)
>> > +{
>> > +   ralloc_steal(nir, loop);
>> > +
>> > +   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
>> > +      sweep_cf_node(nir, cf_node);
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
>> > +{
>> > +   switch (cf_node->type) {
>> > +   case nir_cf_node_block:
>> > +      sweep_block(nir, nir_cf_node_as_block(cf_node));
>> > +      break;
>> > +   case nir_cf_node_if:
>> > +      sweep_if(nir, nir_cf_node_as_if(cf_node));
>> > +      break;
>> > +   case nir_cf_node_loop:
>> > +      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
>> > +      break;
>> > +   default:
>> > +      unreachable("Invalid CF node type");
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +sweep_impl(nir_shader *nir, nir_function_impl *impl)
>> > +{
>> > +   ralloc_steal(nir, impl);
>> > +
>> > +   ralloc_steal(nir, impl->params);
>> > +   ralloc_steal(nir, impl->return_var);
>> > +   steal_list(nir, nir_variable, &impl->locals);
>> > +   steal_list(nir, nir_register, &impl->registers);
>> > +   sweep_block(nir, impl->start_block);
>>
>> The start block is in the cf_node list so you don't need to sweep it separately
>>
>
> You're right, thanks - will remove.
>
>> > +   sweep_block(nir, impl->end_block);
>>
>> Put this below sweeping the cf_node list.
>
> OK.
>
>> > +
>> > +   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
>> > +      sweep_cf_node(nir, cf_node);
>> > +   }
>> > +
>> > +   /* Wipe out all the metadata, if any. */
>> > +   nir_metadata_preserve(impl, nir_metadata_none);
>>
>> Is this really needed?  We shouldn't be changing any metadata.  In
>> particular, we don't actually change any pointers.
>
> Metadata is currently ralloc'd using nir_shader as the context, so
> this pass is going to free it.  Marking it invalid means that code
> will assume it doesn't exist, and recompute it when necessary.
>
> We do leave dangling pointers in place, which is lame.  Of course,
> nir_metadata_preserve leaves pointers to bunk metadata in place...
> It might make sense to try and have nir_metadata_preserve free any
> invalid metadata.
>
> I could also teach this pass how to walk said metadata to try and
> preserve it.  But it didn't seem worth the hassle.

Fair point.  Go ahead and nuke it.

>> > +}
>> > +
>> > +static void
>> > +sweep_function(nir_shader *nir, nir_function *f)
>> > +{
>> > +   ralloc_steal(nir, f);
>> > +
>> > +   foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
>> > +      ralloc_steal(nir, overload);
>> > +      ralloc_steal(nir, overload->params);
>> > +      if (overload->impl)
>> > +         sweep_impl(nir, overload->impl);
>> > +   }
>> > +}
>> > +
>> > +void
>> > +nir_sweep(nir_shader *nir)
>>
>> I made the s/sweap/steal/ comment earlier.  I'm still not a fan of
>> "sweep", but I don't know that I like "steal" either.  If this is a
>> "mark and sweep" pass then the "sweep" functions are actually
>> performing the "mark" step and ralloc_free is performing the sweep.
>
> Yeah, I realize that's awkward.  I originally called the pass
> nir_sweep() because it would go sweep up the place, throwing out all the
> crumbs people left on the floor.  "sweep" = "tidy", of sorts.
>
> So when I needed helper functions, I called them sweep_this and
> sweep_that.
>
> The analogy to mark and sweep garbage collectors came later, when I was
> writing the comments.
>
> I'm happy to rename things or adjust comments.  I like nir_sweep() as a
> top-level name, but renaming the helpers to steal_* makes sense.

Yeah, that's reasonable.  I'm not opposed to sweap, it's just not
particularly descriptive.  Maybe we should just call it
nir_collect_garbage?

>> > +{
>> > +   void *rubbish = ralloc_context(NULL);
>> > +
>> > +   /* First, move ownership of all the memory to a temporary context; assume dead. */
>> > +   ralloc_adopt(rubbish, nir);
>>
>> Yes, we could do it this way.  Or we can simply have a dedicated
>> context for each shader that isn't the nir_shader itself.  Then we
>> would make a new context, move everything from old to new and delete
>> the old.  I don't know for sure which I prefer.
>> --Jason
>
> They're basically equivalent.  This way adds a single trivial function
> to ralloc (which seems useful), and doesn't require any real changes to
> the NIR codebase.  We could instead add a separate context, and make
> everything use that, but I'm not sure that it buys us anything.
>
> GLSL IR has these notions of being 'finished' - CompileShader allocates
> everything out of _mesa_glsl_parse_state, then reparents it to gl_shader
> when finished.  LinkShader copies everything to a temporary linker
> context, allocates everything out of that, and reparents it to
> prog->_LinkedShaders[i]->ir.  i965 then completely botches it (thanks
> for making me think about this).
>
> NIR seems to be more fluid - it doesn't have a "go compile", "go
> optimize" step - drivers make a shader via glsl->nir, prog->nir, or
> tgsi->nir, then call various passes on that.  So there's not this
> "temporary context", then "we're done, so steal it to the permanent
> thing" idea.  Allocating out of the shader and sweeping periodically
> seems pretty convenient.  It makes it easier to sweep more often, too.

That seems reasonable.  I'm not opposed to the current approach.

>> > +   /* Variables and registers are not dead.  Steal them back. */
>> > +   steal_list(nir, nir_variable, &nir->uniforms);
>> > +   steal_list(nir, nir_variable, &nir->inputs);
>> > +   steal_list(nir, nir_variable, &nir->outputs);
>> > +   steal_list(nir, nir_variable, &nir->globals);
>> > +   steal_list(nir, nir_variable, &nir->system_values);
>> > +   steal_list(nir, nir_register, &nir->registers);
>> > +
>> > +   /* Recurse into functions, stealing their contents back. */
>> > +   foreach_list_typed(nir_function, func, node, &nir->functions) {
>> > +      sweep_function(nir, func);
>> > +   }
>> > +
>> > +   /* Free everything we didn't steal back. */
>> > +   ralloc_free(rubbish);
>> > +}
>> > --
>> > 2.3.4
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list