[Mesa-dev] [PATCH 5/7] nir: support to clone shaders (v2)

Rob Clark robdclark at gmail.com
Mon Oct 26 12:53:53 PDT 2015


On Mon, Oct 26, 2015 at 2:44 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> On Mon, Oct 26, 2015 at 8:27 AM, Rob Clark <robdclark at gmail.com> wrote:
>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>> ---
>>  src/glsl/Makefile.sources |   1 +
>>  src/glsl/nir/nir.c        |   8 +
>>  src/glsl/nir/nir.h        |   2 +
>>  src/glsl/nir/nir_clone.c  | 949 ++++++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 960 insertions(+)
>>  create mode 100644 src/glsl/nir/nir_clone.c
>>
>> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> index ca87036..25e3801 100644
>> --- a/src/glsl/Makefile.sources
>> +++ b/src/glsl/Makefile.sources
>> @@ -26,6 +26,7 @@ NIR_FILES = \
>>         nir/nir.h \
>>         nir/nir_array.h \
>>         nir/nir_builder.h \
>> +       nir/nir_clone.c \
>>         nir/nir_constant_expressions.h \
>>         nir/nir_control_flow.c \
>>         nir/nir_control_flow.h \
>> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
>> index 0cbe4e1..2defa36 100644
>> --- a/src/glsl/nir/nir.c
>> +++ b/src/glsl/nir/nir.c
>> @@ -316,6 +316,14 @@ nir_block_create(nir_shader *shader)
>>     block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
>>                                            _mesa_key_pointer_equal);
>>     block->imm_dom = NULL;
>> +   /* XXX maybe it would be worth it to defer allocation?  This
>> +    * way it doesn't get allocated for shader ref's that never run
>> +    * nir_calc_dominance?  For example, state-tracker creates an
>> +    * initial IR, clones that, runs appropriate lowering pass, passes
>> +    * to driver which does common lowering/opt, and then stores ref
>> +    * which is later used to do state specific lowering and futher
>> +    * opt.  Do any of the references not need dominance metadata?
>> +    */
>
> Yes, we should defer this.  The dominance frontier is a product of the
> dominance calculations (as are the dominance arrays) so we should
> allocate it there.
>
>>     block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
>>                                            _mesa_key_pointer_equal);
>>
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index fb60340..efcbf62 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -1897,6 +1897,8 @@ void nir_index_blocks(nir_function_impl *impl);
>>  void nir_print_shader(nir_shader *shader, FILE *fp);
>>  void nir_print_instr(const nir_instr *instr, FILE *fp);
>>
>> +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
>> +
>>  #ifdef DEBUG
>>  void nir_validate_shader(nir_shader *shader);
>>  #else
>> diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
>> new file mode 100644
>> index 0000000..5c1739e
>> --- /dev/null
>> +++ b/src/glsl/nir/nir_clone.c
>> @@ -0,0 +1,949 @@
>> +/*
>> + * Copyright © 2015 Red Hat
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "nir.h"
>> +#include "nir_control_flow_private.h"
>> +
>> +// TODO move these:
>> +#define exec_list_head(type, list, node) \
>> +   exec_node_data(type, exec_list_get_head(list), node)
>> +#define exec_list_next(type, nodeptr, node) \
>> +   exec_node_data(type, exec_node_get_next(nodeptr), node)
>> +
>> +/* Secret Decoder Ring:
>> + *   clone_foo():
>> + *        Allocate and clone a foo.
>> + *   __clone_foo():
>> + *        Clone body of foo (ie. parent class, embedded struct,
>> + *        etc)
>> + *   __clone_foo_p2():
>> + *        clone body of foo, pass 2.. since in first pass we
>> + *        can have fwd references to embedded structs, some
>> + *        ptrs (and things that depend on them) must be
>> + *        resolved in 2nd pass
>> + */
>> +
>> +typedef struct {
>> +   /* maps orig ptr -> cloned ptr: */
>> +   struct hash_table *ptr_table;
>> +   /* list of unresolved ssa src's: */
>> +   struct list_head ssa_src_list;
>> +
>> +   /* memctx for new toplevel shader object: */
>> +   void *mem_ctx;
>> +   /* new shader object, used as memctx for just about everything else: */
>> +   nir_shader *ns;
>> +} clone_state;
>> +
>> +typedef void *(*clone_fxn)(clone_state *state, const void *ptr);
>> +
>> +static void
>> +init_clone_state(clone_state *state, void *mem_ctx)
>> +{
>> +   state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>> +                                              _mesa_key_pointer_equal);
>> +   list_inithead(&state->ssa_src_list);
>> +   state->mem_ctx = mem_ctx;
>> +}
>> +
>> +static void
>> +free_clone_state(clone_state *state)
>> +{
>> +   _mesa_hash_table_destroy(state->ptr_table, NULL);
>> +}
>> +
>> +static void *
>> +clone_ptr(clone_state *state, const void *ptr, clone_fxn clone)
>> +{
>> +   struct hash_entry *entry;
>> +   void *nptr;
>> +
>> +   if (!ptr)
>> +      return NULL;
>> +
>> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
>> +   if (entry)
>> +      return entry->data;
>
> I've mentioned this on IRC before, but I really don't think this whole
> song-and-dance is needed.  As long as you clone registers and
> variables before you clone instructions and as long as you clone
> instructions top-to-bottom like you are, there is exactly one case in
> which you will ever need to reference something before it's added to
> the table: phi sources for phis at the top of loops.  This means that
> you just need to make clone_phi a little special and the second pass
> can just be your walk over the ssa_src_list.  In particular, clone_phi
> needs to look something like this:

I did start going down that road, but the problem is that it would
involve cloning blocks in more or less the right order too.. and while
the clone_ptr() approach for dealing w/ successors/predecessor block
ptrs is pretty convenient, it doesn't really guarantee that blocks get
cloned in the right order.

Possibly I could fill out successor/predecessor links on a 2nd pass,
at the expense of extra complexity.  Not really sure it's worth it..

> clone_phi()
> {
>    phi = nir_phi_instr_create()
>    add_to_remap(phi)
>    nir_instr_init(phi)
>
>    // Walk the phi sources, fill them out, and add them to
> src_ssa_list.  You'll fill them out with the actual sources later.
> }
>
> This will ensure that the phi destination *does* exist for all future
> stuff and only the sources need a second pass.
>
>> +
>> +   nptr = clone(state, ptr);
>> +
>> +#ifdef DEBUG
>> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
>> +   assert(entry->data == nptr);
>> +#endif
>> +
>> +   return nptr;
>> +}
>> +
>> +/* To avoid loops, we need to store cloned ptrs before they are fully
>> + * initialized, since cloning their contents could result in a path
>> + * back to the cloned pointer
>> + */
>> +static void
>> +store_ptr(clone_state *state, void *nptr, const void *ptr)
>> +{
>> +   _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
>> +}
>> +
>> +/* There are a few cases we need to know the ralloc_parent()'s clone.
>> + * This only works when the original ptr's ralloc_parent() has already
>> + * been cloned, but for parents this should always be the case.
>> + */
>> +static void *
>> +ralloc_parent_clone(clone_state *state, const void *ptr)
>> +{
>> +   return clone_ptr(state, ralloc_parent(ptr), NULL);
>> +}
>> +
>> +static void * clone_var(clone_state *state, const void *ptr);
>> +static void * clone_reg(clone_state *state, const void *ptr);
>> +static void * clone_src(clone_state *state, const void *ptr);
>> +static void * clone_deref(clone_state *state, const void *ptr);
>> +static void * clone_instr(clone_state *state, const void *ptr);
>> +static void * clone_block(clone_state *state, const void *ptr);
>> +static void * clone_cf_node(clone_state *state, const void *ptr);
>> +static void * clone_function_overload(clone_state *state, const void *ptr);
>> +static void * clone_function(clone_state *state, const void *ptr);
>> +
>> +static void __clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf);
>> +
>> +/* clone list of nir_variable: */
>> +static void
>> +clone_var_list(clone_state *state, struct exec_list *dst,
>> +               const struct exec_list *list)
>> +{
>> +   exec_list_make_empty(dst);
>> +   foreach_list_typed(nir_variable, var, node, list) {
>> +      nir_variable *nvar = clone_ptr(state, var, clone_var);
>> +      exec_list_push_tail(dst, &nvar->node);
>> +   }
>> +}
>> +
>> +/* clone list of nir_register: */
>> +static void
>> +clone_reg_list(clone_state *state, struct exec_list *dst,
>> +               const struct exec_list *list)
>> +{
>> +   exec_list_make_empty(dst);
>> +   foreach_list_typed(nir_register, reg, node, list) {
>> +      nir_register *nreg = clone_ptr(state, reg, clone_reg);
>> +      exec_list_push_tail(dst, &nreg->node);
>> +   }
>> +}
>> +
>> +/* clone list of nir_cf_node: */
>> +static void
>> +clone_cf_list(clone_state *state, struct exec_list *dst,
>> +              const struct exec_list *list)
>> +{
>> +   exec_list_make_empty(dst);
>> +   foreach_list_typed(nir_cf_node, cf, node, list) {
>> +      nir_cf_node *ncf = clone_ptr(state, cf, clone_cf_node);
>> +      exec_list_push_tail(dst, &ncf->node);
>> +   }
>> +}
>> +
>> +static void
>> +__clone_cf_list_p2(clone_state *state, struct exec_list *dst,
>> +                   const struct exec_list *list)
>> +{
>> +   nir_cf_node *ncf = exec_list_head(nir_cf_node, dst, node);
>> +   foreach_list_typed(nir_cf_node, cf, node, list) {
>> +      __clone_cf_node_p2(state, ncf, cf);
>> +      ncf = exec_list_next(nir_cf_node, &ncf->node, node);
>> +   }
>> +}
>> +
>> +static struct set *
>> +clone_set(clone_state *state, void *mem_ctx, struct set *src, clone_fxn clone)
>> +{
>> +   struct set *dst = _mesa_set_create(mem_ctx, src->key_hash_function,
>> +                                      src->key_equals_function);
>> +   struct set_entry *entry;
>> +   set_foreach(src, entry) {
>> +      _mesa_set_add(dst, clone_ptr(state, entry->key, clone));
>> +   }
>> +   return dst;
>> +}
>> +
>> +static void *
>> +clone_constant(clone_state *state, const void *ptr)
>> +{
>> +   const nir_constant *c = ptr;
>> +   nir_constant *nc;
>> +
>> +   void *mem_ctx = ralloc_parent_clone(state, c);
>> +
>> +   nc = ralloc(mem_ctx, nir_constant);
>> +   store_ptr(state, nc, c);
>> +
>> +   nc->value = c->value;
>> +   nc->num_elements = c->num_elements;
>> +   nc->elements = ralloc_array(mem_ctx, nir_constant *, c->num_elements);
>> +   for (unsigned i = 0; i < c->num_elements; i++) {
>> +      nc->elements[i] = clone_ptr(state, c->elements[i], clone_constant);
>> +   }
>> +
>> +   return nc;
>> +}
>> +
>> +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
>> + * having to deal with locals and globals separately:
>> + */
>> +static void *
>> +clone_var(clone_state *state, const void *ptr)
>> +{
>> +   const nir_variable *var = ptr;
>> +   nir_variable *nvar;
>> +
>> +   nvar = rzalloc(state->ns, nir_variable);
>> +   store_ptr(state, nvar, var);
>> +
>> +   nvar->type = var->type;
>> +   nvar->name = ralloc_strdup(nvar, var->name);
>> +   nvar->max_ifc_array_access = ralloc_array(nvar, unsigned,
>> +                                             var->num_max_ifc_array_access);
>> +   memcpy(nvar->max_ifc_array_access, var->max_ifc_array_access,
>> +          var->num_max_ifc_array_access * sizeof(unsigned));
>> +   nvar->data = var->data;
>> +   nvar->num_state_slots = var->num_state_slots;
>> +   nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
>> +   memcpy(nvar->state_slots, var->state_slots,
>> +          var->num_state_slots * sizeof(nir_state_slot));
>> +   nvar->constant_initializer = clone_ptr(state, var->constant_initializer,
>> +                                          clone_constant);
>> +   nvar->interface_type = var->interface_type;
>> +
>> +   return nvar;
>> +}
>> +
>> +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
>> + * to avoid having to deal with locals and globals separately:
>> + */
>> +static void *
>> +clone_reg(clone_state *state, const void *ptr)
>> +{
>> +   const nir_register *reg = ptr;
>> +   nir_register *nreg;
>> +
>> +   nreg = rzalloc(state->ns, nir_register);
>> +   store_ptr(state, nreg, reg);
>> +
>> +   nreg->num_components = reg->num_components;
>> +   nreg->num_array_elems = reg->num_array_elems;
>> +   nreg->index = reg->index;
>> +   nreg->name = ralloc_strdup(nreg, reg->name);
>> +   nreg->is_global = reg->is_global;
>> +   nreg->is_packed = reg->is_packed;
>> +
>> +   /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
>> +   list_inithead(&nreg->uses);
>> +   list_inithead(&nreg->defs);
>> +   list_inithead(&nreg->if_uses);
>> +
>> +   return nreg;
>> +}
>> +
>> +static void
>> +__clone_src(clone_state *state, nir_src *nsrc, const nir_src *src)
>> +{
>> +   nsrc->is_ssa = src->is_ssa;
>> +   if (src->is_ssa) {
>> +      /* save for resolving later: */
>> +      list_addtail(&nsrc->link, &state->ssa_src_list);
>> +      /* we also need to stash the original src for use to later
>> +       * resolve the clone'd ssa-def.  Normally it would be nice
>> +       * to keep nsrc->ssa ptr as null to catch anyone deref'ing
>> +       * it before it is resolved.  But there wasn't room to add
>> +       * a ptr to nir_src without increasing it's size.  But a
>> +       * 'bitwise not' is easily reversable and should mostly
>> +       * have the same result as null:
>> +       */
>> +      nsrc->ssa = (void *)~(uintptr_t)src;
>
> Ugh... Yeah, that works...  I would hate this if it weren't so clever...

I guess if I repurpose src->use_link, there is now room for an extra
ptr in nir_src, for a less clever way to handle this.

>> +   } else {
>> +      nsrc->reg.reg = clone_ptr(state, src->reg.reg, clone_reg);
>> +      nsrc->reg.indirect = clone_ptr(state, src->reg.indirect, clone_src);
>> +      nsrc->reg.base_offset = src->reg.base_offset;
>> +   }
>> +}
>> +
>> +static void *
>> +clone_src(clone_state *state, const void *ptr)
>> +{
>> +   const nir_src *src = ptr;
>> +   nir_src *nsrc;
>> +
>> +   nsrc = ralloc(ralloc_parent_clone(state, src), nir_src);
>> +   store_ptr(state, nsrc, src);
>> +
>> +   __clone_src(state, nsrc, src);
>> +
>> +   return nsrc;
>> +}
>> +
>> +static void
>> +__clone_ssa_def(clone_state *state, nir_ssa_def *ndef, const nir_ssa_def *def)
>> +{
>> +   ndef->name = ralloc_strdup(state->ns, def->name);
>> +   ndef->index = def->index;
>> +   ndef->live_index = def->live_index;
>
> Don't bother.  Just throw away metadata.
>
>> +   ndef->parent_instr = clone_ptr(state, def->parent_instr, clone_instr);
>> +   list_inithead(&ndef->uses);
>> +   list_inithead(&ndef->if_uses);
>> +   ndef->num_components = def->num_components;
>
> Um... We already have nir_ssa_dest_init which does this...

but would end up incrementing impl->ssa_alloc, which we don't want..
there were a number of other places where I also could not use the
helpers due to unwanted side-effects (creating child blocks, etc)

>> +
>> +   /* special case, since embedded ptr linked to elsewhere, we must store it: */
>> +   store_ptr(state, ndef, def);
>> +}
>> +
>> +static void
>> +__clone_dst(clone_state *state, nir_dest *ndst, const nir_dest *dst)
>> +{
>> +   ndst->is_ssa = dst->is_ssa;
>> +   if (dst->is_ssa) {
>> +      __clone_ssa_def(state, &ndst->ssa, &dst->ssa);
>> +   } else {
>> +      ndst->reg.parent_instr = clone_ptr(state, dst->reg.parent_instr,
>> +                                         clone_instr);
>> +      ndst->reg.reg = clone_ptr(state, dst->reg.reg, clone_reg);
>> +      ndst->reg.indirect = clone_ptr(state, dst->reg.indirect, clone_src);
>> +      ndst->reg.base_offset = dst->reg.base_offset;
>> +   }
>> +}
>> +
>> +static void
>> +__clone_deref(clone_state *state, nir_deref *ndref, const nir_deref *dref)
>> +{
>> +   ndref->deref_type = dref->deref_type;
>> +   ndref->child = clone_ptr(state, dref->child, clone_deref);
>> +   ndref->type = dref->type;
>> +}
>> +
>> +static void *
>> +clone_deref_var(clone_state *state, const void *ptr)
>> +{
>> +   const nir_deref_var *dvar = ptr;
>> +   nir_deref_var *ndvar;
>> +
>> +   ndvar = ralloc(ralloc_parent_clone(state, dvar), nir_deref_var);
>
> This is completely unneeded.  Just pass the intrinsic into clone_deref_var.

Except then all the clone fxns would need to take an extra mem_ctx
param.. which was how I started things out, but it was getting ugly.
Since 90% of everything is parented by the nir_shader, with only a few
special cases, it turned out to be simpler to just handle the special
cases specially.

>> +   store_ptr(state, ndvar, dvar);
>> +
>> +   __clone_deref(state, &ndvar->deref, &dvar->deref);
>> +
>> +   ndvar->var = clone_ptr(state, dvar->var, clone_var);
>> +
>> +   return ndvar;
>> +}
>> +
>> +static void *
>> +clone_deref_array(clone_state *state, const void *ptr)
>> +{
>> +   const nir_deref_array *darr = ptr;
>> +   nir_deref_array *ndarr;
>> +
>> +   ndarr = ralloc(ralloc_parent_clone(state, darr), nir_deref_array);
>> +   store_ptr(state, ndarr, darr);
>> +
>> +   __clone_deref(state, &ndarr->deref, &darr->deref);
>> +
>> +   ndarr->deref_array_type = darr->deref_array_type;
>> +   ndarr->base_offset = darr->base_offset;
>> +
>> +   __clone_src(state, &ndarr->indirect, &darr->indirect);
>> +
>> +   return ndarr;
>> +}
>> +
>> +static void *
>> +clone_deref_struct(clone_state *state, const void *ptr)
>> +{
>> +   const nir_deref_struct *dstr = ptr;
>> +   nir_deref_struct *ndstr;
>> +
>> +   ndstr = ralloc(ralloc_parent_clone(state, dstr), nir_deref_struct);
>> +   store_ptr(state, ndstr, dstr);
>> +
>> +   __clone_deref(state, &ndstr->deref, &dstr->deref);
>> +
>> +   ndstr->index = dstr->index;
>> +
>> +   return ndstr;
>> +}
>> +
>> +static void *
>> +clone_deref(clone_state *state, const void *ptr)
>> +{
>> +   const nir_deref *dref = ptr;
>> +   switch (dref->deref_type) {
>> +   case nir_deref_type_var:
>> +      return clone_deref_var(state, ptr);
>> +   case nir_deref_type_array:
>> +      return clone_deref_array(state, ptr);
>> +   case nir_deref_type_struct:
>> +      return clone_deref_struct(state, ptr);
>> +   default:
>> +      unreachable("bad deref type");
>> +      return NULL;
>> +   }
>> +}
>> +
>> +static void
>> +__clone_instr(clone_state *state, nir_instr *ninstr, const nir_instr *instr)
>> +{
>> +   ninstr->type = instr->type;
>> +   ninstr->block = clone_ptr(state, instr->block, clone_block);
>> +}
>> +
>> +static void *
>> +clone_alu(clone_state *state, const void *ptr)
>> +{
>> +   const nir_alu_instr *alu = ptr;
>> +   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
>> +
>> +   nir_alu_instr *nalu =
>> +      ralloc_size(state->ns,
>> +                  sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
>
> If you just used nir_alu_instr_create, it would do this for you...

along w/ a bunch of other stuff I didn't need..  maybe I should break
out nir_alu_instr_size(nsrc)/etc macros that both sites could share..

>> +   store_ptr(state, nalu, alu);
>> +
>> +   __clone_instr(state, &nalu->instr, &alu->instr);
>> +
>> +   nalu->op = alu->op;
>> +
>> +   __clone_dst(state, &nalu->dest.dest, &alu->dest.dest);
>> +   nalu->dest.saturate = alu->dest.saturate;
>> +   nalu->dest.write_mask = alu->dest.write_mask;
>> +
>> +   for (unsigned i = 0; i < num_srcs; i++) {
>> +      __clone_src(state, &nalu->src[i].src, &alu->src[i].src);
>> +      nalu->src[i].negate = alu->src[i].negate;
>> +      nalu->src[i].abs = alu->src[i].abs;
>> +      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
>> +             sizeof(nalu->src[i].swizzle));
>> +   }
>> +
>> +   return nalu;
>> +}
>> +
>> +static void *
>> +clone_intrinsic(clone_state *state, const void *ptr)
>> +{
>> +   const nir_intrinsic_instr *itr = ptr;
>> +   unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
>> +
>> +   nir_intrinsic_instr *nitr =
>> +      ralloc_size(state->ns,
>> +                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
>> +   store_ptr(state, nitr, itr);
>> +
>> +   __clone_instr(state, &nitr->instr, &itr->instr);
>> +
>> +   nitr->intrinsic = itr->intrinsic;
>> +
>> +   if (nir_intrinsic_infos[itr->intrinsic].has_dest)
>> +      __clone_dst(state, &nitr->dest, &itr->dest);
>> +
>> +   nitr->num_components = itr->num_components;
>> +   memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
>> +
>> +   for (unsigned i = 0; i < ARRAY_SIZE(nitr->variables); i++) {
>> +      nitr->variables[i] = clone_ptr(state, itr->variables[i], clone_deref_var);
>
> Derefs don't need to go in the remap table.  They need to use the
> variable from it and need to remap any indirect sources, but the deref
> doesn't need to go in the table.

that goes for all the various places deref's can appear?  (Ie. call,
tex instr, etc)?  They are all private to the instruction?  I got a
bit lost/impatient in following glsl_to_nir around so I went for the
safe thing..

(That said, in the grand scheme of things I don't think there are so
many deref objects, so probably doesn't make much difference)

>> +   }
>> +
>> +   for (unsigned i = 0; i < num_srcs; i++) {
>> +      __clone_src(state, &nitr->src[i], &itr->src[i]);
>> +   }
>> +
>> +   return nitr;
>> +}
>> +
>> +static void *
>> +clone_load_const(clone_state *state, const void *ptr)
>> +{
>> +   const nir_load_const_instr *lc = ptr;
>> +   nir_load_const_instr *nlc;
>> +
>> +   nlc = ralloc(state->ns, nir_load_const_instr);
>> +   store_ptr(state, nlc, lc);
>> +
>> +   __clone_instr(state, &nlc->instr, &lc->instr);
>> +
>> +   memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
>> +   __clone_ssa_def(state, &nlc->def, &lc->def);
>> +
>> +   return nlc;
>> +}
>> +
>> +static void *
>> +clone_ssa_undef(clone_state *state, const void *ptr)
>> +{
>> +   const nir_ssa_undef_instr *sa = ptr;
>> +   nir_ssa_undef_instr *nsa;
>> +
>> +   nsa = ralloc(state->ns, nir_ssa_undef_instr);
>> +   store_ptr(state, nsa, sa);
>> +
>> +   __clone_instr(state, &nsa->instr, &sa->instr);
>> +
>> +   __clone_ssa_def(state, &nsa->def, &sa->def);
>> +
>> +   return nsa;
>> +}
>> +
>> +static void *
>> +clone_tex(clone_state *state, const void *ptr)
>> +{
>> +   const nir_tex_instr *tex = ptr;
>> +   nir_tex_instr *ntex;
>> +
>> +   ntex= ralloc(state->ns, nir_tex_instr);
>> +   store_ptr(state, ntex, tex);
>> +
>> +   __clone_instr(state, &ntex->instr, &tex->instr);
>> +
>> +   ntex->sampler_dim = tex->sampler_dim;
>> +   ntex->dest_type = tex->dest_type;
>> +   ntex->op = tex->op;
>> +   __clone_dst(state, &ntex->dest, &tex->dest);
>> +   ntex->num_srcs = tex->num_srcs;
>> +   ntex->src = ralloc_array(ntex, nir_tex_src, tex->num_srcs);
>> +   for (unsigned i = 0; i < ntex->num_srcs; i++) {
>> +      ntex->src[i].src_type = tex->src[i].src_type;
>> +      __clone_src(state, &ntex->src[i].src, &tex->src[i].src);
>> +   }
>> +   ntex->coord_components = tex->coord_components;
>> +   ntex->is_array = tex->is_array;
>> +   ntex->is_shadow = tex->is_shadow;
>> +   ntex->is_new_style_shadow = tex->is_new_style_shadow;
>> +   memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
>> +   ntex->component = tex->component;
>> +   ntex->sampler_index = tex->sampler_index;
>> +   ntex->sampler_array_size = tex->sampler_array_size;
>> +   ntex->sampler = clone_ptr(state, tex->sampler, clone_deref_var);
>> +
>> +   return ntex;
>> +}
>> +
>> +static void *
>> +clone_phi(clone_state *state, const void *ptr)
>> +{
>> +   const nir_phi_instr *phi = ptr;
>> +   nir_phi_instr *nphi;
>> +
>> +   nphi = ralloc(state->ns, nir_phi_instr);
>> +   store_ptr(state, nphi, phi);
>> +
>> +   __clone_instr(state, &nphi->instr, &phi->instr);
>> +
>> +   exec_list_make_empty(&nphi->srcs);
>> +   foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
>> +      nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
>> +
>> +      nsrc->pred = clone_ptr(state, src->pred, clone_block);
>> +      __clone_src(state, &nsrc->src, &src->src);
>> +
>> +      exec_list_push_tail(&nphi->srcs, &nsrc->node);
>> +   }
>> +
>> +   __clone_dst(state, &nphi->dest, &phi->dest);
>> +
>> +   return nphi;
>> +}
>> +
>> +static void *
>> +clone_jump(clone_state *state, const void *ptr)
>> +{
>> +   const nir_jump_instr *jmp = ptr;
>> +   nir_jump_instr *njmp;
>> +
>> +   njmp = ralloc(state->ns, nir_jump_instr);
>> +   store_ptr(state, njmp, jmp);
>> +
>> +   __clone_instr(state, &njmp->instr, &jmp->instr);
>> +
>> +   njmp->type = jmp->type;
>> +
>> +   return njmp;
>> +}
>> +
>> +static void *
>> +clone_call(clone_state *state, const void *ptr)
>> +{
>> +   const nir_call_instr *call = ptr;
>> +   nir_call_instr *ncall;
>> +
>> +   ncall = ralloc(state->ns, nir_call_instr);
>> +   store_ptr(state, ncall, call);
>> +
>> +   __clone_instr(state, &ncall->instr, &call->instr);
>> +
>> +   ncall->num_params = call->num_params;
>> +   ncall->params = ralloc_array(ncall, nir_deref_var *, call->num_params);
>> +   for (unsigned i = 0; i < ncall->num_params; i++) {
>> +      ncall->params[i] = clone_ptr(state, call->params[i], clone_deref_var);
>> +   }
>> +
>> +   ncall->return_deref = clone_ptr(state, call->return_deref, clone_deref_var);
>> +   ncall->callee = clone_ptr(state, call->callee, clone_function_overload);
>> +
>> +   return ncall;
>> +}
>> +
>> +static void *
>> +clone_parallel_copy(clone_state *state, const void *ptr)
>> +{
>> +   const nir_parallel_copy_instr *pc = ptr;
>> +   nir_parallel_copy_instr *npc;
>> +
>> +   npc = ralloc(state->ns, nir_parallel_copy_instr);
>> +   store_ptr(state, npc, pc);
>> +
>> +   __clone_instr(state, &npc->instr, &pc->instr);
>> +
>> +   nir_foreach_parallel_copy_entry(pc, entry) {
>> +      nir_parallel_copy_entry *nentry;
>> +
>> +      nentry = rzalloc(state->ns, nir_parallel_copy_entry);
>> +
>> +      __clone_src(state, &nentry->src, &entry->src);
>> +      __clone_dst(state, &nentry->dest, &entry->dest);
>> +
>> +      exec_list_push_tail(&npc->entries, &nentry->node);
>> +   }
>> +
>> +   return npc;
>> +}
>> +
>> +static void *
>> +clone_instr(clone_state *state, const void *ptr)
>> +{
>> +   const nir_instr *instr = ptr;
>> +   switch (instr->type) {
>> +   case nir_instr_type_alu:
>> +      return clone_alu(state, ptr);
>> +   case nir_instr_type_intrinsic:
>> +      return clone_intrinsic(state, ptr);
>> +   case nir_instr_type_load_const:
>> +      return clone_load_const(state, ptr);
>> +   case nir_instr_type_ssa_undef:
>> +      return clone_ssa_undef(state, ptr);
>> +   case nir_instr_type_tex:
>> +      return clone_tex(state, ptr);
>> +   case nir_instr_type_phi:
>> +      return clone_phi(state, ptr);
>> +   case nir_instr_type_jump:
>> +      return clone_jump(state, ptr);
>> +   case nir_instr_type_call:
>> +      return clone_call(state, ptr);
>> +   case nir_instr_type_parallel_copy:
>> +      return clone_parallel_copy(state, ptr);
>> +   default:
>> +      unreachable("bad instr type");
>> +      return NULL;
>> +   }
>> +}
>> +
>> +static void
>> +__clone_cf(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
>> +{
>> +   ncf->type = cf->type;
>> +   ncf->parent = clone_ptr(state, cf->parent, clone_cf_node);
>> +}
>> +
>> +static void *
>> +clone_block(clone_state *state, const void *ptr)
>> +{
>> +   const nir_block *blk = ptr;
>> +   nir_block *nblk;
>> +
>> +   nblk = rzalloc(state->ns, nir_block);
>> +   store_ptr(state, nblk, blk);
>> +
>> +   __clone_cf(state, &nblk->cf_node, &blk->cf_node);
>> +
>> +   /* clone instructions before successor blocks: */
>> +   exec_list_make_empty(&nblk->instr_list);
>> +   nir_foreach_instr(blk, instr) {
>> +      nir_instr *ninstr = clone_ptr(state, instr, clone_instr);
>> +      exec_list_push_tail(&nblk->instr_list, &ninstr->node);
>> +   }
>> +
>> +   nblk->successors[0] = clone_ptr(state, blk->successors[0], clone_block);
>> +   nblk->successors[1] = clone_ptr(state, blk->successors[1], clone_block);
>> +   nblk->predecessors = clone_set(state, nblk, blk->predecessors, clone_block);
>> +   nblk->dom_frontier = _mesa_set_create(nblk, _mesa_hash_pointer,
>> +                                         _mesa_key_pointer_equal);
>> +
>> +   return nblk;
>> +}
>> +
>> +static void
>> +__clone_block_p2(clone_state *state, nir_block *nblk, const nir_block *blk)
>> +{
>> +   nir_instr *ninstr;
>> +
>> +   ninstr = nir_block_first_instr(nblk);
>> +   nir_foreach_instr(blk, instr) {
>> +      nir_add_defs_uses(ninstr);
>> +      ninstr = nir_instr_next(ninstr);
>
> What are you doing here?  You have instr from the iterator, why not just use it?

originally, before I split out the ssa nir_src's to fix up, I needed
both the new and old instr ptr, and didn't want to get that via
hashtable lookups.  Which is the reason for the most part for doing
custom iteration over the IR..

I guess in this particular spot, since I no longer need the original
instr, I can just iterate nblk instead..


>> +   }
>> +}
>> +
>> +
>> +static void *
>> +clone_if(clone_state *state, const void *ptr)
>> +{
>> +   const nir_if *i = ptr;
>> +   nir_if *ni;
>> +
>> +   ni = ralloc(state->ns, nir_if);
>> +   store_ptr(state, ni, i);
>> +
>> +   __clone_cf(state, &ni->cf_node, &i->cf_node);
>> +
>> +   __clone_src(state, &ni->condition, &i->condition);
>> +   clone_cf_list(state, &ni->then_list, &i->then_list);
>> +   clone_cf_list(state, &ni->else_list, &i->else_list);
>> +
>> +   return ni;
>> +}
>> +
>> +static void
>> +__clone_if_p2(clone_state *state, nir_if *ni, const nir_if *i)
>> +{
>> +   nir_update_if_uses(&ni->cf_node);
>> +   __clone_cf_list_p2(state, &ni->then_list, &i->then_list);
>> +   __clone_cf_list_p2(state, &ni->else_list, &i->else_list);
>> +}
>> +
>> +static void *
>> +clone_loop(clone_state *state, const void *ptr)
>> +{
>> +   const nir_loop *loop = ptr;
>> +   nir_loop *nloop;
>> +
>> +   nloop = ralloc(state->ns, nir_loop);
>> +   store_ptr(state, nloop, loop);
>> +
>> +   __clone_cf(state, &nloop->cf_node, &loop->cf_node);
>> +
>> +   clone_cf_list(state, &nloop->body, &loop->body);
>> +
>> +   return nloop;
>> +}
>> +
>> +static void
>> +__clone_loop_p2(clone_state *state, nir_loop *nloop, const nir_loop *loop)
>> +{
>> +   __clone_cf_list_p2(state, &nloop->body, &loop->body);
>> +}
>> +
>> +static void *
>> +clone_function_impl(clone_state *state, const void *ptr)
>> +{
>> +   const nir_function_impl *fi = ptr;
>> +   nir_function_impl *nfi;
>> +
>> +   nfi = ralloc(state->ns, nir_function_impl);
>> +   store_ptr(state, nfi, fi);
>> +
>> +   __clone_cf(state, &nfi->cf_node, &fi->cf_node);
>> +
>> +   nfi->overload = clone_ptr(state, fi->overload, clone_function_overload);
>> +   clone_cf_list(state, &nfi->body, &fi->body);
>> +   nfi->end_block = clone_ptr(state, fi->end_block, clone_block);
>> +   clone_var_list(state, &nfi->locals, &fi->locals);
>> +
>> +   nfi->num_params = fi->num_params;
>> +   nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
>> +   for (unsigned i = 0; i < fi->num_params; i++) {
>> +      nfi->params[i] = clone_ptr(state, fi->params[i], clone_var);
>> +   }
>> +
>> +   nfi->return_var = clone_ptr(state, fi->return_var, clone_var);
>> +   clone_reg_list(state, &nfi->registers, &fi->registers);
>> +   nfi->reg_alloc = fi->reg_alloc;
>> +   nfi->ssa_alloc = fi->ssa_alloc;
>> +   nfi->num_blocks = fi->num_blocks;
>> +   nfi->valid_metadata = 0;
>> +
>> +   return nfi;
>> +}
>> +
>> +static void
>> +__clone_function_impl_p2(clone_state *state, nir_function_impl *nfi,
>> +                         const nir_function_impl *fi)
>> +{
>> +   __clone_cf_list_p2(state, &nfi->body, &fi->body);
>> +}
>> +
>> +static void *
>> +clone_cf_node(clone_state *state, const void *ptr)
>> +{
>> +   const nir_cf_node *cf = ptr;
>> +   switch (cf->type) {
>> +   case nir_cf_node_block:
>> +      return clone_block(state, ptr);
>> +   case nir_cf_node_if:
>> +      return clone_if(state, ptr);
>> +   case nir_cf_node_loop:
>> +      return clone_loop(state, ptr);
>> +   case nir_cf_node_function:
>> +      return clone_function_impl(state, ptr);
>> +   default:
>> +      unreachable("bad cf type");
>> +      return NULL;
>> +   }
>> +}
>> +
>> +static void
>> +__clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
>> +{
>> +   switch (cf->type) {
>> +   case nir_cf_node_block:
>> +      __clone_block_p2(state, nir_cf_node_as_block(ncf), nir_cf_node_as_block(cf));
>> +      break;
>> +   case nir_cf_node_if:
>> +      __clone_if_p2(state, nir_cf_node_as_if(ncf), nir_cf_node_as_if(cf));
>> +      break;
>> +   case nir_cf_node_loop:
>> +      __clone_loop_p2(state, nir_cf_node_as_loop(ncf), nir_cf_node_as_loop(cf));
>> +      break;
>> +   case nir_cf_node_function:
>> +      __clone_function_impl_p2(state, nir_cf_node_as_function(ncf),
>> +                               nir_cf_node_as_function(cf));
>
> This whole song-and-dance isn't needed.  We already have
> nir_foreach_block which will do all of this for you.  To handle the
> ifs, we have nir_block_get_following_if().  See also
> nir_live_variables for details.

but it won't co-iterate two lists at the same time for me ;-)

>> +      break;
>> +   default:
>> +      unreachable("bad cf type");
>> +      break;
>> +   }
>> +}
>> +
>> +static void *
>> +clone_function_overload(clone_state *state, const void *ptr)
>> +{
>> +   const nir_function_overload *fo = ptr;
>> +   nir_function_overload *nfo;
>> +
>> +   nfo = ralloc(state->ns, nir_function_overload);
>> +   store_ptr(state, nfo, fo);
>
> This one does have to go in the remap table as it will be used by
> nir_call_instr.
>
>> +
>> +   nfo->num_params = fo->num_params;
>> +   nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
>> +   memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
>> +
>> +   nfo->return_type = fo->return_type;
>> +   nfo->impl = clone_ptr(state, fo->impl, clone_function_impl);
>> +
>> +   nfo->function = clone_ptr(state, fo->function, clone_function);
>
> If you used nir_function_create, that would be set for you.

I'd still need to look up the fxn.. so meh

it was also pushing into shader->functions list, which I didn't want.

>> +
>> +   return nfo;
>> +}
>> +
>> +static void *
>> +clone_function(clone_state *state, const void *ptr)
>> +{
>> +   const nir_function *fxn = ptr;
>> +   nir_function *nfxn;
>> +
>> +   nfxn = ralloc(state->ns, nir_function);
>> +   store_ptr(state, nfxn, fxn);
>
> There's no reason to store the function pointer in the remap table.
> The only thing that ever references it is the shader and the
> overloads.
>
>> +
>> +   exec_list_make_empty(&nfxn->overload_list);
>> +   foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) {
>> +      nir_function_overload *nfo = clone_ptr(state, fo, clone_function_overload);
>
> There's no reason to run this through clone_ptr.  You already know
> that there's one per entry in this list and it will never fall back to
> the hash-table version.

meh, it's only a couple extra hashtable lookups in the grand scheme of
things, and it has the advantage of not falling over if some different
nir-builder does something a bit different..

BR,
-R

>> +      exec_list_push_tail(&nfxn->overload_list, &nfo->node);
>> +   }
>> +
>> +   nfxn->name = ralloc_strdup(nfxn, fxn->name);
>> +   nfxn->shader = state->ns;   /* we could use clone_ptr, but overkill */
>> +
>> +   return nfxn;
>> +}
>> +
>> +static void *
>> +clone_shader(clone_state *state, const void *ptr)
>> +{
>> +   const nir_shader *s = ptr;
>> +   nir_shader *ns;
>> +
>> +   ns = nir_shader_create(state->mem_ctx, s->stage, s->options);
>> +   store_ptr(state, ns, s);
>> +   state->ns = ns;
>> +
>> +   clone_var_list(state, &ns->uniforms, &s->uniforms);
>> +   clone_var_list(state, &ns->inputs,   &s->inputs);
>> +   clone_var_list(state, &ns->outputs,  &s->outputs);
>> +   clone_var_list(state, &ns->globals,  &s->globals);
>> +   clone_var_list(state, &ns->system_values, &s->system_values);
>> +
>> +   exec_list_make_empty(&ns->functions);
>> +   foreach_list_typed(nir_function, fxn, node, &s->functions) {
>> +      nir_function *nfxn = clone_ptr(state, fxn, clone_function);
>
> There's no reason to run this through clone_ptr
>
>> +      exec_list_push_tail(&ns->functions, &nfxn->node);
>> +   }
>> +
>> +   clone_reg_list(state, &ns->registers, &s->registers);
>> +
>> +   ns->info = s->info;
>> +   ns->reg_alloc = s->reg_alloc;
>> +   ns->num_inputs = s->num_inputs;
>> +   ns->num_uniforms = s->num_uniforms;
>> +   ns->num_outputs = s->num_outputs;
>> +
>> +   return ns;
>> +}
>> +
>> +/* second pass for some fixup.. in various cases we can have references
>> + * to embedded objects that haven't been created yet (like phi src's),
>> + * so some parts need to be handled on a second pass.
>> + */
>> +static void
>> +__clone_shader_p2(clone_state *state, nir_shader *ns, const nir_shader *s)
>> +{
>> +   /* resolve ssa srcs: */
>> +   list_for_each_entry(nir_src, nsrc, &state->ssa_src_list, link) {
>> +      /* recover the orig src ptr saved in __clone_src(): */
>> +      nir_src *src = (void *)~(uintptr_t)nsrc->ssa;
>> +      nsrc->ssa = clone_ptr(state, src->ssa, NULL);
>> +   }
>> +   /* nir_update_if_uses() and nir_add_defs_uses(): */
>> +   nir_foreach_overload(s, fo) {
>> +      const nir_function_impl *impl = fo->impl;
>> +      nir_function_impl *nimpl = clone_ptr(state, impl, NULL);
>> +      __clone_function_impl_p2(state, nimpl, impl);
>> +   }
>> +}
>> +
>> +nir_shader *
>> +nir_shader_clone(void *mem_ctx, const nir_shader *s)
>> +{
>> +   clone_state state;
>> +   nir_shader *ns;
>> +
>> +   init_clone_state(&state, mem_ctx);
>> +   ns = clone_ptr(&state, s, clone_shader);
>> +   __clone_shader_p2(&state, ns, s);
>> +   free_clone_state(&state);
>> +
>> +   return ns;
>> +}
>> --
>> 2.5.0
>>


More information about the mesa-dev mailing list