[Mesa-dev] [PATCH 5/7] nir: support to clone shaders (v2)

Jason Ekstrand jason at jlekstrand.net
Mon Oct 26 11:44:25 PDT 2015


On Mon, Oct 26, 2015 at 8:27 AM, Rob Clark <robdclark at gmail.com> wrote:
> Signed-off-by: Rob Clark <robclark at freedesktop.org>
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/nir.c        |   8 +
>  src/glsl/nir/nir.h        |   2 +
>  src/glsl/nir/nir_clone.c  | 949 ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 960 insertions(+)
>  create mode 100644 src/glsl/nir/nir_clone.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index ca87036..25e3801 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -26,6 +26,7 @@ NIR_FILES = \
>         nir/nir.h \
>         nir/nir_array.h \
>         nir/nir_builder.h \
> +       nir/nir_clone.c \
>         nir/nir_constant_expressions.h \
>         nir/nir_control_flow.c \
>         nir/nir_control_flow.h \
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index 0cbe4e1..2defa36 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -316,6 +316,14 @@ nir_block_create(nir_shader *shader)
>     block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
>                                            _mesa_key_pointer_equal);
>     block->imm_dom = NULL;
> +   /* XXX maybe it would be worth it to defer allocation?  This
> +    * way it doesn't get allocated for shader ref's that never run
> +    * nir_calc_dominance?  For example, state-tracker creates an
> +    * initial IR, clones that, runs appropriate lowering pass, passes
> +    * to driver which does common lowering/opt, and then stores ref
> +    * which is later used to do state specific lowering and futher
> +    * opt.  Do any of the references not need dominance metadata?
> +    */

Yes, we should defer this.  The dominance frontier is a product of the
dominance calculations (as are the dominance arrays) so we should
allocate it there.

>     block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
>                                            _mesa_key_pointer_equal);
>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index fb60340..efcbf62 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1897,6 +1897,8 @@ void nir_index_blocks(nir_function_impl *impl);
>  void nir_print_shader(nir_shader *shader, FILE *fp);
>  void nir_print_instr(const nir_instr *instr, FILE *fp);
>
> +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
> +
>  #ifdef DEBUG
>  void nir_validate_shader(nir_shader *shader);
>  #else
> diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
> new file mode 100644
> index 0000000..5c1739e
> --- /dev/null
> +++ b/src/glsl/nir/nir_clone.c
> @@ -0,0 +1,949 @@
> +/*
> + * Copyright © 2015 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_control_flow_private.h"
> +
> +// TODO move these:
> +#define exec_list_head(type, list, node) \
> +   exec_node_data(type, exec_list_get_head(list), node)
> +#define exec_list_next(type, nodeptr, node) \
> +   exec_node_data(type, exec_node_get_next(nodeptr), node)
> +
> +/* Secret Decoder Ring:
> + *   clone_foo():
> + *        Allocate and clone a foo.
> + *   __clone_foo():
> + *        Clone body of foo (ie. parent class, embedded struct,
> + *        etc)
> + *   __clone_foo_p2():
> + *        clone body of foo, pass 2.. since in first pass we
> + *        can have fwd references to embedded structs, some
> + *        ptrs (and things that depend on them) must be
> + *        resolved in 2nd pass
> + */
> +
> +typedef struct {
> +   /* maps orig ptr -> cloned ptr: */
> +   struct hash_table *ptr_table;
> +   /* list of unresolved ssa src's: */
> +   struct list_head ssa_src_list;
> +
> +   /* memctx for new toplevel shader object: */
> +   void *mem_ctx;
> +   /* new shader object, used as memctx for just about everything else: */
> +   nir_shader *ns;
> +} clone_state;
> +
> +typedef void *(*clone_fxn)(clone_state *state, const void *ptr);
> +
> +static void
> +init_clone_state(clone_state *state, void *mem_ctx)
> +{
> +   state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> +                                              _mesa_key_pointer_equal);
> +   list_inithead(&state->ssa_src_list);
> +   state->mem_ctx = mem_ctx;
> +}
> +
> +static void
> +free_clone_state(clone_state *state)
> +{
> +   _mesa_hash_table_destroy(state->ptr_table, NULL);
> +}
> +
> +static void *
> +clone_ptr(clone_state *state, const void *ptr, clone_fxn clone)
> +{
> +   struct hash_entry *entry;
> +   void *nptr;
> +
> +   if (!ptr)
> +      return NULL;
> +
> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
> +   if (entry)
> +      return entry->data;

I've mentioned this on IRC before, but I really don't think this whole
song-and-dance is needed.  As long as you clone registers and
variables before you clone instructions and as long as you clone
instructions top-to-bottom like you are, there is exactly one case in
which you will ever need to reference something before it's added to
the table: phi sources for phis at the top of loops.  This means that
you just need to make clone_phi a little special and the second pass
can just be your walk over the ssa_src_list.  In particular, clone_phi
needs to look something like this:

clone_phi()
{
   phi = nir_phi_instr_create()
   add_to_remap(phi)
   nir_instr_init(phi)

   // Walk the phi sources, fill them out, and add them to
src_ssa_list.  You'll fill them out with the actual sources later.
}

This will ensure that the phi destination *does* exist for all future
stuff and only the sources need a second pass.

> +
> +   nptr = clone(state, ptr);
> +
> +#ifdef DEBUG
> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
> +   assert(entry->data == nptr);
> +#endif
> +
> +   return nptr;
> +}
> +
> +/* To avoid loops, we need to store cloned ptrs before they are fully
> + * initialized, since cloning their contents could result in a path
> + * back to the cloned pointer
> + */
> +static void
> +store_ptr(clone_state *state, void *nptr, const void *ptr)
> +{
> +   _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
> +}
> +
> +/* There are a few cases we need to know the ralloc_parent()'s clone.
> + * This only works when the original ptr's ralloc_parent() has already
> + * been cloned, but for parents this should always be the case.
> + */
> +static void *
> +ralloc_parent_clone(clone_state *state, const void *ptr)
> +{
> +   return clone_ptr(state, ralloc_parent(ptr), NULL);
> +}
> +
> +static void * clone_var(clone_state *state, const void *ptr);
> +static void * clone_reg(clone_state *state, const void *ptr);
> +static void * clone_src(clone_state *state, const void *ptr);
> +static void * clone_deref(clone_state *state, const void *ptr);
> +static void * clone_instr(clone_state *state, const void *ptr);
> +static void * clone_block(clone_state *state, const void *ptr);
> +static void * clone_cf_node(clone_state *state, const void *ptr);
> +static void * clone_function_overload(clone_state *state, const void *ptr);
> +static void * clone_function(clone_state *state, const void *ptr);
> +
> +static void __clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf);
> +
> +/* clone list of nir_variable: */
> +static void
> +clone_var_list(clone_state *state, struct exec_list *dst,
> +               const struct exec_list *list)
> +{
> +   exec_list_make_empty(dst);
> +   foreach_list_typed(nir_variable, var, node, list) {
> +      nir_variable *nvar = clone_ptr(state, var, clone_var);
> +      exec_list_push_tail(dst, &nvar->node);
> +   }
> +}
> +
> +/* clone list of nir_register: */
> +static void
> +clone_reg_list(clone_state *state, struct exec_list *dst,
> +               const struct exec_list *list)
> +{
> +   exec_list_make_empty(dst);
> +   foreach_list_typed(nir_register, reg, node, list) {
> +      nir_register *nreg = clone_ptr(state, reg, clone_reg);
> +      exec_list_push_tail(dst, &nreg->node);
> +   }
> +}
> +
> +/* clone list of nir_cf_node: */
> +static void
> +clone_cf_list(clone_state *state, struct exec_list *dst,
> +              const struct exec_list *list)
> +{
> +   exec_list_make_empty(dst);
> +   foreach_list_typed(nir_cf_node, cf, node, list) {
> +      nir_cf_node *ncf = clone_ptr(state, cf, clone_cf_node);
> +      exec_list_push_tail(dst, &ncf->node);
> +   }
> +}
> +
> +static void
> +__clone_cf_list_p2(clone_state *state, struct exec_list *dst,
> +                   const struct exec_list *list)
> +{
> +   nir_cf_node *ncf = exec_list_head(nir_cf_node, dst, node);
> +   foreach_list_typed(nir_cf_node, cf, node, list) {
> +      __clone_cf_node_p2(state, ncf, cf);
> +      ncf = exec_list_next(nir_cf_node, &ncf->node, node);
> +   }
> +}
> +
> +static struct set *
> +clone_set(clone_state *state, void *mem_ctx, struct set *src, clone_fxn clone)
> +{
> +   struct set *dst = _mesa_set_create(mem_ctx, src->key_hash_function,
> +                                      src->key_equals_function);
> +   struct set_entry *entry;
> +   set_foreach(src, entry) {
> +      _mesa_set_add(dst, clone_ptr(state, entry->key, clone));
> +   }
> +   return dst;
> +}
> +
> +static void *
> +clone_constant(clone_state *state, const void *ptr)
> +{
> +   const nir_constant *c = ptr;
> +   nir_constant *nc;
> +
> +   void *mem_ctx = ralloc_parent_clone(state, c);
> +
> +   nc = ralloc(mem_ctx, nir_constant);
> +   store_ptr(state, nc, c);
> +
> +   nc->value = c->value;
> +   nc->num_elements = c->num_elements;
> +   nc->elements = ralloc_array(mem_ctx, nir_constant *, c->num_elements);
> +   for (unsigned i = 0; i < c->num_elements; i++) {
> +      nc->elements[i] = clone_ptr(state, c->elements[i], clone_constant);
> +   }
> +
> +   return nc;
> +}
> +
> +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
> + * having to deal with locals and globals separately:
> + */
> +static void *
> +clone_var(clone_state *state, const void *ptr)
> +{
> +   const nir_variable *var = ptr;
> +   nir_variable *nvar;
> +
> +   nvar = rzalloc(state->ns, nir_variable);
> +   store_ptr(state, nvar, var);
> +
> +   nvar->type = var->type;
> +   nvar->name = ralloc_strdup(nvar, var->name);
> +   nvar->max_ifc_array_access = ralloc_array(nvar, unsigned,
> +                                             var->num_max_ifc_array_access);
> +   memcpy(nvar->max_ifc_array_access, var->max_ifc_array_access,
> +          var->num_max_ifc_array_access * sizeof(unsigned));
> +   nvar->data = var->data;
> +   nvar->num_state_slots = var->num_state_slots;
> +   nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
> +   memcpy(nvar->state_slots, var->state_slots,
> +          var->num_state_slots * sizeof(nir_state_slot));
> +   nvar->constant_initializer = clone_ptr(state, var->constant_initializer,
> +                                          clone_constant);
> +   nvar->interface_type = var->interface_type;
> +
> +   return nvar;
> +}
> +
> +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
> + * to avoid having to deal with locals and globals separately:
> + */
> +static void *
> +clone_reg(clone_state *state, const void *ptr)
> +{
> +   const nir_register *reg = ptr;
> +   nir_register *nreg;
> +
> +   nreg = rzalloc(state->ns, nir_register);
> +   store_ptr(state, nreg, reg);
> +
> +   nreg->num_components = reg->num_components;
> +   nreg->num_array_elems = reg->num_array_elems;
> +   nreg->index = reg->index;
> +   nreg->name = ralloc_strdup(nreg, reg->name);
> +   nreg->is_global = reg->is_global;
> +   nreg->is_packed = reg->is_packed;
> +
> +   /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
> +   list_inithead(&nreg->uses);
> +   list_inithead(&nreg->defs);
> +   list_inithead(&nreg->if_uses);
> +
> +   return nreg;
> +}
> +
> +static void
> +__clone_src(clone_state *state, nir_src *nsrc, const nir_src *src)
> +{
> +   nsrc->is_ssa = src->is_ssa;
> +   if (src->is_ssa) {
> +      /* save for resolving later: */
> +      list_addtail(&nsrc->link, &state->ssa_src_list);
> +      /* we also need to stash the original src for use to later
> +       * resolve the clone'd ssa-def.  Normally it would be nice
> +       * to keep nsrc->ssa ptr as null to catch anyone deref'ing
> +       * it before it is resolved.  But there wasn't room to add
> +       * a ptr to nir_src without increasing it's size.  But a
> +       * 'bitwise not' is easily reversable and should mostly
> +       * have the same result as null:
> +       */
> +      nsrc->ssa = (void *)~(uintptr_t)src;

Ugh... Yeah, that works...  I would hate this if it weren't so clever...

> +   } else {
> +      nsrc->reg.reg = clone_ptr(state, src->reg.reg, clone_reg);
> +      nsrc->reg.indirect = clone_ptr(state, src->reg.indirect, clone_src);
> +      nsrc->reg.base_offset = src->reg.base_offset;
> +   }
> +}
> +
> +static void *
> +clone_src(clone_state *state, const void *ptr)
> +{
> +   const nir_src *src = ptr;
> +   nir_src *nsrc;
> +
> +   nsrc = ralloc(ralloc_parent_clone(state, src), nir_src);
> +   store_ptr(state, nsrc, src);
> +
> +   __clone_src(state, nsrc, src);
> +
> +   return nsrc;
> +}
> +
> +static void
> +__clone_ssa_def(clone_state *state, nir_ssa_def *ndef, const nir_ssa_def *def)
> +{
> +   ndef->name = ralloc_strdup(state->ns, def->name);
> +   ndef->index = def->index;
> +   ndef->live_index = def->live_index;

Don't bother.  Just throw away metadata.

> +   ndef->parent_instr = clone_ptr(state, def->parent_instr, clone_instr);
> +   list_inithead(&ndef->uses);
> +   list_inithead(&ndef->if_uses);
> +   ndef->num_components = def->num_components;

Um... We already have nir_ssa_dest_init which does this...

> +
> +   /* special case, since embedded ptr linked to elsewhere, we must store it: */
> +   store_ptr(state, ndef, def);
> +}
> +
> +static void
> +__clone_dst(clone_state *state, nir_dest *ndst, const nir_dest *dst)
> +{
> +   ndst->is_ssa = dst->is_ssa;
> +   if (dst->is_ssa) {
> +      __clone_ssa_def(state, &ndst->ssa, &dst->ssa);
> +   } else {
> +      ndst->reg.parent_instr = clone_ptr(state, dst->reg.parent_instr,
> +                                         clone_instr);
> +      ndst->reg.reg = clone_ptr(state, dst->reg.reg, clone_reg);
> +      ndst->reg.indirect = clone_ptr(state, dst->reg.indirect, clone_src);
> +      ndst->reg.base_offset = dst->reg.base_offset;
> +   }
> +}
> +
> +static void
> +__clone_deref(clone_state *state, nir_deref *ndref, const nir_deref *dref)
> +{
> +   ndref->deref_type = dref->deref_type;
> +   ndref->child = clone_ptr(state, dref->child, clone_deref);
> +   ndref->type = dref->type;
> +}
> +
> +static void *
> +clone_deref_var(clone_state *state, const void *ptr)
> +{
> +   const nir_deref_var *dvar = ptr;
> +   nir_deref_var *ndvar;
> +
> +   ndvar = ralloc(ralloc_parent_clone(state, dvar), nir_deref_var);

This is completely unneeded.  Just pass the intrinsic into clone_deref_var.

> +   store_ptr(state, ndvar, dvar);
> +
> +   __clone_deref(state, &ndvar->deref, &dvar->deref);
> +
> +   ndvar->var = clone_ptr(state, dvar->var, clone_var);
> +
> +   return ndvar;
> +}
> +
> +static void *
> +clone_deref_array(clone_state *state, const void *ptr)
> +{
> +   const nir_deref_array *darr = ptr;
> +   nir_deref_array *ndarr;
> +
> +   ndarr = ralloc(ralloc_parent_clone(state, darr), nir_deref_array);
> +   store_ptr(state, ndarr, darr);
> +
> +   __clone_deref(state, &ndarr->deref, &darr->deref);
> +
> +   ndarr->deref_array_type = darr->deref_array_type;
> +   ndarr->base_offset = darr->base_offset;
> +
> +   __clone_src(state, &ndarr->indirect, &darr->indirect);
> +
> +   return ndarr;
> +}
> +
> +static void *
> +clone_deref_struct(clone_state *state, const void *ptr)
> +{
> +   const nir_deref_struct *dstr = ptr;
> +   nir_deref_struct *ndstr;
> +
> +   ndstr = ralloc(ralloc_parent_clone(state, dstr), nir_deref_struct);
> +   store_ptr(state, ndstr, dstr);
> +
> +   __clone_deref(state, &ndstr->deref, &dstr->deref);
> +
> +   ndstr->index = dstr->index;
> +
> +   return ndstr;
> +}
> +
> +static void *
> +clone_deref(clone_state *state, const void *ptr)
> +{
> +   const nir_deref *dref = ptr;
> +   switch (dref->deref_type) {
> +   case nir_deref_type_var:
> +      return clone_deref_var(state, ptr);
> +   case nir_deref_type_array:
> +      return clone_deref_array(state, ptr);
> +   case nir_deref_type_struct:
> +      return clone_deref_struct(state, ptr);
> +   default:
> +      unreachable("bad deref type");
> +      return NULL;
> +   }
> +}
> +
> +static void
> +__clone_instr(clone_state *state, nir_instr *ninstr, const nir_instr *instr)
> +{
> +   ninstr->type = instr->type;
> +   ninstr->block = clone_ptr(state, instr->block, clone_block);
> +}
> +
> +static void *
> +clone_alu(clone_state *state, const void *ptr)
> +{
> +   const nir_alu_instr *alu = ptr;
> +   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
> +
> +   nir_alu_instr *nalu =
> +      ralloc_size(state->ns,
> +                  sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

If you just used nir_alu_instr_create, it would do this for you...

> +   store_ptr(state, nalu, alu);
> +
> +   __clone_instr(state, &nalu->instr, &alu->instr);
> +
> +   nalu->op = alu->op;
> +
> +   __clone_dst(state, &nalu->dest.dest, &alu->dest.dest);
> +   nalu->dest.saturate = alu->dest.saturate;
> +   nalu->dest.write_mask = alu->dest.write_mask;
> +
> +   for (unsigned i = 0; i < num_srcs; i++) {
> +      __clone_src(state, &nalu->src[i].src, &alu->src[i].src);
> +      nalu->src[i].negate = alu->src[i].negate;
> +      nalu->src[i].abs = alu->src[i].abs;
> +      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
> +             sizeof(nalu->src[i].swizzle));
> +   }
> +
> +   return nalu;
> +}
> +
> +static void *
> +clone_intrinsic(clone_state *state, const void *ptr)
> +{
> +   const nir_intrinsic_instr *itr = ptr;
> +   unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
> +
> +   nir_intrinsic_instr *nitr =
> +      ralloc_size(state->ns,
> +                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
> +   store_ptr(state, nitr, itr);
> +
> +   __clone_instr(state, &nitr->instr, &itr->instr);
> +
> +   nitr->intrinsic = itr->intrinsic;
> +
> +   if (nir_intrinsic_infos[itr->intrinsic].has_dest)
> +      __clone_dst(state, &nitr->dest, &itr->dest);
> +
> +   nitr->num_components = itr->num_components;
> +   memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
> +
> +   for (unsigned i = 0; i < ARRAY_SIZE(nitr->variables); i++) {
> +      nitr->variables[i] = clone_ptr(state, itr->variables[i], clone_deref_var);

Derefs don't need to go in the remap table.  They need to use the
variable from it and need to remap any indirect sources, but the deref
doesn't need to go in the table.

> +   }
> +
> +   for (unsigned i = 0; i < num_srcs; i++) {
> +      __clone_src(state, &nitr->src[i], &itr->src[i]);
> +   }
> +
> +   return nitr;
> +}
> +
> +static void *
> +clone_load_const(clone_state *state, const void *ptr)
> +{
> +   const nir_load_const_instr *lc = ptr;
> +   nir_load_const_instr *nlc;
> +
> +   nlc = ralloc(state->ns, nir_load_const_instr);
> +   store_ptr(state, nlc, lc);
> +
> +   __clone_instr(state, &nlc->instr, &lc->instr);
> +
> +   memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
> +   __clone_ssa_def(state, &nlc->def, &lc->def);
> +
> +   return nlc;
> +}
> +
> +static void *
> +clone_ssa_undef(clone_state *state, const void *ptr)
> +{
> +   const nir_ssa_undef_instr *sa = ptr;
> +   nir_ssa_undef_instr *nsa;
> +
> +   nsa = ralloc(state->ns, nir_ssa_undef_instr);
> +   store_ptr(state, nsa, sa);
> +
> +   __clone_instr(state, &nsa->instr, &sa->instr);
> +
> +   __clone_ssa_def(state, &nsa->def, &sa->def);
> +
> +   return nsa;
> +}
> +
> +static void *
> +clone_tex(clone_state *state, const void *ptr)
> +{
> +   const nir_tex_instr *tex = ptr;
> +   nir_tex_instr *ntex;
> +
> +   ntex= ralloc(state->ns, nir_tex_instr);
> +   store_ptr(state, ntex, tex);
> +
> +   __clone_instr(state, &ntex->instr, &tex->instr);
> +
> +   ntex->sampler_dim = tex->sampler_dim;
> +   ntex->dest_type = tex->dest_type;
> +   ntex->op = tex->op;
> +   __clone_dst(state, &ntex->dest, &tex->dest);
> +   ntex->num_srcs = tex->num_srcs;
> +   ntex->src = ralloc_array(ntex, nir_tex_src, tex->num_srcs);
> +   for (unsigned i = 0; i < ntex->num_srcs; i++) {
> +      ntex->src[i].src_type = tex->src[i].src_type;
> +      __clone_src(state, &ntex->src[i].src, &tex->src[i].src);
> +   }
> +   ntex->coord_components = tex->coord_components;
> +   ntex->is_array = tex->is_array;
> +   ntex->is_shadow = tex->is_shadow;
> +   ntex->is_new_style_shadow = tex->is_new_style_shadow;
> +   memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
> +   ntex->component = tex->component;
> +   ntex->sampler_index = tex->sampler_index;
> +   ntex->sampler_array_size = tex->sampler_array_size;
> +   ntex->sampler = clone_ptr(state, tex->sampler, clone_deref_var);
> +
> +   return ntex;
> +}
> +
> +static void *
> +clone_phi(clone_state *state, const void *ptr)
> +{
> +   const nir_phi_instr *phi = ptr;
> +   nir_phi_instr *nphi;
> +
> +   nphi = ralloc(state->ns, nir_phi_instr);
> +   store_ptr(state, nphi, phi);
> +
> +   __clone_instr(state, &nphi->instr, &phi->instr);
> +
> +   exec_list_make_empty(&nphi->srcs);
> +   foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
> +      nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
> +
> +      nsrc->pred = clone_ptr(state, src->pred, clone_block);
> +      __clone_src(state, &nsrc->src, &src->src);
> +
> +      exec_list_push_tail(&nphi->srcs, &nsrc->node);
> +   }
> +
> +   __clone_dst(state, &nphi->dest, &phi->dest);
> +
> +   return nphi;
> +}
> +
> +static void *
> +clone_jump(clone_state *state, const void *ptr)
> +{
> +   const nir_jump_instr *jmp = ptr;
> +   nir_jump_instr *njmp;
> +
> +   njmp = ralloc(state->ns, nir_jump_instr);
> +   store_ptr(state, njmp, jmp);
> +
> +   __clone_instr(state, &njmp->instr, &jmp->instr);
> +
> +   njmp->type = jmp->type;
> +
> +   return njmp;
> +}
> +
> +static void *
> +clone_call(clone_state *state, const void *ptr)
> +{
> +   const nir_call_instr *call = ptr;
> +   nir_call_instr *ncall;
> +
> +   ncall = ralloc(state->ns, nir_call_instr);
> +   store_ptr(state, ncall, call);
> +
> +   __clone_instr(state, &ncall->instr, &call->instr);
> +
> +   ncall->num_params = call->num_params;
> +   ncall->params = ralloc_array(ncall, nir_deref_var *, call->num_params);
> +   for (unsigned i = 0; i < ncall->num_params; i++) {
> +      ncall->params[i] = clone_ptr(state, call->params[i], clone_deref_var);
> +   }
> +
> +   ncall->return_deref = clone_ptr(state, call->return_deref, clone_deref_var);
> +   ncall->callee = clone_ptr(state, call->callee, clone_function_overload);
> +
> +   return ncall;
> +}
> +
> +static void *
> +clone_parallel_copy(clone_state *state, const void *ptr)
> +{
> +   const nir_parallel_copy_instr *pc = ptr;
> +   nir_parallel_copy_instr *npc;
> +
> +   npc = ralloc(state->ns, nir_parallel_copy_instr);
> +   store_ptr(state, npc, pc);
> +
> +   __clone_instr(state, &npc->instr, &pc->instr);
> +
> +   nir_foreach_parallel_copy_entry(pc, entry) {
> +      nir_parallel_copy_entry *nentry;
> +
> +      nentry = rzalloc(state->ns, nir_parallel_copy_entry);
> +
> +      __clone_src(state, &nentry->src, &entry->src);
> +      __clone_dst(state, &nentry->dest, &entry->dest);
> +
> +      exec_list_push_tail(&npc->entries, &nentry->node);
> +   }
> +
> +   return npc;
> +}
> +
> +static void *
> +clone_instr(clone_state *state, const void *ptr)
> +{
> +   const nir_instr *instr = ptr;
> +   switch (instr->type) {
> +   case nir_instr_type_alu:
> +      return clone_alu(state, ptr);
> +   case nir_instr_type_intrinsic:
> +      return clone_intrinsic(state, ptr);
> +   case nir_instr_type_load_const:
> +      return clone_load_const(state, ptr);
> +   case nir_instr_type_ssa_undef:
> +      return clone_ssa_undef(state, ptr);
> +   case nir_instr_type_tex:
> +      return clone_tex(state, ptr);
> +   case nir_instr_type_phi:
> +      return clone_phi(state, ptr);
> +   case nir_instr_type_jump:
> +      return clone_jump(state, ptr);
> +   case nir_instr_type_call:
> +      return clone_call(state, ptr);
> +   case nir_instr_type_parallel_copy:
> +      return clone_parallel_copy(state, ptr);
> +   default:
> +      unreachable("bad instr type");
> +      return NULL;
> +   }
> +}
> +
> +static void
> +__clone_cf(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
> +{
> +   ncf->type = cf->type;
> +   ncf->parent = clone_ptr(state, cf->parent, clone_cf_node);
> +}
> +
> +static void *
> +clone_block(clone_state *state, const void *ptr)
> +{
> +   const nir_block *blk = ptr;
> +   nir_block *nblk;
> +
> +   nblk = rzalloc(state->ns, nir_block);
> +   store_ptr(state, nblk, blk);
> +
> +   __clone_cf(state, &nblk->cf_node, &blk->cf_node);
> +
> +   /* clone instructions before successor blocks: */
> +   exec_list_make_empty(&nblk->instr_list);
> +   nir_foreach_instr(blk, instr) {
> +      nir_instr *ninstr = clone_ptr(state, instr, clone_instr);
> +      exec_list_push_tail(&nblk->instr_list, &ninstr->node);
> +   }
> +
> +   nblk->successors[0] = clone_ptr(state, blk->successors[0], clone_block);
> +   nblk->successors[1] = clone_ptr(state, blk->successors[1], clone_block);
> +   nblk->predecessors = clone_set(state, nblk, blk->predecessors, clone_block);
> +   nblk->dom_frontier = _mesa_set_create(nblk, _mesa_hash_pointer,
> +                                         _mesa_key_pointer_equal);
> +
> +   return nblk;
> +}
> +
> +static void
> +__clone_block_p2(clone_state *state, nir_block *nblk, const nir_block *blk)
> +{
> +   nir_instr *ninstr;
> +
> +   ninstr = nir_block_first_instr(nblk);
> +   nir_foreach_instr(blk, instr) {
> +      nir_add_defs_uses(ninstr);
> +      ninstr = nir_instr_next(ninstr);

What are you doing here?  You have instr from the iterator, why not just use it?

> +   }
> +}
> +
> +
> +static void *
> +clone_if(clone_state *state, const void *ptr)
> +{
> +   const nir_if *i = ptr;
> +   nir_if *ni;
> +
> +   ni = ralloc(state->ns, nir_if);
> +   store_ptr(state, ni, i);
> +
> +   __clone_cf(state, &ni->cf_node, &i->cf_node);
> +
> +   __clone_src(state, &ni->condition, &i->condition);
> +   clone_cf_list(state, &ni->then_list, &i->then_list);
> +   clone_cf_list(state, &ni->else_list, &i->else_list);
> +
> +   return ni;
> +}
> +
> +static void
> +__clone_if_p2(clone_state *state, nir_if *ni, const nir_if *i)
> +{
> +   nir_update_if_uses(&ni->cf_node);
> +   __clone_cf_list_p2(state, &ni->then_list, &i->then_list);
> +   __clone_cf_list_p2(state, &ni->else_list, &i->else_list);
> +}
> +
> +static void *
> +clone_loop(clone_state *state, const void *ptr)
> +{
> +   const nir_loop *loop = ptr;
> +   nir_loop *nloop;
> +
> +   nloop = ralloc(state->ns, nir_loop);
> +   store_ptr(state, nloop, loop);
> +
> +   __clone_cf(state, &nloop->cf_node, &loop->cf_node);
> +
> +   clone_cf_list(state, &nloop->body, &loop->body);
> +
> +   return nloop;
> +}
> +
> +static void
> +__clone_loop_p2(clone_state *state, nir_loop *nloop, const nir_loop *loop)
> +{
> +   __clone_cf_list_p2(state, &nloop->body, &loop->body);
> +}
> +
> +static void *
> +clone_function_impl(clone_state *state, const void *ptr)
> +{
> +   const nir_function_impl *fi = ptr;
> +   nir_function_impl *nfi;
> +
> +   nfi = ralloc(state->ns, nir_function_impl);
> +   store_ptr(state, nfi, fi);
> +
> +   __clone_cf(state, &nfi->cf_node, &fi->cf_node);
> +
> +   nfi->overload = clone_ptr(state, fi->overload, clone_function_overload);
> +   clone_cf_list(state, &nfi->body, &fi->body);
> +   nfi->end_block = clone_ptr(state, fi->end_block, clone_block);
> +   clone_var_list(state, &nfi->locals, &fi->locals);
> +
> +   nfi->num_params = fi->num_params;
> +   nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
> +   for (unsigned i = 0; i < fi->num_params; i++) {
> +      nfi->params[i] = clone_ptr(state, fi->params[i], clone_var);
> +   }
> +
> +   nfi->return_var = clone_ptr(state, fi->return_var, clone_var);
> +   clone_reg_list(state, &nfi->registers, &fi->registers);
> +   nfi->reg_alloc = fi->reg_alloc;
> +   nfi->ssa_alloc = fi->ssa_alloc;
> +   nfi->num_blocks = fi->num_blocks;
> +   nfi->valid_metadata = 0;
> +
> +   return nfi;
> +}
> +
> +static void
> +__clone_function_impl_p2(clone_state *state, nir_function_impl *nfi,
> +                         const nir_function_impl *fi)
> +{
> +   __clone_cf_list_p2(state, &nfi->body, &fi->body);
> +}
> +
> +static void *
> +clone_cf_node(clone_state *state, const void *ptr)
> +{
> +   const nir_cf_node *cf = ptr;
> +   switch (cf->type) {
> +   case nir_cf_node_block:
> +      return clone_block(state, ptr);
> +   case nir_cf_node_if:
> +      return clone_if(state, ptr);
> +   case nir_cf_node_loop:
> +      return clone_loop(state, ptr);
> +   case nir_cf_node_function:
> +      return clone_function_impl(state, ptr);
> +   default:
> +      unreachable("bad cf type");
> +      return NULL;
> +   }
> +}
> +
> +static void
> +__clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
> +{
> +   switch (cf->type) {
> +   case nir_cf_node_block:
> +      __clone_block_p2(state, nir_cf_node_as_block(ncf), nir_cf_node_as_block(cf));
> +      break;
> +   case nir_cf_node_if:
> +      __clone_if_p2(state, nir_cf_node_as_if(ncf), nir_cf_node_as_if(cf));
> +      break;
> +   case nir_cf_node_loop:
> +      __clone_loop_p2(state, nir_cf_node_as_loop(ncf), nir_cf_node_as_loop(cf));
> +      break;
> +   case nir_cf_node_function:
> +      __clone_function_impl_p2(state, nir_cf_node_as_function(ncf),
> +                               nir_cf_node_as_function(cf));

This whole song-and-dance isn't needed.  We already have
nir_foreach_block which will do all of this for you.  To handle the
ifs, we have nir_block_get_following_if().  See also
nir_live_variables for details.

> +      break;
> +   default:
> +      unreachable("bad cf type");
> +      break;
> +   }
> +}
> +
> +static void *
> +clone_function_overload(clone_state *state, const void *ptr)
> +{
> +   const nir_function_overload *fo = ptr;
> +   nir_function_overload *nfo;
> +
> +   nfo = ralloc(state->ns, nir_function_overload);
> +   store_ptr(state, nfo, fo);

This one does have to go in the remap table as it will be used by
nir_call_instr.

> +
> +   nfo->num_params = fo->num_params;
> +   nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
> +   memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
> +
> +   nfo->return_type = fo->return_type;
> +   nfo->impl = clone_ptr(state, fo->impl, clone_function_impl);
> +
> +   nfo->function = clone_ptr(state, fo->function, clone_function);

If you used nir_function_create, that would be set for you.

> +
> +   return nfo;
> +}
> +
> +static void *
> +clone_function(clone_state *state, const void *ptr)
> +{
> +   const nir_function *fxn = ptr;
> +   nir_function *nfxn;
> +
> +   nfxn = ralloc(state->ns, nir_function);
> +   store_ptr(state, nfxn, fxn);

There's no reason to store the function pointer in the remap table.
The only thing that ever references it is the shader and the
overloads.

> +
> +   exec_list_make_empty(&nfxn->overload_list);
> +   foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) {
> +      nir_function_overload *nfo = clone_ptr(state, fo, clone_function_overload);

There's no reason to run this through clone_ptr.  You already know
that there's one per entry in this list and it will never fall back to
the hash-table version.

> +      exec_list_push_tail(&nfxn->overload_list, &nfo->node);
> +   }
> +
> +   nfxn->name = ralloc_strdup(nfxn, fxn->name);
> +   nfxn->shader = state->ns;   /* we could use clone_ptr, but overkill */
> +
> +   return nfxn;
> +}
> +
> +static void *
> +clone_shader(clone_state *state, const void *ptr)
> +{
> +   const nir_shader *s = ptr;
> +   nir_shader *ns;
> +
> +   ns = nir_shader_create(state->mem_ctx, s->stage, s->options);
> +   store_ptr(state, ns, s);
> +   state->ns = ns;
> +
> +   clone_var_list(state, &ns->uniforms, &s->uniforms);
> +   clone_var_list(state, &ns->inputs,   &s->inputs);
> +   clone_var_list(state, &ns->outputs,  &s->outputs);
> +   clone_var_list(state, &ns->globals,  &s->globals);
> +   clone_var_list(state, &ns->system_values, &s->system_values);
> +
> +   exec_list_make_empty(&ns->functions);
> +   foreach_list_typed(nir_function, fxn, node, &s->functions) {
> +      nir_function *nfxn = clone_ptr(state, fxn, clone_function);

There's no reason to run this through clone_ptr

> +      exec_list_push_tail(&ns->functions, &nfxn->node);
> +   }
> +
> +   clone_reg_list(state, &ns->registers, &s->registers);
> +
> +   ns->info = s->info;
> +   ns->reg_alloc = s->reg_alloc;
> +   ns->num_inputs = s->num_inputs;
> +   ns->num_uniforms = s->num_uniforms;
> +   ns->num_outputs = s->num_outputs;
> +
> +   return ns;
> +}
> +
> +/* second pass for some fixup.. in various cases we can have references
> + * to embedded objects that haven't been created yet (like phi src's),
> + * so some parts need to be handled on a second pass.
> + */
> +static void
> +__clone_shader_p2(clone_state *state, nir_shader *ns, const nir_shader *s)
> +{
> +   /* resolve ssa srcs: */
> +   list_for_each_entry(nir_src, nsrc, &state->ssa_src_list, link) {
> +      /* recover the orig src ptr saved in __clone_src(): */
> +      nir_src *src = (void *)~(uintptr_t)nsrc->ssa;
> +      nsrc->ssa = clone_ptr(state, src->ssa, NULL);
> +   }
> +   /* nir_update_if_uses() and nir_add_defs_uses(): */
> +   nir_foreach_overload(s, fo) {
> +      const nir_function_impl *impl = fo->impl;
> +      nir_function_impl *nimpl = clone_ptr(state, impl, NULL);
> +      __clone_function_impl_p2(state, nimpl, impl);
> +   }
> +}
> +
> +nir_shader *
> +nir_shader_clone(void *mem_ctx, const nir_shader *s)
> +{
> +   clone_state state;
> +   nir_shader *ns;
> +
> +   init_clone_state(&state, mem_ctx);
> +   ns = clone_ptr(&state, s, clone_shader);
> +   __clone_shader_p2(&state, ns, s);
> +   free_clone_state(&state);
> +
> +   return ns;
> +}
> --
> 2.5.0
>


More information about the mesa-dev mailing list