[Mesa-dev] [PATCH 5/7] nir: support to clone shaders (v2)

Jason Ekstrand jason at jlekstrand.net
Mon Oct 26 15:56:46 PDT 2015


On Mon, Oct 26, 2015 at 12:53 PM, Rob Clark <robdclark at gmail.com> wrote:
> On Mon, Oct 26, 2015 at 2:44 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> On Mon, Oct 26, 2015 at 8:27 AM, Rob Clark <robdclark at gmail.com> wrote:
>>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>>> ---
>>>  src/glsl/Makefile.sources |   1 +
>>>  src/glsl/nir/nir.c        |   8 +
>>>  src/glsl/nir/nir.h        |   2 +
>>>  src/glsl/nir/nir_clone.c  | 949 ++++++++++++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 960 insertions(+)
>>>  create mode 100644 src/glsl/nir/nir_clone.c
>>>
>>> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>>> index ca87036..25e3801 100644
>>> --- a/src/glsl/Makefile.sources
>>> +++ b/src/glsl/Makefile.sources
>>> @@ -26,6 +26,7 @@ NIR_FILES = \
>>>         nir/nir.h \
>>>         nir/nir_array.h \
>>>         nir/nir_builder.h \
>>> +       nir/nir_clone.c \
>>>         nir/nir_constant_expressions.h \
>>>         nir/nir_control_flow.c \
>>>         nir/nir_control_flow.h \
>>> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
>>> index 0cbe4e1..2defa36 100644
>>> --- a/src/glsl/nir/nir.c
>>> +++ b/src/glsl/nir/nir.c
>>> @@ -316,6 +316,14 @@ nir_block_create(nir_shader *shader)
>>>     block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
>>>                                            _mesa_key_pointer_equal);
>>>     block->imm_dom = NULL;
>>> +   /* XXX maybe it would be worth it to defer allocation?  This
>>> +    * way it doesn't get allocated for shader ref's that never run
>>> +    * nir_calc_dominance?  For example, state-tracker creates an
>>> +    * initial IR, clones that, runs appropriate lowering pass, passes
>>> +    * to driver which does common lowering/opt, and then stores ref
>>> +    * which is later used to do state specific lowering and futher
>>> +    * opt.  Do any of the references not need dominance metadata?
>>> +    */
>>
>> Yes, we should defer this.  The dominance frontier is a product of the
>> dominance calculations (as are the dominance arrays) so we should
>> allocate it there.
>>
>>>     block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
>>>                                            _mesa_key_pointer_equal);
>>>
>>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>>> index fb60340..efcbf62 100644
>>> --- a/src/glsl/nir/nir.h
>>> +++ b/src/glsl/nir/nir.h
>>> @@ -1897,6 +1897,8 @@ void nir_index_blocks(nir_function_impl *impl);
>>>  void nir_print_shader(nir_shader *shader, FILE *fp);
>>>  void nir_print_instr(const nir_instr *instr, FILE *fp);
>>>
>>> +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
>>> +
>>>  #ifdef DEBUG
>>>  void nir_validate_shader(nir_shader *shader);
>>>  #else
>>> diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
>>> new file mode 100644
>>> index 0000000..5c1739e
>>> --- /dev/null
>>> +++ b/src/glsl/nir/nir_clone.c
>>> @@ -0,0 +1,949 @@
>>> +/*
>>> + * Copyright © 2015 Red Hat
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the next
>>> + * paragraph) shall be included in all copies or substantial portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>>> + * IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include "nir.h"
>>> +#include "nir_control_flow_private.h"
>>> +
>>> +// TODO move these:
>>> +#define exec_list_head(type, list, node) \
>>> +   exec_node_data(type, exec_list_get_head(list), node)
>>> +#define exec_list_next(type, nodeptr, node) \
>>> +   exec_node_data(type, exec_node_get_next(nodeptr), node)
>>> +
>>> +/* Secret Decoder Ring:
>>> + *   clone_foo():
>>> + *        Allocate and clone a foo.
>>> + *   __clone_foo():
>>> + *        Clone body of foo (ie. parent class, embedded struct,
>>> + *        etc)
>>> + *   __clone_foo_p2():
>>> + *        clone body of foo, pass 2.. since in first pass we
>>> + *        can have fwd references to embedded structs, some
>>> + *        ptrs (and things that depend on them) must be
>>> + *        resolved in 2nd pass
>>> + */
>>> +
>>> +typedef struct {
>>> +   /* maps orig ptr -> cloned ptr: */
>>> +   struct hash_table *ptr_table;
>>> +   /* list of unresolved ssa src's: */
>>> +   struct list_head ssa_src_list;
>>> +
>>> +   /* memctx for new toplevel shader object: */
>>> +   void *mem_ctx;
>>> +   /* new shader object, used as memctx for just about everything else: */
>>> +   nir_shader *ns;
>>> +} clone_state;
>>> +
>>> +typedef void *(*clone_fxn)(clone_state *state, const void *ptr);
>>> +
>>> +static void
>>> +init_clone_state(clone_state *state, void *mem_ctx)
>>> +{
>>> +   state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>>> +                                              _mesa_key_pointer_equal);
>>> +   list_inithead(&state->ssa_src_list);
>>> +   state->mem_ctx = mem_ctx;
>>> +}
>>> +
>>> +static void
>>> +free_clone_state(clone_state *state)
>>> +{
>>> +   _mesa_hash_table_destroy(state->ptr_table, NULL);
>>> +}
>>> +
>>> +static void *
>>> +clone_ptr(clone_state *state, const void *ptr, clone_fxn clone)
>>> +{
>>> +   struct hash_entry *entry;
>>> +   void *nptr;
>>> +
>>> +   if (!ptr)
>>> +      return NULL;
>>> +
>>> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
>>> +   if (entry)
>>> +      return entry->data;
>>
>> I've mentioned this on IRC before, but I really don't think this whole
>> song-and-dance is needed.  As long as you clone registers and
>> variables before you clone instructions and as long as you clone
>> instructions top-to-bottom like you are, there is exactly one case in
>> which you will ever need to reference something before it's added to
>> the table: phi sources for phis at the top of loops.  This means that
>> you just need to make clone_phi a little special and the second pass
>> can just be your walk over the ssa_src_list.  In particular, clone_phi
>> needs to look something like this:
>
> I did start going down that road, but the problem is that it would
> involve cloning blocks in more or less the right order too.. and while
> the clone_ptr() approach for dealing w/ successors/predecessor block
> ptrs is pretty convenient, it doesn't really guarantee that blocks get
> cloned in the right order.
>
> Possibly I could fill out successor/predecessor links on a 2nd pass,
> at the expense of extra complexity.  Not really sure it's worth it..
>
>> clone_phi()
>> {
>>    phi = nir_phi_instr_create()
>>    add_to_remap(phi)
>>    nir_instr_init(phi)
>>
>>    // Walk the phi sources, fill them out, and add them to
>> src_ssa_list.  You'll fill them out with the actual sources later.
>> }
>>
>> This will ensure that the phi destination *does* exist for all future
>> stuff and only the sources need a second pass.
>>
>>> +
>>> +   nptr = clone(state, ptr);
>>> +
>>> +#ifdef DEBUG
>>> +   entry = _mesa_hash_table_search(state->ptr_table, ptr);
>>> +   assert(entry->data == nptr);
>>> +#endif
>>> +
>>> +   return nptr;
>>> +}
>>> +
>>> +/* To avoid loops, we need to store cloned ptrs before they are fully
>>> + * initialized, since cloning their contents could result in a path
>>> + * back to the cloned pointer
>>> + */
>>> +static void
>>> +store_ptr(clone_state *state, void *nptr, const void *ptr)
>>> +{
>>> +   _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
>>> +}
>>> +
>>> +/* There are a few cases we need to know the ralloc_parent()'s clone.
>>> + * This only works when the original ptr's ralloc_parent() has already
>>> + * been cloned, but for parents this should always be the case.
>>> + */
>>> +static void *
>>> +ralloc_parent_clone(clone_state *state, const void *ptr)
>>> +{
>>> +   return clone_ptr(state, ralloc_parent(ptr), NULL);
>>> +}
>>> +
>>> +static void * clone_var(clone_state *state, const void *ptr);
>>> +static void * clone_reg(clone_state *state, const void *ptr);
>>> +static void * clone_src(clone_state *state, const void *ptr);
>>> +static void * clone_deref(clone_state *state, const void *ptr);
>>> +static void * clone_instr(clone_state *state, const void *ptr);
>>> +static void * clone_block(clone_state *state, const void *ptr);
>>> +static void * clone_cf_node(clone_state *state, const void *ptr);
>>> +static void * clone_function_overload(clone_state *state, const void *ptr);
>>> +static void * clone_function(clone_state *state, const void *ptr);
>>> +
>>> +static void __clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf);
>>> +
>>> +/* clone list of nir_variable: */
>>> +static void
>>> +clone_var_list(clone_state *state, struct exec_list *dst,
>>> +               const struct exec_list *list)
>>> +{
>>> +   exec_list_make_empty(dst);
>>> +   foreach_list_typed(nir_variable, var, node, list) {
>>> +      nir_variable *nvar = clone_ptr(state, var, clone_var);
>>> +      exec_list_push_tail(dst, &nvar->node);
>>> +   }
>>> +}
>>> +
>>> +/* clone list of nir_register: */
>>> +static void
>>> +clone_reg_list(clone_state *state, struct exec_list *dst,
>>> +               const struct exec_list *list)
>>> +{
>>> +   exec_list_make_empty(dst);
>>> +   foreach_list_typed(nir_register, reg, node, list) {
>>> +      nir_register *nreg = clone_ptr(state, reg, clone_reg);
>>> +      exec_list_push_tail(dst, &nreg->node);
>>> +   }
>>> +}
>>> +
>>> +/* clone list of nir_cf_node: */
>>> +static void
>>> +clone_cf_list(clone_state *state, struct exec_list *dst,
>>> +              const struct exec_list *list)
>>> +{
>>> +   exec_list_make_empty(dst);
>>> +   foreach_list_typed(nir_cf_node, cf, node, list) {
>>> +      nir_cf_node *ncf = clone_ptr(state, cf, clone_cf_node);
>>> +      exec_list_push_tail(dst, &ncf->node);
>>> +   }
>>> +}
>>> +
>>> +static void
>>> +__clone_cf_list_p2(clone_state *state, struct exec_list *dst,
>>> +                   const struct exec_list *list)
>>> +{
>>> +   nir_cf_node *ncf = exec_list_head(nir_cf_node, dst, node);
>>> +   foreach_list_typed(nir_cf_node, cf, node, list) {
>>> +      __clone_cf_node_p2(state, ncf, cf);
>>> +      ncf = exec_list_next(nir_cf_node, &ncf->node, node);
>>> +   }
>>> +}
>>> +
>>> +static struct set *
>>> +clone_set(clone_state *state, void *mem_ctx, struct set *src, clone_fxn clone)
>>> +{
>>> +   struct set *dst = _mesa_set_create(mem_ctx, src->key_hash_function,
>>> +                                      src->key_equals_function);
>>> +   struct set_entry *entry;
>>> +   set_foreach(src, entry) {
>>> +      _mesa_set_add(dst, clone_ptr(state, entry->key, clone));
>>> +   }
>>> +   return dst;
>>> +}
>>> +
>>> +static void *
>>> +clone_constant(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_constant *c = ptr;
>>> +   nir_constant *nc;
>>> +
>>> +   void *mem_ctx = ralloc_parent_clone(state, c);
>>> +
>>> +   nc = ralloc(mem_ctx, nir_constant);
>>> +   store_ptr(state, nc, c);
>>> +
>>> +   nc->value = c->value;
>>> +   nc->num_elements = c->num_elements;
>>> +   nc->elements = ralloc_array(mem_ctx, nir_constant *, c->num_elements);
>>> +   for (unsigned i = 0; i < c->num_elements; i++) {
>>> +      nc->elements[i] = clone_ptr(state, c->elements[i], clone_constant);
>>> +   }
>>> +
>>> +   return nc;
>>> +}
>>> +
>>> +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
>>> + * having to deal with locals and globals separately:
>>> + */
>>> +static void *
>>> +clone_var(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_variable *var = ptr;
>>> +   nir_variable *nvar;
>>> +
>>> +   nvar = rzalloc(state->ns, nir_variable);
>>> +   store_ptr(state, nvar, var);
>>> +
>>> +   nvar->type = var->type;
>>> +   nvar->name = ralloc_strdup(nvar, var->name);
>>> +   nvar->max_ifc_array_access = ralloc_array(nvar, unsigned,
>>> +                                             var->num_max_ifc_array_access);
>>> +   memcpy(nvar->max_ifc_array_access, var->max_ifc_array_access,
>>> +          var->num_max_ifc_array_access * sizeof(unsigned));
>>> +   nvar->data = var->data;
>>> +   nvar->num_state_slots = var->num_state_slots;
>>> +   nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
>>> +   memcpy(nvar->state_slots, var->state_slots,
>>> +          var->num_state_slots * sizeof(nir_state_slot));
>>> +   nvar->constant_initializer = clone_ptr(state, var->constant_initializer,
>>> +                                          clone_constant);
>>> +   nvar->interface_type = var->interface_type;
>>> +
>>> +   return nvar;
>>> +}
>>> +
>>> +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
>>> + * to avoid having to deal with locals and globals separately:
>>> + */
>>> +static void *
>>> +clone_reg(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_register *reg = ptr;
>>> +   nir_register *nreg;
>>> +
>>> +   nreg = rzalloc(state->ns, nir_register);
>>> +   store_ptr(state, nreg, reg);
>>> +
>>> +   nreg->num_components = reg->num_components;
>>> +   nreg->num_array_elems = reg->num_array_elems;
>>> +   nreg->index = reg->index;
>>> +   nreg->name = ralloc_strdup(nreg, reg->name);
>>> +   nreg->is_global = reg->is_global;
>>> +   nreg->is_packed = reg->is_packed;
>>> +
>>> +   /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
>>> +   list_inithead(&nreg->uses);
>>> +   list_inithead(&nreg->defs);
>>> +   list_inithead(&nreg->if_uses);
>>> +
>>> +   return nreg;
>>> +}
>>> +
>>> +static void
>>> +__clone_src(clone_state *state, nir_src *nsrc, const nir_src *src)
>>> +{
>>> +   nsrc->is_ssa = src->is_ssa;
>>> +   if (src->is_ssa) {
>>> +      /* save for resolving later: */
>>> +      list_addtail(&nsrc->link, &state->ssa_src_list);
>>> +      /* we also need to stash the original src for use to later
>>> +       * resolve the clone'd ssa-def.  Normally it would be nice
>>> +       * to keep nsrc->ssa ptr as null to catch anyone deref'ing
>>> +       * it before it is resolved.  But there wasn't room to add
>>> +       * a ptr to nir_src without increasing it's size.  But a
>>> +       * 'bitwise not' is easily reversable and should mostly
>>> +       * have the same result as null:
>>> +       */
>>> +      nsrc->ssa = (void *)~(uintptr_t)src;
>>
>> Ugh... Yeah, that works...  I would hate this if it weren't so clever...
>
> I guess if I repurpose src->use_link, there is now room for an extra
> ptr in nir_src, for a less clever way to handle this.
>
>>> +   } else {
>>> +      nsrc->reg.reg = clone_ptr(state, src->reg.reg, clone_reg);
>>> +      nsrc->reg.indirect = clone_ptr(state, src->reg.indirect, clone_src);
>>> +      nsrc->reg.base_offset = src->reg.base_offset;
>>> +   }
>>> +}
>>> +
>>> +static void *
>>> +clone_src(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_src *src = ptr;
>>> +   nir_src *nsrc;
>>> +
>>> +   nsrc = ralloc(ralloc_parent_clone(state, src), nir_src);
>>> +   store_ptr(state, nsrc, src);
>>> +
>>> +   __clone_src(state, nsrc, src);
>>> +
>>> +   return nsrc;
>>> +}
>>> +
>>> +static void
>>> +__clone_ssa_def(clone_state *state, nir_ssa_def *ndef, const nir_ssa_def *def)
>>> +{
>>> +   ndef->name = ralloc_strdup(state->ns, def->name);
>>> +   ndef->index = def->index;
>>> +   ndef->live_index = def->live_index;
>>
>> Don't bother.  Just throw away metadata.
>>
>>> +   ndef->parent_instr = clone_ptr(state, def->parent_instr, clone_instr);
>>> +   list_inithead(&ndef->uses);
>>> +   list_inithead(&ndef->if_uses);
>>> +   ndef->num_components = def->num_components;
>>
>> Um... We already have nir_ssa_dest_init which does this...
>
> but would end up incrementing impl->ssa_alloc, which we don't want..
> there were a number of other places where I also could not use the
> helpers due to unwanted side-effects (creating child blocks, etc)
>
>>> +
>>> +   /* special case, since embedded ptr linked to elsewhere, we must store it: */
>>> +   store_ptr(state, ndef, def);
>>> +}
>>> +
>>> +static void
>>> +__clone_dst(clone_state *state, nir_dest *ndst, const nir_dest *dst)
>>> +{
>>> +   ndst->is_ssa = dst->is_ssa;
>>> +   if (dst->is_ssa) {
>>> +      __clone_ssa_def(state, &ndst->ssa, &dst->ssa);
>>> +   } else {
>>> +      ndst->reg.parent_instr = clone_ptr(state, dst->reg.parent_instr,
>>> +                                         clone_instr);
>>> +      ndst->reg.reg = clone_ptr(state, dst->reg.reg, clone_reg);
>>> +      ndst->reg.indirect = clone_ptr(state, dst->reg.indirect, clone_src);
>>> +      ndst->reg.base_offset = dst->reg.base_offset;
>>> +   }
>>> +}
>>> +
>>> +static void
>>> +__clone_deref(clone_state *state, nir_deref *ndref, const nir_deref *dref)
>>> +{
>>> +   ndref->deref_type = dref->deref_type;
>>> +   ndref->child = clone_ptr(state, dref->child, clone_deref);
>>> +   ndref->type = dref->type;
>>> +}
>>> +
>>> +static void *
>>> +clone_deref_var(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_deref_var *dvar = ptr;
>>> +   nir_deref_var *ndvar;
>>> +
>>> +   ndvar = ralloc(ralloc_parent_clone(state, dvar), nir_deref_var);
>>
>> This is completely unneeded.  Just pass the intrinsic into clone_deref_var.
>
> Except then all the clone fxns would need to take an extra mem_ctx
> param.. which was how I started things out, but it was getting ugly.
> Since 90% of everything is parented by the nir_shader, with only a few
> special cases, it turned out to be simpler to just handle the special
> cases specially.
>
>>> +   store_ptr(state, ndvar, dvar);
>>> +
>>> +   __clone_deref(state, &ndvar->deref, &dvar->deref);
>>> +
>>> +   ndvar->var = clone_ptr(state, dvar->var, clone_var);
>>> +
>>> +   return ndvar;
>>> +}
>>> +
>>> +static void *
>>> +clone_deref_array(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_deref_array *darr = ptr;
>>> +   nir_deref_array *ndarr;
>>> +
>>> +   ndarr = ralloc(ralloc_parent_clone(state, darr), nir_deref_array);
>>> +   store_ptr(state, ndarr, darr);
>>> +
>>> +   __clone_deref(state, &ndarr->deref, &darr->deref);
>>> +
>>> +   ndarr->deref_array_type = darr->deref_array_type;
>>> +   ndarr->base_offset = darr->base_offset;
>>> +
>>> +   __clone_src(state, &ndarr->indirect, &darr->indirect);
>>> +
>>> +   return ndarr;
>>> +}
>>> +
>>> +static void *
>>> +clone_deref_struct(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_deref_struct *dstr = ptr;
>>> +   nir_deref_struct *ndstr;
>>> +
>>> +   ndstr = ralloc(ralloc_parent_clone(state, dstr), nir_deref_struct);
>>> +   store_ptr(state, ndstr, dstr);
>>> +
>>> +   __clone_deref(state, &ndstr->deref, &dstr->deref);
>>> +
>>> +   ndstr->index = dstr->index;
>>> +
>>> +   return ndstr;
>>> +}
>>> +
>>> +static void *
>>> +clone_deref(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_deref *dref = ptr;
>>> +   switch (dref->deref_type) {
>>> +   case nir_deref_type_var:
>>> +      return clone_deref_var(state, ptr);
>>> +   case nir_deref_type_array:
>>> +      return clone_deref_array(state, ptr);
>>> +   case nir_deref_type_struct:
>>> +      return clone_deref_struct(state, ptr);
>>> +   default:
>>> +      unreachable("bad deref type");
>>> +      return NULL;
>>> +   }
>>> +}
>>> +
>>> +static void
>>> +__clone_instr(clone_state *state, nir_instr *ninstr, const nir_instr *instr)
>>> +{
>>> +   ninstr->type = instr->type;
>>> +   ninstr->block = clone_ptr(state, instr->block, clone_block);
>>> +}
>>> +
>>> +static void *
>>> +clone_alu(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_alu_instr *alu = ptr;
>>> +   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
>>> +
>>> +   nir_alu_instr *nalu =
>>> +      ralloc_size(state->ns,
>>> +                  sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
>>
>> If you just used nir_alu_instr_create, it would do this for you...
>
> along w/ a bunch of other stuff I didn't need..  maybe I should break
> out nir_alu_instr_size(nsrc)/etc macros that both sites could share..
>
>>> +   store_ptr(state, nalu, alu);
>>> +
>>> +   __clone_instr(state, &nalu->instr, &alu->instr);
>>> +
>>> +   nalu->op = alu->op;
>>> +
>>> +   __clone_dst(state, &nalu->dest.dest, &alu->dest.dest);
>>> +   nalu->dest.saturate = alu->dest.saturate;
>>> +   nalu->dest.write_mask = alu->dest.write_mask;
>>> +
>>> +   for (unsigned i = 0; i < num_srcs; i++) {
>>> +      __clone_src(state, &nalu->src[i].src, &alu->src[i].src);
>>> +      nalu->src[i].negate = alu->src[i].negate;
>>> +      nalu->src[i].abs = alu->src[i].abs;
>>> +      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
>>> +             sizeof(nalu->src[i].swizzle));
>>> +   }
>>> +
>>> +   return nalu;
>>> +}
>>> +
>>> +static void *
>>> +clone_intrinsic(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_intrinsic_instr *itr = ptr;
>>> +   unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
>>> +
>>> +   nir_intrinsic_instr *nitr =
>>> +      ralloc_size(state->ns,
>>> +                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
>>> +   store_ptr(state, nitr, itr);
>>> +
>>> +   __clone_instr(state, &nitr->instr, &itr->instr);
>>> +
>>> +   nitr->intrinsic = itr->intrinsic;
>>> +
>>> +   if (nir_intrinsic_infos[itr->intrinsic].has_dest)
>>> +      __clone_dst(state, &nitr->dest, &itr->dest);
>>> +
>>> +   nitr->num_components = itr->num_components;
>>> +   memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
>>> +
>>> +   for (unsigned i = 0; i < ARRAY_SIZE(nitr->variables); i++) {
>>> +      nitr->variables[i] = clone_ptr(state, itr->variables[i], clone_deref_var);
>>
>> Derefs don't need to go in the remap table.  They need to use the
>> variable from it and need to remap any indirect sources, but the deref
>> doesn't need to go in the table.
>
> that goes for all the various places deref's can appear?  (Ie. call,
> tex instr, etc)?  They are all private to the instruction?  I got a
> bit lost/impatient in following glsl_to_nir around so I went for the
> safe thing..

Yes, that's correct.  A deref is always only local to the instruction.
This is why I suggested passing the instruction into clone_deref
above.  If you don't need to use clone_ptr on it, then it doesn't
require passing extra stuff through.

> (That said, in the grand scheme of things I don't think there are so
> many deref objects, so probably doesn't make much difference)

Sure, but it simplifies things if you just create it on the spot and
pass the instruction through.

>>> +   }
>>> +
>>> +   for (unsigned i = 0; i < num_srcs; i++) {
>>> +      __clone_src(state, &nitr->src[i], &itr->src[i]);
>>> +   }
>>> +
>>> +   return nitr;
>>> +}
>>> +
>>> +static void *
>>> +clone_load_const(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_load_const_instr *lc = ptr;
>>> +   nir_load_const_instr *nlc;
>>> +
>>> +   nlc = ralloc(state->ns, nir_load_const_instr);
>>> +   store_ptr(state, nlc, lc);
>>> +
>>> +   __clone_instr(state, &nlc->instr, &lc->instr);
>>> +
>>> +   memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
>>> +   __clone_ssa_def(state, &nlc->def, &lc->def);
>>> +
>>> +   return nlc;
>>> +}
>>> +
>>> +static void *
>>> +clone_ssa_undef(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_ssa_undef_instr *sa = ptr;
>>> +   nir_ssa_undef_instr *nsa;
>>> +
>>> +   nsa = ralloc(state->ns, nir_ssa_undef_instr);
>>> +   store_ptr(state, nsa, sa);
>>> +
>>> +   __clone_instr(state, &nsa->instr, &sa->instr);
>>> +
>>> +   __clone_ssa_def(state, &nsa->def, &sa->def);
>>> +
>>> +   return nsa;
>>> +}
>>> +
>>> +static void *
>>> +clone_tex(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_tex_instr *tex = ptr;
>>> +   nir_tex_instr *ntex;
>>> +
>>> +   ntex= ralloc(state->ns, nir_tex_instr);
>>> +   store_ptr(state, ntex, tex);
>>> +
>>> +   __clone_instr(state, &ntex->instr, &tex->instr);
>>> +
>>> +   ntex->sampler_dim = tex->sampler_dim;
>>> +   ntex->dest_type = tex->dest_type;
>>> +   ntex->op = tex->op;
>>> +   __clone_dst(state, &ntex->dest, &tex->dest);
>>> +   ntex->num_srcs = tex->num_srcs;
>>> +   ntex->src = ralloc_array(ntex, nir_tex_src, tex->num_srcs);
>>> +   for (unsigned i = 0; i < ntex->num_srcs; i++) {
>>> +      ntex->src[i].src_type = tex->src[i].src_type;
>>> +      __clone_src(state, &ntex->src[i].src, &tex->src[i].src);
>>> +   }
>>> +   ntex->coord_components = tex->coord_components;
>>> +   ntex->is_array = tex->is_array;
>>> +   ntex->is_shadow = tex->is_shadow;
>>> +   ntex->is_new_style_shadow = tex->is_new_style_shadow;
>>> +   memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
>>> +   ntex->component = tex->component;
>>> +   ntex->sampler_index = tex->sampler_index;
>>> +   ntex->sampler_array_size = tex->sampler_array_size;
>>> +   ntex->sampler = clone_ptr(state, tex->sampler, clone_deref_var);
>>> +
>>> +   return ntex;
>>> +}
>>> +
>>> +static void *
>>> +clone_phi(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_phi_instr *phi = ptr;
>>> +   nir_phi_instr *nphi;
>>> +
>>> +   nphi = ralloc(state->ns, nir_phi_instr);
>>> +   store_ptr(state, nphi, phi);
>>> +
>>> +   __clone_instr(state, &nphi->instr, &phi->instr);
>>> +
>>> +   exec_list_make_empty(&nphi->srcs);
>>> +   foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
>>> +      nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
>>> +
>>> +      nsrc->pred = clone_ptr(state, src->pred, clone_block);
>>> +      __clone_src(state, &nsrc->src, &src->src);
>>> +
>>> +      exec_list_push_tail(&nphi->srcs, &nsrc->node);
>>> +   }
>>> +
>>> +   __clone_dst(state, &nphi->dest, &phi->dest);
>>> +
>>> +   return nphi;
>>> +}
>>> +
>>> +static void *
>>> +clone_jump(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_jump_instr *jmp = ptr;
>>> +   nir_jump_instr *njmp;
>>> +
>>> +   njmp = ralloc(state->ns, nir_jump_instr);
>>> +   store_ptr(state, njmp, jmp);
>>> +
>>> +   __clone_instr(state, &njmp->instr, &jmp->instr);
>>> +
>>> +   njmp->type = jmp->type;
>>> +
>>> +   return njmp;
>>> +}
>>> +
>>> +static void *
>>> +clone_call(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_call_instr *call = ptr;
>>> +   nir_call_instr *ncall;
>>> +
>>> +   ncall = ralloc(state->ns, nir_call_instr);
>>> +   store_ptr(state, ncall, call);
>>> +
>>> +   __clone_instr(state, &ncall->instr, &call->instr);
>>> +
>>> +   ncall->num_params = call->num_params;
>>> +   ncall->params = ralloc_array(ncall, nir_deref_var *, call->num_params);
>>> +   for (unsigned i = 0; i < ncall->num_params; i++) {
>>> +      ncall->params[i] = clone_ptr(state, call->params[i], clone_deref_var);
>>> +   }
>>> +
>>> +   ncall->return_deref = clone_ptr(state, call->return_deref, clone_deref_var);
>>> +   ncall->callee = clone_ptr(state, call->callee, clone_function_overload);
>>> +
>>> +   return ncall;
>>> +}
>>> +
>>> +static void *
>>> +clone_parallel_copy(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_parallel_copy_instr *pc = ptr;
>>> +   nir_parallel_copy_instr *npc;
>>> +
>>> +   npc = ralloc(state->ns, nir_parallel_copy_instr);
>>> +   store_ptr(state, npc, pc);
>>> +
>>> +   __clone_instr(state, &npc->instr, &pc->instr);
>>> +
>>> +   nir_foreach_parallel_copy_entry(pc, entry) {
>>> +      nir_parallel_copy_entry *nentry;
>>> +
>>> +      nentry = rzalloc(state->ns, nir_parallel_copy_entry);
>>> +
>>> +      __clone_src(state, &nentry->src, &entry->src);
>>> +      __clone_dst(state, &nentry->dest, &entry->dest);
>>> +
>>> +      exec_list_push_tail(&npc->entries, &nentry->node);
>>> +   }
>>> +
>>> +   return npc;
>>> +}
>>> +
>>> +static void *
>>> +clone_instr(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_instr *instr = ptr;
>>> +   switch (instr->type) {
>>> +   case nir_instr_type_alu:
>>> +      return clone_alu(state, ptr);
>>> +   case nir_instr_type_intrinsic:
>>> +      return clone_intrinsic(state, ptr);
>>> +   case nir_instr_type_load_const:
>>> +      return clone_load_const(state, ptr);
>>> +   case nir_instr_type_ssa_undef:
>>> +      return clone_ssa_undef(state, ptr);
>>> +   case nir_instr_type_tex:
>>> +      return clone_tex(state, ptr);
>>> +   case nir_instr_type_phi:
>>> +      return clone_phi(state, ptr);
>>> +   case nir_instr_type_jump:
>>> +      return clone_jump(state, ptr);
>>> +   case nir_instr_type_call:
>>> +      return clone_call(state, ptr);
>>> +   case nir_instr_type_parallel_copy:
>>> +      return clone_parallel_copy(state, ptr);
>>> +   default:
>>> +      unreachable("bad instr type");
>>> +      return NULL;
>>> +   }
>>> +}
>>> +
>>> +static void
>>> +__clone_cf(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
>>> +{
>>> +   ncf->type = cf->type;
>>> +   ncf->parent = clone_ptr(state, cf->parent, clone_cf_node);
>>> +}
>>> +
>>> +static void *
>>> +clone_block(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_block *blk = ptr;
>>> +   nir_block *nblk;
>>> +
>>> +   nblk = rzalloc(state->ns, nir_block);
>>> +   store_ptr(state, nblk, blk);
>>> +
>>> +   __clone_cf(state, &nblk->cf_node, &blk->cf_node);
>>> +
>>> +   /* clone instructions before successor blocks: */
>>> +   exec_list_make_empty(&nblk->instr_list);
>>> +   nir_foreach_instr(blk, instr) {
>>> +      nir_instr *ninstr = clone_ptr(state, instr, clone_instr);
>>> +      exec_list_push_tail(&nblk->instr_list, &ninstr->node);
>>> +   }
>>> +
>>> +   nblk->successors[0] = clone_ptr(state, blk->successors[0], clone_block);
>>> +   nblk->successors[1] = clone_ptr(state, blk->successors[1], clone_block);
>>> +   nblk->predecessors = clone_set(state, nblk, blk->predecessors, clone_block);
>>> +   nblk->dom_frontier = _mesa_set_create(nblk, _mesa_hash_pointer,
>>> +                                         _mesa_key_pointer_equal);
>>> +
>>> +   return nblk;
>>> +}
>>> +
>>> +static void
>>> +__clone_block_p2(clone_state *state, nir_block *nblk, const nir_block *blk)
>>> +{
>>> +   nir_instr *ninstr;
>>> +
>>> +   ninstr = nir_block_first_instr(nblk);
>>> +   nir_foreach_instr(blk, instr) {
>>> +      nir_add_defs_uses(ninstr);
>>> +      ninstr = nir_instr_next(ninstr);
>>
>> What are you doing here?  You have instr from the iterator, why not just use it?
>
> originally, before I split out the ssa nir_src's to fix up, I needed
> both the new and old instr ptr, and didn't want to get that via
> hashtable lookups.  Which is the reason for the most part for doing
> custom iteration over the IR..
>
> I guess in this particular spot, since I no longer need the original
> instr, I can just iterate nblk instead..
>
>
>>> +   }
>>> +}
>>> +
>>> +
>>> +static void *
>>> +clone_if(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_if *i = ptr;
>>> +   nir_if *ni;
>>> +
>>> +   ni = ralloc(state->ns, nir_if);
>>> +   store_ptr(state, ni, i);
>>> +
>>> +   __clone_cf(state, &ni->cf_node, &i->cf_node);
>>> +
>>> +   __clone_src(state, &ni->condition, &i->condition);
>>> +   clone_cf_list(state, &ni->then_list, &i->then_list);
>>> +   clone_cf_list(state, &ni->else_list, &i->else_list);
>>> +
>>> +   return ni;
>>> +}
>>> +
>>> +static void
>>> +__clone_if_p2(clone_state *state, nir_if *ni, const nir_if *i)
>>> +{
>>> +   nir_update_if_uses(&ni->cf_node);
>>> +   __clone_cf_list_p2(state, &ni->then_list, &i->then_list);
>>> +   __clone_cf_list_p2(state, &ni->else_list, &i->else_list);
>>> +}
>>> +
>>> +static void *
>>> +clone_loop(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_loop *loop = ptr;
>>> +   nir_loop *nloop;
>>> +
>>> +   nloop = ralloc(state->ns, nir_loop);
>>> +   store_ptr(state, nloop, loop);
>>> +
>>> +   __clone_cf(state, &nloop->cf_node, &loop->cf_node);
>>> +
>>> +   clone_cf_list(state, &nloop->body, &loop->body);
>>> +
>>> +   return nloop;
>>> +}
>>> +
>>> +static void
>>> +__clone_loop_p2(clone_state *state, nir_loop *nloop, const nir_loop *loop)
>>> +{
>>> +   __clone_cf_list_p2(state, &nloop->body, &loop->body);
>>> +}
>>> +
>>> +static void *
>>> +clone_function_impl(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_function_impl *fi = ptr;
>>> +   nir_function_impl *nfi;
>>> +
>>> +   nfi = ralloc(state->ns, nir_function_impl);
>>> +   store_ptr(state, nfi, fi);
>>> +
>>> +   __clone_cf(state, &nfi->cf_node, &fi->cf_node);
>>> +
>>> +   nfi->overload = clone_ptr(state, fi->overload, clone_function_overload);
>>> +   clone_cf_list(state, &nfi->body, &fi->body);
>>> +   nfi->end_block = clone_ptr(state, fi->end_block, clone_block);
>>> +   clone_var_list(state, &nfi->locals, &fi->locals);
>>> +
>>> +   nfi->num_params = fi->num_params;
>>> +   nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
>>> +   for (unsigned i = 0; i < fi->num_params; i++) {
>>> +      nfi->params[i] = clone_ptr(state, fi->params[i], clone_var);
>>> +   }
>>> +
>>> +   nfi->return_var = clone_ptr(state, fi->return_var, clone_var);
>>> +   clone_reg_list(state, &nfi->registers, &fi->registers);
>>> +   nfi->reg_alloc = fi->reg_alloc;
>>> +   nfi->ssa_alloc = fi->ssa_alloc;
>>> +   nfi->num_blocks = fi->num_blocks;
>>> +   nfi->valid_metadata = 0;
>>> +
>>> +   return nfi;
>>> +}
>>> +
>>> +static void
>>> +__clone_function_impl_p2(clone_state *state, nir_function_impl *nfi,
>>> +                         const nir_function_impl *fi)
>>> +{
>>> +   __clone_cf_list_p2(state, &nfi->body, &fi->body);
>>> +}
>>> +
>>> +static void *
>>> +clone_cf_node(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_cf_node *cf = ptr;
>>> +   switch (cf->type) {
>>> +   case nir_cf_node_block:
>>> +      return clone_block(state, ptr);
>>> +   case nir_cf_node_if:
>>> +      return clone_if(state, ptr);
>>> +   case nir_cf_node_loop:
>>> +      return clone_loop(state, ptr);
>>> +   case nir_cf_node_function:
>>> +      return clone_function_impl(state, ptr);
>>> +   default:
>>> +      unreachable("bad cf type");
>>> +      return NULL;
>>> +   }
>>> +}
>>> +
>>> +static void
>>> +__clone_cf_node_p2(clone_state *state, nir_cf_node *ncf, const nir_cf_node *cf)
>>> +{
>>> +   switch (cf->type) {
>>> +   case nir_cf_node_block:
>>> +      __clone_block_p2(state, nir_cf_node_as_block(ncf), nir_cf_node_as_block(cf));
>>> +      break;
>>> +   case nir_cf_node_if:
>>> +      __clone_if_p2(state, nir_cf_node_as_if(ncf), nir_cf_node_as_if(cf));
>>> +      break;
>>> +   case nir_cf_node_loop:
>>> +      __clone_loop_p2(state, nir_cf_node_as_loop(ncf), nir_cf_node_as_loop(cf));
>>> +      break;
>>> +   case nir_cf_node_function:
>>> +      __clone_function_impl_p2(state, nir_cf_node_as_function(ncf),
>>> +                               nir_cf_node_as_function(cf));
>>
>> This whole song-and-dance isn't needed.  We already have
>> nir_foreach_block which will do all of this for you.  To handle the
>> ifs, we have nir_block_get_following_if().  See also
>> nir_live_variables for details.
>
> but it won't co-iterate two lists at the same time for me ;-)

Sure, but I'm pretty sure you never use it given my comment on
clone_block_p2 :-)

>>> +      break;
>>> +   default:
>>> +      unreachable("bad cf type");
>>> +      break;
>>> +   }
>>> +}
>>> +
>>> +static void *
>>> +clone_function_overload(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_function_overload *fo = ptr;
>>> +   nir_function_overload *nfo;
>>> +
>>> +   nfo = ralloc(state->ns, nir_function_overload);
>>> +   store_ptr(state, nfo, fo);
>>
>> This one does have to go in the remap table as it will be used by
>> nir_call_instr.
>>
>>> +
>>> +   nfo->num_params = fo->num_params;
>>> +   nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
>>> +   memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
>>> +
>>> +   nfo->return_type = fo->return_type;
>>> +   nfo->impl = clone_ptr(state, fo->impl, clone_function_impl);
>>> +
>>> +   nfo->function = clone_ptr(state, fo->function, clone_function);
>>
>> If you used nir_function_create, that would be set for you.
>
> I'd still need to look up the fxn.. so meh
>
> it was also pushing into shader->functions list, which I didn't want.
>
>>> +
>>> +   return nfo;
>>> +}
>>> +
>>> +static void *
>>> +clone_function(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_function *fxn = ptr;
>>> +   nir_function *nfxn;
>>> +
>>> +   nfxn = ralloc(state->ns, nir_function);
>>> +   store_ptr(state, nfxn, fxn);
>>
>> There's no reason to store the function pointer in the remap table.
>> The only thing that ever references it is the shader and the
>> overloads.
>>
>>> +
>>> +   exec_list_make_empty(&nfxn->overload_list);
>>> +   foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) {
>>> +      nir_function_overload *nfo = clone_ptr(state, fo, clone_function_overload);
>>
>> There's no reason to run this through clone_ptr.  You already know
>> that there's one per entry in this list and it will never fall back to
>> the hash-table version.
>
> meh, it's only a couple extra hashtable lookups in the grand scheme of
> things, and it has the advantage of not falling over if some different
> nir-builder does something a bit different..
>
> BR,
> -R
>
>>> +      exec_list_push_tail(&nfxn->overload_list, &nfo->node);
>>> +   }
>>> +
>>> +   nfxn->name = ralloc_strdup(nfxn, fxn->name);
>>> +   nfxn->shader = state->ns;   /* we could use clone_ptr, but overkill */
>>> +
>>> +   return nfxn;
>>> +}
>>> +
>>> +static void *
>>> +clone_shader(clone_state *state, const void *ptr)
>>> +{
>>> +   const nir_shader *s = ptr;
>>> +   nir_shader *ns;
>>> +
>>> +   ns = nir_shader_create(state->mem_ctx, s->stage, s->options);
>>> +   store_ptr(state, ns, s);
>>> +   state->ns = ns;
>>> +
>>> +   clone_var_list(state, &ns->uniforms, &s->uniforms);
>>> +   clone_var_list(state, &ns->inputs,   &s->inputs);
>>> +   clone_var_list(state, &ns->outputs,  &s->outputs);
>>> +   clone_var_list(state, &ns->globals,  &s->globals);
>>> +   clone_var_list(state, &ns->system_values, &s->system_values);
>>> +
>>> +   exec_list_make_empty(&ns->functions);
>>> +   foreach_list_typed(nir_function, fxn, node, &s->functions) {
>>> +      nir_function *nfxn = clone_ptr(state, fxn, clone_function);
>>
>> There's no reason to run this through clone_ptr
>>
>>> +      exec_list_push_tail(&ns->functions, &nfxn->node);
>>> +   }
>>> +
>>> +   clone_reg_list(state, &ns->registers, &s->registers);
>>> +
>>> +   ns->info = s->info;
>>> +   ns->reg_alloc = s->reg_alloc;
>>> +   ns->num_inputs = s->num_inputs;
>>> +   ns->num_uniforms = s->num_uniforms;
>>> +   ns->num_outputs = s->num_outputs;
>>> +
>>> +   return ns;
>>> +}
>>> +
>>> +/* second pass for some fixup.. in various cases we can have references
>>> + * to embedded objects that haven't been created yet (like phi src's),
>>> + * so some parts need to be handled on a second pass.
>>> + */
>>> +static void
>>> +__clone_shader_p2(clone_state *state, nir_shader *ns, const nir_shader *s)
>>> +{
>>> +   /* resolve ssa srcs: */
>>> +   list_for_each_entry(nir_src, nsrc, &state->ssa_src_list, link) {
>>> +      /* recover the orig src ptr saved in __clone_src(): */
>>> +      nir_src *src = (void *)~(uintptr_t)nsrc->ssa;
>>> +      nsrc->ssa = clone_ptr(state, src->ssa, NULL);
>>> +   }
>>> +   /* nir_update_if_uses() and nir_add_defs_uses(): */
>>> +   nir_foreach_overload(s, fo) {
>>> +      const nir_function_impl *impl = fo->impl;
>>> +      nir_function_impl *nimpl = clone_ptr(state, impl, NULL);
>>> +      __clone_function_impl_p2(state, nimpl, impl);
>>> +   }
>>> +}
>>> +
>>> +nir_shader *
>>> +nir_shader_clone(void *mem_ctx, const nir_shader *s)
>>> +{
>>> +   clone_state state;
>>> +   nir_shader *ns;
>>> +
>>> +   init_clone_state(&state, mem_ctx);
>>> +   ns = clone_ptr(&state, s, clone_shader);
>>> +   __clone_shader_p2(&state, ns, s);
>>> +   free_clone_state(&state);
>>> +
>>> +   return ns;
>>> +}
>>> --
>>> 2.5.0
>>>


More information about the mesa-dev mailing list