[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

Dieter Nützel Dieter at nuetzel-hh.de
Wed Dec 6 08:56:26 UTC 2017


Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>

Dieter

Am 02.12.2017 15:49, schrieb Thomas Helland:
> Also, allocate worklist_elem in groups of 20, to reduce the burden of
> allocation. Do not use rzalloc, as there is no need. This lets us drop
> the number of calls to ralloc from approximately 10% of all calls to
> ralloc (130 000 calls) down to a mere 2000 calls to ralloc_array_size.
> This cuts the runtime of shader-db by 1%, while at the same time
> reducing the number of stalled cycles, executed cycles, and executed
> instructions by about 1% as reported by perf. I did a five-run
> benchmark pre and post and got a statistical variance of less than
> 0.1% pre and post. This was with i965's IR validation polluting the
> benchmark, so the numbers are even better in release builds.
> 
> Performance change as found with perf-diff:
> 4.74%     -0.23%  libc-2.26.so            [.] _int_malloc
> 1.88%     -0.21%  libc-2.26.so            [.] malloc
> 2.27%     +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
> 2.95%     -0.12%  libc-2.26.so            [.] _int_free
>           +0.11%  libmesa_dri_drivers.so  [.] worklist_push
> 1.22%     -0.08%  libc-2.26.so            [.] malloc_consolidate
> 0.16%     -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
> 1.21%     +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
> 0.75%     -0.05%  libc-2.26.so            [.] cfree@GLIBC_2.2.5
> 0.50%     -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
> 0.57%     +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
> 1.29%     -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
> ---
>  src/compiler/nir/nir_opt_dce.c | 47 ++++++++++++++++++++++++++++--------------
>  1 file changed, 32 insertions(+), 15 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
> index 5cefba3a72..f9285fe4ac 100644
> --- a/src/compiler/nir/nir_opt_dce.c
> +++ b/src/compiler/nir/nir_opt_dce.c
> @@ -29,32 +29,46 @@
> 
>  /* SSA-based mark-and-sweep dead code elimination */
> 
> +typedef struct {
> +   struct exec_list *worklist;
> +   struct exec_list *free_nodes;
> +} worklist;
> +
>  typedef struct {
>     struct exec_node node;
>     nir_instr *instr;
>  } worklist_elem;
> 
>  static void
> -worklist_push(struct exec_list *worklist, nir_instr *instr)
> +worklist_push(worklist *worklist, nir_instr *instr)
>  {
> -   worklist_elem *elem = ralloc(worklist, worklist_elem);
> +   if (exec_list_is_empty(worklist->free_nodes)) {
> +      worklist_elem *elements = ralloc_array(worklist, worklist_elem, 
> 20);
> +      for (int i = 0; i < 20; i++)
> +         exec_list_push_tail(worklist->free_nodes, &elements[i].node);
> +   }
> +
> +   struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
> +   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
>     elem->instr = instr;
>     instr->pass_flags = 1;
> -   exec_list_push_tail(worklist, &elem->node);
> +   exec_list_push_tail(worklist->worklist, &elem->node);
>  }
> 
>  static nir_instr *
> -worklist_pop(struct exec_list *worklist)
> +worklist_pop(worklist *worklist)
>  {
> -   struct exec_node *node = exec_list_pop_head(worklist);
> +
> +   struct exec_node *node = exec_list_pop_head(worklist->worklist);
>     worklist_elem *elem = exec_node_data(worklist_elem, node, node);
> +   exec_list_push_head(worklist->free_nodes, node);
>     return elem->instr;
>  }
> 
>  static bool
>  mark_live_cb(nir_src *src, void *_state)
>  {
> -   struct exec_list *worklist = (struct exec_list *) _state;
> +   worklist *worklist = _state;
> 
>     if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
>        worklist_push(worklist, src->ssa->parent_instr);
> @@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
>  }
> 
>  static void
> -init_instr(nir_instr *instr, struct exec_list *worklist)
> +init_instr(nir_instr *instr, worklist *worklist)
>  {
>     nir_alu_instr *alu_instr;
>     nir_intrinsic_instr *intrin_instr;
> @@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
>  }
> 
>  static bool
> -init_block(nir_block *block, struct exec_list *worklist)
> +init_block(nir_block *block, worklist *worklist)
>  {
>     nir_foreach_instr(instr, block)
>        init_instr(instr, worklist);
> @@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list *worklist)
>  static bool
>  nir_opt_dce_impl(nir_function_impl *impl)
>  {
> -   struct exec_list *worklist = rzalloc(NULL, struct exec_list);
> -   exec_list_make_empty(worklist);
> +   worklist *wl = ralloc(NULL, worklist);
> +   wl->free_nodes = ralloc(wl, struct exec_list);
> +   wl->worklist = ralloc(wl, struct exec_list);
> +   exec_list_make_empty(wl->free_nodes);
> +   exec_list_make_empty(wl->worklist);
> 
>     nir_foreach_block(block, impl) {
> -      init_block(block, worklist);
> +      init_block(block, wl);
>     }
> 
> -   while (!exec_list_is_empty(worklist)) {
> -      nir_instr *instr = worklist_pop(worklist);
> -      nir_foreach_src(instr, mark_live_cb, worklist);
> +   while (!exec_list_is_empty(wl->worklist)) {
> +      nir_instr *instr = worklist_pop(wl);
> +      nir_foreach_src(instr, mark_live_cb, wl);
>     }
> 
> -   ralloc_free(worklist);
> +   ralloc_free(wl);
> 
>     bool progress = false;


More information about the mesa-dev mailing list