[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

Dieter Nützel Dieter at nuetzel-hh.de
Thu Jan 4 05:12:39 UTC 2018


Hello to all of you and a

Happy New Year! ;-)

Any thoughts about this, Marek, Nicolai?
I've been running this for four weeks now without any hiccup.

Thanks,
Dieter

Am 06.12.2017 09:56, schrieb Dieter Nützel:
> Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
> 
> Dieter
> 
> Am 02.12.2017 15:49, schrieb Thomas Helland:
>> Also, allocate worklist_elem in groups of 20, to reduce the burden of
>> allocation. Do not use rzalloc, as there is no need. This lets us drop
>> the number of calls to ralloc from approximately 10% of all calls to
>> ralloc (130 000 calls), down to a mere 2000 calls to ralloc_array_size.
>> This cuts the runtime of shader-db by 1%, while at the same time
>> reducing the number of stalled cycles, executed cycles, and executed
>> instructions by about 1% as reported by perf. I did a five-run
>> benchmark pre and post and got a statistical variance less than 0.1%
>> pre and post. This was with i965's ir validation polluting the
>> benchmark, so the numbers are even better in release builds.
>> 
>> Performance change as found with perf-diff:
>> 4.74%     -0.23%  libc-2.26.so            [.] _int_malloc
>> 1.88%     -0.21%  libc-2.26.so            [.] malloc
>> 2.27%     +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
>> 2.95%     -0.12%  libc-2.26.so            [.] _int_free
>>           +0.11%  libmesa_dri_drivers.so  [.] worklist_push
>> 1.22%     -0.08%  libc-2.26.so            [.] malloc_consolidate
>> 0.16%     -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
>> 1.21%     +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
>> 0.75%     -0.05%  libc-2.26.so            [.] cfree at GLIBC_2.2.5
>> 0.50%     -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
>> 0.57%     +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
>> 1.29%     -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
>> ---
>>  src/compiler/nir/nir_opt_dce.c | 47 
>> ++++++++++++++++++++++++++++--------------
>>  1 file changed, 32 insertions(+), 15 deletions(-)
>> 
>> diff --git a/src/compiler/nir/nir_opt_dce.c 
>> b/src/compiler/nir/nir_opt_dce.c
>> index 5cefba3a72..f9285fe4ac 100644
>> --- a/src/compiler/nir/nir_opt_dce.c
>> +++ b/src/compiler/nir/nir_opt_dce.c
>> @@ -29,32 +29,46 @@
>> 
>>  /* SSA-based mark-and-sweep dead code elimination */
>> 
>> +typedef struct {
>> +   struct exec_list *worklist;
>> +   struct exec_list *free_nodes;
>> +} worklist;
>> +
>>  typedef struct {
>>     struct exec_node node;
>>     nir_instr *instr;
>>  } worklist_elem;
>> 
>>  static void
>> -worklist_push(struct exec_list *worklist, nir_instr *instr)
>> +worklist_push(worklist *worklist, nir_instr *instr)
>>  {
>> -   worklist_elem *elem = ralloc(worklist, worklist_elem);
>> +   if (exec_list_is_empty(worklist->free_nodes)) {
>> +      worklist_elem *elements = ralloc_array(worklist, worklist_elem, 
>> 20);
>> +      for (int i = 0; i < 20; i++)
>> +         exec_list_push_tail(worklist->free_nodes, 
>> &elements[i].node);
>> +   }
>> +
>> +   struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
>> +   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
>>     elem->instr = instr;
>>     instr->pass_flags = 1;
>> -   exec_list_push_tail(worklist, &elem->node);
>> +   exec_list_push_tail(worklist->worklist, &elem->node);
>>  }
>> 
>>  static nir_instr *
>> -worklist_pop(struct exec_list *worklist)
>> +worklist_pop(worklist *worklist)
>>  {
>> -   struct exec_node *node = exec_list_pop_head(worklist);
>> +
>> +   struct exec_node *node = exec_list_pop_head(worklist->worklist);
>>     worklist_elem *elem = exec_node_data(worklist_elem, node, node);
>> +   exec_list_push_head(worklist->free_nodes, node);
>>     return elem->instr;
>>  }
>> 
>>  static bool
>>  mark_live_cb(nir_src *src, void *_state)
>>  {
>> -   struct exec_list *worklist = (struct exec_list *) _state;
>> +   worklist *worklist = _state;
>> 
>>     if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
>>        worklist_push(worklist, src->ssa->parent_instr);
>> @@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
>>  }
>> 
>>  static void
>> -init_instr(nir_instr *instr, struct exec_list *worklist)
>> +init_instr(nir_instr *instr, worklist *worklist)
>>  {
>>     nir_alu_instr *alu_instr;
>>     nir_intrinsic_instr *intrin_instr;
>> @@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list 
>> *worklist)
>>  }
>> 
>>  static bool
>> -init_block(nir_block *block, struct exec_list *worklist)
>> +init_block(nir_block *block, worklist *worklist)
>>  {
>>     nir_foreach_instr(instr, block)
>>        init_instr(instr, worklist);
>> @@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list 
>> *worklist)
>>  static bool
>>  nir_opt_dce_impl(nir_function_impl *impl)
>>  {
>> -   struct exec_list *worklist = rzalloc(NULL, struct exec_list);
>> -   exec_list_make_empty(worklist);
>> +   worklist *wl = ralloc(NULL, worklist);
>> +   wl->free_nodes = ralloc(wl, struct exec_list);
>> +   wl->worklist = ralloc(wl, struct exec_list);
>> +   exec_list_make_empty(wl->free_nodes);
>> +   exec_list_make_empty(wl->worklist);
>> 
>>     nir_foreach_block(block, impl) {
>> -      init_block(block, worklist);
>> +      init_block(block, wl);
>>     }
>> 
>> -   while (!exec_list_is_empty(worklist)) {
>> -      nir_instr *instr = worklist_pop(worklist);
>> -      nir_foreach_src(instr, mark_live_cb, worklist);
>> +   while (!exec_list_is_empty(wl->worklist)) {
>> +      nir_instr *instr = worklist_pop(wl);
>> +      nir_foreach_src(instr, mark_live_cb, wl);
>>     }
>> 
>> -   ralloc_free(worklist);
>> +   ralloc_free(wl);
>> 
>>     bool progress = false;
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list