[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc
Dieter Nützel
Dieter at nuetzel-hh.de
Thu Jan 4 05:12:39 UTC 2018
Hello to all of you and a
Happy New Year! ;-)
Any thoughts about this, Marek, Nicolai?
I'm running this for four weeks without any hickup, now.
Thanks,
Dieter
Am 06.12.2017 09:56, schrieb Dieter Nützel:
> Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
>
> Dieter
>
> Am 02.12.2017 15:49, schrieb Thomas Helland:
>> Also, allocate worklist_elem in groups of 20, to reduce the burden of
>> allocation. Do not use rzalloc, as there is no need. This lets us drop
>> the number of calls to ralloc from aproximately 10% of all calls to
>> ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size.
>> This cuts the runtime of shader-db by 1%, while at the same time
>> reducing the number of stalled cycles, executed cycles, and executed
>> instructions by about 1 % as reported by perf. I did a five-run
>> benchmark pre and post and got a statistical variance less than 0.1%
>> pre
>> and post. This was with i965's ir validation polluting the benchmark,
>> so
>> the numbers are even better in release builds.
>>
>> Performance change as found with perf-diff:
>> 4.74% -0.23% libc-2.26.so [.] _int_malloc
>> 1.88% -0.21% libc-2.26.so [.] malloc
>> 2.27% +0.16% libmesa_dri_drivers.so [.] match_value.part.7
>> 2.95% -0.12% libc-2.26.so [.] _int_free
>> +0.11% libmesa_dri_drivers.so [.] worklist_push
>> 1.22% -0.08% libc-2.26.so [.] malloc_consolidate
>> 0.16% -0.06% libmesa_dri_drivers.so [.] mark_live_cb
>> 1.21% +0.06% libmesa_dri_drivers.so [.] match_expression.part.6
>> 0.75% -0.05% libc-2.26.so [.] cfree at GLIBC_2.2.5
>> 0.50% -0.05% libmesa_dri_drivers.so [.] ralloc_size
>> 0.57% +0.04% libmesa_dri_drivers.so [.] nir_replace_instr
>> 1.29% -0.04% libmesa_dri_drivers.so [.] unsafe_free
>> ---
>> src/compiler/nir/nir_opt_dce.c | 47
>> ++++++++++++++++++++++++++++--------------
>> 1 file changed, 32 insertions(+), 15 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_opt_dce.c
>> b/src/compiler/nir/nir_opt_dce.c
>> index 5cefba3a72..f9285fe4ac 100644
>> --- a/src/compiler/nir/nir_opt_dce.c
>> +++ b/src/compiler/nir/nir_opt_dce.c
>> @@ -29,32 +29,46 @@
>>
>> /* SSA-based mark-and-sweep dead code elimination */
>>
>> +typedef struct {
>> + struct exec_list *worklist;
>> + struct exec_list *free_nodes;
>> +} worklist;
>> +
>> typedef struct {
>> struct exec_node node;
>> nir_instr *instr;
>> } worklist_elem;
>>
>> static void
>> -worklist_push(struct exec_list *worklist, nir_instr *instr)
>> +worklist_push(worklist *worklist, nir_instr *instr)
>> {
>> - worklist_elem *elem = ralloc(worklist, worklist_elem);
>> + if (exec_list_is_empty(worklist->free_nodes)) {
>> + worklist_elem *elements = ralloc_array(worklist, worklist_elem,
>> 20);
>> + for (int i = 0; i < 20; i++)
>> + exec_list_push_tail(worklist->free_nodes,
>> &elements[i].node);
>> + }
>> +
>> + struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
>> + worklist_elem *elem = exec_node_data(worklist_elem, node, node);
>> elem->instr = instr;
>> instr->pass_flags = 1;
>> - exec_list_push_tail(worklist, &elem->node);
>> + exec_list_push_tail(worklist->worklist, &elem->node);
>> }
>>
>> static nir_instr *
>> -worklist_pop(struct exec_list *worklist)
>> +worklist_pop(worklist *worklist)
>> {
>> - struct exec_node *node = exec_list_pop_head(worklist);
>> +
>> + struct exec_node *node = exec_list_pop_head(worklist->worklist);
>> worklist_elem *elem = exec_node_data(worklist_elem, node, node);
>> + exec_list_push_head(worklist->free_nodes, node);
>> return elem->instr;
>> }
>>
>> static bool
>> mark_live_cb(nir_src *src, void *_state)
>> {
>> - struct exec_list *worklist = (struct exec_list *) _state;
>> + worklist *worklist = _state;
>>
>> if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
>> worklist_push(worklist, src->ssa->parent_instr);
>> @@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
>> }
>>
>> static void
>> -init_instr(nir_instr *instr, struct exec_list *worklist)
>> +init_instr(nir_instr *instr, worklist *worklist)
>> {
>> nir_alu_instr *alu_instr;
>> nir_intrinsic_instr *intrin_instr;
>> @@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list
>> *worklist)
>> }
>>
>> static bool
>> -init_block(nir_block *block, struct exec_list *worklist)
>> +init_block(nir_block *block, worklist *worklist)
>> {
>> nir_foreach_instr(instr, block)
>> init_instr(instr, worklist);
>> @@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list
>> *worklist)
>> static bool
>> nir_opt_dce_impl(nir_function_impl *impl)
>> {
>> - struct exec_list *worklist = rzalloc(NULL, struct exec_list);
>> - exec_list_make_empty(worklist);
>> + worklist *wl = ralloc(NULL, worklist);
>> + wl->free_nodes = ralloc(wl, struct exec_list);
>> + wl->worklist = ralloc(wl, struct exec_list);
>> + exec_list_make_empty(wl->free_nodes);
>> + exec_list_make_empty(wl->worklist);
>>
>> nir_foreach_block(block, impl) {
>> - init_block(block, worklist);
>> + init_block(block, wl);
>> }
>>
>> - while (!exec_list_is_empty(worklist)) {
>> - nir_instr *instr = worklist_pop(worklist);
>> - nir_foreach_src(instr, mark_live_cb, worklist);
>> + while (!exec_list_is_empty(wl->worklist)) {
>> + nir_instr *instr = worklist_pop(wl);
>> + nir_foreach_src(instr, mark_live_cb, wl);
>> }
>>
>> - ralloc_free(worklist);
>> + ralloc_free(wl);
>>
>> bool progress = false;
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list