[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc
Dieter Nützel
Dieter at nuetzel-hh.de
Wed Dec 6 08:56:26 UTC 2017
Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
Dieter
Am 02.12.2017 15:49, schrieb Thomas Helland:
> Also, allocate worklist_elem in groups of 20, to reduce the burden of
> allocation. Do not use rzalloc, as there is no need. This lets us drop
> the number of calls to ralloc from approximately 10% of all calls to
> ralloc (130 000 calls), down to a mere 2000 calls to ralloc_array_size.
> This cuts the runtime of shader-db by 1%, while at the same time
> reducing the number of stalled cycles, executed cycles, and executed
> instructions by about 1% as reported by perf. I did a five-run
> benchmark pre and post and got a statistical variance of less than 0.1%
> pre and post. This was with i965's IR validation polluting the benchmark,
> so the numbers are even better in release builds.
>
> Performance change as found with perf-diff:
> 4.74% -0.23% libc-2.26.so [.] _int_malloc
> 1.88% -0.21% libc-2.26.so [.] malloc
> 2.27% +0.16% libmesa_dri_drivers.so [.] match_value.part.7
> 2.95% -0.12% libc-2.26.so [.] _int_free
> +0.11% libmesa_dri_drivers.so [.] worklist_push
> 1.22% -0.08% libc-2.26.so [.] malloc_consolidate
> 0.16% -0.06% libmesa_dri_drivers.so [.] mark_live_cb
> 1.21% +0.06% libmesa_dri_drivers.so [.] match_expression.part.6
> 0.75% -0.05% libc-2.26.so [.] cfree@GLIBC_2.2.5
> 0.50% -0.05% libmesa_dri_drivers.so [.] ralloc_size
> 0.57% +0.04% libmesa_dri_drivers.so [.] nir_replace_instr
> 1.29% -0.04% libmesa_dri_drivers.so [.] unsafe_free
> ---
> src/compiler/nir/nir_opt_dce.c | 47 ++++++++++++++++++++++++++++--------------
> 1 file changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
> index 5cefba3a72..f9285fe4ac 100644
> --- a/src/compiler/nir/nir_opt_dce.c
> +++ b/src/compiler/nir/nir_opt_dce.c
> @@ -29,32 +29,46 @@
>
> /* SSA-based mark-and-sweep dead code elimination */
>
> +typedef struct {
> + struct exec_list *worklist;
> + struct exec_list *free_nodes;
> +} worklist;
> +
> typedef struct {
> struct exec_node node;
> nir_instr *instr;
> } worklist_elem;
>
> static void
> -worklist_push(struct exec_list *worklist, nir_instr *instr)
> +worklist_push(worklist *worklist, nir_instr *instr)
> {
> - worklist_elem *elem = ralloc(worklist, worklist_elem);
> + if (exec_list_is_empty(worklist->free_nodes)) {
> + worklist_elem *elements = ralloc_array(worklist, worklist_elem, 20);
> + for (int i = 0; i < 20; i++)
> + exec_list_push_tail(worklist->free_nodes, &elements[i].node);
> + }
> +
> + struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
> + worklist_elem *elem = exec_node_data(worklist_elem, node, node);
> elem->instr = instr;
> instr->pass_flags = 1;
> - exec_list_push_tail(worklist, &elem->node);
> + exec_list_push_tail(worklist->worklist, &elem->node);
> }
>
> static nir_instr *
> -worklist_pop(struct exec_list *worklist)
> +worklist_pop(worklist *worklist)
> {
> - struct exec_node *node = exec_list_pop_head(worklist);
> +
> + struct exec_node *node = exec_list_pop_head(worklist->worklist);
> worklist_elem *elem = exec_node_data(worklist_elem, node, node);
> + exec_list_push_head(worklist->free_nodes, node);
> return elem->instr;
> }
>
> static bool
> mark_live_cb(nir_src *src, void *_state)
> {
> - struct exec_list *worklist = (struct exec_list *) _state;
> + worklist *worklist = _state;
>
> if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
> worklist_push(worklist, src->ssa->parent_instr);
> @@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
> }
>
> static void
> -init_instr(nir_instr *instr, struct exec_list *worklist)
> +init_instr(nir_instr *instr, worklist *worklist)
> {
> nir_alu_instr *alu_instr;
> nir_intrinsic_instr *intrin_instr;
> @@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
> }
>
> static bool
> -init_block(nir_block *block, struct exec_list *worklist)
> +init_block(nir_block *block, worklist *worklist)
> {
> nir_foreach_instr(instr, block)
> init_instr(instr, worklist);
> @@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list *worklist)
> static bool
> nir_opt_dce_impl(nir_function_impl *impl)
> {
> - struct exec_list *worklist = rzalloc(NULL, struct exec_list);
> - exec_list_make_empty(worklist);
> + worklist *wl = ralloc(NULL, worklist);
> + wl->free_nodes = ralloc(wl, struct exec_list);
> + wl->worklist = ralloc(wl, struct exec_list);
> + exec_list_make_empty(wl->free_nodes);
> + exec_list_make_empty(wl->worklist);
>
> nir_foreach_block(block, impl) {
> - init_block(block, worklist);
> + init_block(block, wl);
> }
>
> - while (!exec_list_is_empty(worklist)) {
> - nir_instr *instr = worklist_pop(worklist);
> - nir_foreach_src(instr, mark_live_cb, worklist);
> + while (!exec_list_is_empty(wl->worklist)) {
> + nir_instr *instr = worklist_pop(wl);
> + nir_foreach_src(instr, mark_live_cb, wl);
> }
>
> - ralloc_free(worklist);
> + ralloc_free(wl);
>
> bool progress = false;
More information about the mesa-dev
mailing list