[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

Thomas Helland thomashelland90 at gmail.com
Sat Dec 2 14:49:41 UTC 2017


Also, allocate worklist_elem in groups of 20, to reduce the burden of 
allocation. Do not use rzalloc, as there is no need. This lets us drop 
the number of calls to ralloc from aproximately 10% of all calls to 
ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size. 
This cuts the runtime of shader-db by 1%, while at the same time 
reducing the number of stalled cycles, executed cycles, and executed 
instructions by about 1 % as reported by perf. I did a five-run 
benchmark pre and post and got a statistical variance less than 0.1% pre 
and post. This was with i965's ir validation polluting the benchmark, so 
the numbers are even better in release builds.

Performance change as found with perf-diff:
4.74%     -0.23%  libc-2.26.so            [.] _int_malloc
1.88%     -0.21%  libc-2.26.so            [.] malloc
2.27%     +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
2.95%     -0.12%  libc-2.26.so            [.] _int_free
          +0.11%  libmesa_dri_drivers.so  [.] worklist_push
1.22%     -0.08%  libc-2.26.so            [.] malloc_consolidate
0.16%     -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
1.21%     +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
0.75%     -0.05%  libc-2.26.so            [.] cfree at GLIBC_2.2.5
0.50%     -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
0.57%     +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
1.29%     -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
---
 src/compiler/nir/nir_opt_dce.c | 47 ++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index 5cefba3a72..f9285fe4ac 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -29,32 +29,46 @@
 
 /* SSA-based mark-and-sweep dead code elimination */
 
+typedef struct {
+   struct exec_list *worklist;
+   struct exec_list *free_nodes;
+} worklist;
+
 typedef struct {
    struct exec_node node;
    nir_instr *instr;
 } worklist_elem;
 
 static void
-worklist_push(struct exec_list *worklist, nir_instr *instr)
+worklist_push(worklist *worklist, nir_instr *instr)
 {
-   worklist_elem *elem = ralloc(worklist, worklist_elem);
+   if (exec_list_is_empty(worklist->free_nodes)) {
+      worklist_elem *elements = ralloc_array(worklist, worklist_elem, 20);
+      for (int i = 0; i < 20; i++)
+         exec_list_push_tail(worklist->free_nodes, &elements[i].node);
+   }
+
+   struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
+   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
    elem->instr = instr;
    instr->pass_flags = 1;
-   exec_list_push_tail(worklist, &elem->node);
+   exec_list_push_tail(worklist->worklist, &elem->node);
 }
 
 static nir_instr *
-worklist_pop(struct exec_list *worklist)
+worklist_pop(worklist *worklist)
 {
-   struct exec_node *node = exec_list_pop_head(worklist);
+   
+   struct exec_node *node = exec_list_pop_head(worklist->worklist);
    worklist_elem *elem = exec_node_data(worklist_elem, node, node);
+   exec_list_push_head(worklist->free_nodes, node);
    return elem->instr;
 }
 
 static bool
 mark_live_cb(nir_src *src, void *_state)
 {
-   struct exec_list *worklist = (struct exec_list *) _state;
+   worklist *worklist = _state;
 
    if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
       worklist_push(worklist, src->ssa->parent_instr);
@@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
 }
 
 static void
-init_instr(nir_instr *instr, struct exec_list *worklist)
+init_instr(nir_instr *instr, worklist *worklist)
 {
    nir_alu_instr *alu_instr;
    nir_intrinsic_instr *intrin_instr;
@@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
 }
 
 static bool
-init_block(nir_block *block, struct exec_list *worklist)
+init_block(nir_block *block, worklist *worklist)
 {
    nir_foreach_instr(instr, block)
       init_instr(instr, worklist);
@@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list *worklist)
 static bool
 nir_opt_dce_impl(nir_function_impl *impl)
 {
-   struct exec_list *worklist = rzalloc(NULL, struct exec_list);
-   exec_list_make_empty(worklist);
+   worklist *wl = ralloc(NULL, worklist);
+   wl->free_nodes = ralloc(wl, struct exec_list);
+   wl->worklist = ralloc(wl, struct exec_list);
+   exec_list_make_empty(wl->free_nodes);
+   exec_list_make_empty(wl->worklist);
 
    nir_foreach_block(block, impl) {
-      init_block(block, worklist);
+      init_block(block, wl);
    }
 
-   while (!exec_list_is_empty(worklist)) {
-      nir_instr *instr = worklist_pop(worklist);
-      nir_foreach_src(instr, mark_live_cb, worklist);
+   while (!exec_list_is_empty(wl->worklist)) {
+      nir_instr *instr = worklist_pop(wl);
+      nir_foreach_src(instr, mark_live_cb, wl);
    }
 
-   ralloc_free(worklist);
+   ralloc_free(wl);
 
    bool progress = false;
 
-- 
2.15.0



More information about the mesa-dev mailing list