[Mesa-dev] [PATCH] glsl: optimize list handling in opt_dead_code

Jan Ziak 0xe2.0x9a.0x9b at gmail.com
Tue Oct 18 13:31:24 UTC 2016


This patch replaces the ir_variable_refcount_entry's linked-list
with an array-list.

The array-list has local storage which does not require ANY additional
allocations if the list has small number of elements. The size of this
storage is configurable for each variable.

Benchmark results for "./run -1 shaders" from shader-db[1]:

- The total number of executed instructions goes down from 64.184 to 63.797
  giga-instructions when Mesa is compiled with "gcc -O0 ..."
- In the call tree starting at function do_dead_code():
  - the number of calls to malloc() is reduced by about 10%
  - the number of calls to free() is reduced by about 30%

The layout of the source code in the new file fast_list.h has been
formatted by NetBeans.

[1] git://anongit.freedesktop.org/mesa/shader-db

Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0x9b at gmail.com>
---
 src/compiler/glsl/ir_variable_refcount.cpp |  14 +-
 src/compiler/glsl/ir_variable_refcount.h   |   8 +-
 src/compiler/glsl/opt_dead_code.cpp        |  19 +--
 src/util/fast_list.h                       | 215 +++++++++++++++++++++++++++++
 4 files changed, 224 insertions(+), 32 deletions(-)

diff --git a/src/compiler/glsl/ir_variable_refcount.cpp b/src/compiler/glsl/ir_variable_refcount.cpp
index 8306be1..94d6edc 100644
--- a/src/compiler/glsl/ir_variable_refcount.cpp
+++ b/src/compiler/glsl/ir_variable_refcount.cpp
@@ -46,15 +46,6 @@ static void
 free_entry(struct hash_entry *entry)
 {
    ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data;
-
-   /* Free assignment list */
-   exec_node *n;
-   while ((n = ivre->assign_list.pop_head()) != NULL) {
-      struct assignment_entry *assignment_entry =
-         exec_node_data(struct assignment_entry, n, link);
-      free(assignment_entry);
-   }
-
    delete ivre;
 }
 
@@ -142,10 +133,7 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
        */
       assert(entry->referenced_count >= entry->assigned_count);
       if (entry->referenced_count == entry->assigned_count) {
-         struct assignment_entry *assignment_entry =
-            (struct assignment_entry *)calloc(1, sizeof(*assignment_entry));
-         assignment_entry->assign = ir;
-         entry->assign_list.push_head(&assignment_entry->link);
+         entry->assign_list.add(ir);
       }
    }
 
diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h
index 08a11c0..c3ec5fe 100644
--- a/src/compiler/glsl/ir_variable_refcount.h
+++ b/src/compiler/glsl/ir_variable_refcount.h
@@ -32,11 +32,7 @@
 #include "ir.h"
 #include "ir_visitor.h"
 #include "compiler/glsl_types.h"
-
-struct assignment_entry {
-   exec_node link;
-   ir_assignment *assign;
-};
+#include "util/fast_list.h"
 
 class ir_variable_refcount_entry
 {
@@ -50,7 +46,7 @@ public:
     * This is intended to be used for dead code optimisation and may
     * not be a complete list.
     */
-   exec_list assign_list;
+   arraylist<ir_assignment*,4> assign_list;
 
    /** Number of times the variable is referenced, including assignments. */
    unsigned referenced_count;
diff --git a/src/compiler/glsl/opt_dead_code.cpp b/src/compiler/glsl/opt_dead_code.cpp
index 75e668a..8e395697 100644
--- a/src/compiler/glsl/opt_dead_code.cpp
+++ b/src/compiler/glsl/opt_dead_code.cpp
@@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
 
    struct hash_entry *e;
    hash_table_foreach(v.ht, e) {
-      ir_variable_refcount_entry *entry = (ir_variable_refcount_entry *)e->data;
+      ir_variable_refcount_entry *const entry = (ir_variable_refcount_entry *)e->data;
 
       /* Since each assignment is a reference, the refereneced count must be
        * greater than or equal to the assignment count.  If they are equal,
@@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
       if (entry->var->data.always_active_io)
          continue;
 
-      if (!entry->assign_list.is_empty()) {
+      if (!entry->assign_list.empty()) {
 	 /* Remove all the dead assignments to the variable we found.
 	  * Don't do so if it's a shader or function output, though.
 	  */
@@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
              entry->var->data.mode != ir_var_shader_out &&
              entry->var->data.mode != ir_var_shader_storage) {
 
-            while (!entry->assign_list.is_empty()) {
-               struct assignment_entry *assignment_entry =
-                  exec_node_data(struct assignment_entry,
-                                 entry->assign_list.get_head_raw(), link);
-
-	       assignment_entry->assign->remove();
-
+            for (ir_assignment *assign : entry->assign_list) {
+	       assign->remove();
 	       if (debug) {
 	          printf("Removed assignment to %s@%p\n",
 		         entry->var->name, (void *) entry->var);
                }
-
-               assignment_entry->link.remove();
-               free(assignment_entry);
             }
+            entry->assign_list.clear();
             progress = true;
 	 }
       }
 
-      if (entry->assign_list.is_empty()) {
+      if (entry->assign_list.empty()) {
 	 /* If there are no assignments or references to the variable left,
 	  * then we can remove its declaration.
 	  */
diff --git a/src/util/fast_list.h b/src/util/fast_list.h
new file mode 100644
index 0000000..8d4ac9a
--- /dev/null
+++ b/src/util/fast_list.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright © 2016 Jan Ziak
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0a9b at gmail.com>
+ */
+
+/**
+ * @file fast_list.h
+ *
+ * High-performance C++ list implementation.
+ */
+
+#ifndef _UTIL_FAST_LIST_H
+#define _UTIL_FAST_LIST_H
+
+#include <new>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "util/macros.h"
+
+/**
+ * A high-performance array list.
+ *
+ * The list doesn't support elements whose behavior depends on the memory address of the element.
+ *
+ * @param T  List element type.
+ *
+ * @param N  The number of statically allocated elements to achieve the highest overall performance.
+ *           N is typically 0, 2, 4 or 8 depending on the runtime usage pattern of a particular list
+ *           variable in the source code.
+ */
+template<typename T, size_t N>
+struct arraylist {
+   size_t len; // Length
+   size_t cap; // Capacity (size of memory allocated to 'elem')
+   T *elem; // List elements
+   uint8_t local[N * sizeof (T)]; // Fast local preallocated storage for lists with length 0..N
+
+   arraylist() {
+      len = 0;
+      cap = N;
+      elem = (N ? (T*) local : nullptr);
+   }
+
+   ~arraylist() {
+      if (!__has_trivial_destructor(T)) {
+         for (size_t i = 0; i < len; i++) {
+            elem[i].~T();
+         }
+      }
+      if ((N && elem != (T*) local) && elem) {
+         free(elem);
+      }
+   }
+
+   arraylist(const arraylist<T, N> &a) = delete;
+   void operator=(const arraylist<T, N> &a) = delete;
+
+   bool empty() const {
+      return len == 0;
+   }
+
+   void clear() {
+      if (!__has_trivial_destructor(T)) {
+         for (size_t i = 0; i < len; i++) {
+            elem[i].~T();
+         }
+      }
+      len = 0;
+   }
+
+   arraylist<T, N>& add(const T &e) {
+      if (unlikely(len == cap)) {
+         enlarge1();
+      }
+      new (&elem[len++]) T(e);
+      return *this;
+   }
+
+   T& operator[](size_t i) {
+      return elem[i];
+   }
+
+   const T& operator[](size_t i) const {
+      return elem[i];
+   }
+
+   /*
+    * C++ iterators:
+    */
+
+   struct const_iterator_t {
+      const T * const elem;
+      size_t i;
+
+      const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {
+      }
+
+      const T& operator*() const {
+         return elem[i];
+      }
+
+      void operator++() {
+         i++;
+      }
+
+      bool operator!=(const const_iterator_t &b) const {
+         return i < b.i;
+      }
+   };
+
+   struct iterator_t {
+      T * const elem;
+      size_t i;
+
+      iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {
+      }
+
+      T& operator*() const {
+         return elem[i];
+      }
+
+      void operator++() {
+         i++;
+      }
+
+      bool operator!=(const iterator_t &b) const {
+         return i < b.i;
+      }
+   };
+
+   const_iterator_t cbegin() const {
+      return const_iterator_t(elem, 0);
+   }
+
+   const_iterator_t cend() const {
+      return const_iterator_t(elem, len);
+   }
+
+   const_iterator_t begin() const {
+      return const_iterator_t(elem, 0);
+   }
+
+   const_iterator_t end() const {
+      return const_iterator_t(elem, len);
+   }
+
+   iterator_t begin() {
+      return iterator_t(elem, 0);
+   }
+
+   iterator_t end() {
+      return iterator_t(elem, len);
+   }
+
+   /*
+    * Methods for std::vector<T> compatibility:
+    */
+
+   arraylist<T, N>& push_back(const T &e) {
+      return add(e);
+   }
+
+private:
+
+   void enlarge1() {
+      if (N) {
+         cap *= 2;
+         if (elem == (T*) local) {
+            elem = (T*) malloc(cap * sizeof (T));
+            if (unlikely(!elem)) {
+               fprintf(stderr, "error: out of memory\n");
+               abort();
+            }
+            memcpy(elem, local, len * sizeof (T));
+         } else {
+            elem = (T*) realloc(elem, cap * sizeof (T));
+            if (unlikely(!elem)) {
+               fprintf(stderr, "error: out of memory\n");
+               abort();
+            }
+         }
+      } else {
+         cap = (cap ? 2 * cap : 4);
+         elem = (T*) realloc(elem, cap * sizeof (T));
+         if (unlikely(!elem)) {
+            fprintf(stderr, "error: out of memory\n");
+            abort();
+         }
+      }
+   }
+};
+
+#endif /* _UTIL_FAST_LIST_H */
\ No newline at end of file


More information about the mesa-dev mailing list