[Mesa-dev] glsl: optimize list handling in opt_dead_code

Maciej Cencora m.cencora at gmail.com
Tue Oct 18 08:43:51 UTC 2016


On wtorek, 18 października 2016 00:07:18 CEST Jan Ziak wrote:
> This patch replaces the ir_variable_refcount_entry's linked-list
> with an array-list.
>
> The array-list has local storage which does not require ANY additional
> allocations if the list has small number of elements. The size of this
> storage is configurable for each variable.
>
> Benchmark results for "./run -1 shaders" from shader-db[1]:
>
> - The total number of executed instructions goes down from 64.184 to
63.797
>   giga-instructions when Mesa is compiled with "gcc -O0 ..."

Hi,

A total number of instructions in -O0 is not a good indicator of whether
this change is beneficial from performance POV.
You should check it with -O2 or whatever is the default in mesa release
builds.

> - In the call tree starting at function do_dead_code():
>   - the number of calls to malloc() is reduced by about 10%
>   - the number of calls to free() is reduced by about 30%

These are certainly a win.

>
> [1] git://anongit.freedesktop.org/mesa/shader-db
>
> Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0x9b at gmail.com
>
> ---
>  src/compiler/glsl/ir_variable_refcount.cpp |  14 +--
>  src/compiler/glsl/ir_variable_refcount.h   |   8 +-
>  src/compiler/glsl/opt_dead_code.cpp        |  19 ++--
>  src/util/fast_list.h                       | 167
+++++++++++++++++++++++++++++
>  4 files changed, 176 insertions(+), 32 deletions(-)
>
> diff --git a/src/compiler/glsl/ir_variable_refcount.cpp
b/src/compiler/glsl/ir_variable_refcount.cpp
> index 8306be1..94d6edc 100644
> --- a/src/compiler/glsl/ir_variable_refcount.cpp
> +++ b/src/compiler/glsl/ir_variable_refcount.cpp
> @@ -46,15 +46,6 @@ static void
>  free_entry(struct hash_entry *entry)
>  {
>     ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *)
entry->data;
> -
> -   /* Free assignment list */
> -   exec_node *n;
> -   while ((n = ivre->assign_list.pop_head()) != NULL) {
> -      struct assignment_entry *assignment_entry =
> -         exec_node_data(struct assignment_entry, n, link);
> -      free(assignment_entry);
> -   }
> -
>     delete ivre;
>  }
>
> @@ -142,10 +133,7 @@
ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
>         */
>        assert(entry->referenced_count >= entry->assigned_count);
>        if (entry->referenced_count == entry->assigned_count) {
> -         struct assignment_entry *assignment_entry =
> -            (struct assignment_entry *)calloc(1,
sizeof(*assignment_entry));
> -         assignment_entry->assign = ir;
> -         entry->assign_list.push_head(&assignment_entry->link);
> +         entry->assign_list.add(ir);
>        }
>     }
>
> diff --git a/src/compiler/glsl/ir_variable_refcount.h
b/src/compiler/glsl/ir_variable_refcount.h
> index 08a11c0..c3ec5fe 100644
> --- a/src/compiler/glsl/ir_variable_refcount.h
> +++ b/src/compiler/glsl/ir_variable_refcount.h
> @@ -32,11 +32,7 @@
>  #include "ir.h"
>  #include "ir_visitor.h"
>  #include "compiler/glsl_types.h"
> -
> -struct assignment_entry {
> -   exec_node link;
> -   ir_assignment *assign;
> -};
> +#include "util/fast_list.h"
>
>  class ir_variable_refcount_entry
>  {
> @@ -50,7 +46,7 @@ public:
>      * This is intended to be used for dead code optimisation and may
>      * not be a complete list.
>      */
> -   exec_list assign_list;
> +   arraylist<ir_assignment*,4> assign_list;
>
>     /** Number of times the variable is referenced, including
assignments. */
>     unsigned referenced_count;
> diff --git a/src/compiler/glsl/opt_dead_code.cpp
b/src/compiler/glsl/opt_dead_code.cpp
> index 75e668a..06e8c3d 100644
> --- a/src/compiler/glsl/opt_dead_code.cpp
> +++ b/src/compiler/glsl/opt_dead_code.cpp
> @@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool
uniform_locations_assigned)
>
>     struct hash_entry *e;
>     hash_table_foreach(v.ht, e) {
> -      ir_variable_refcount_entry *entry = (ir_variable_refcount_entry
*)e->data;
> +      ir_variable_refcount_entry *const entry =
(ir_variable_refcount_entry *)e->data;
>
>        /* Since each assignment is a reference, the refereneced count
must be
>         * greater than or equal to the assignment count.  If they are
equal,
> @@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool
uniform_locations_assigned)
>        if (entry->var->data.always_active_io)
>           continue;
>
> -      if (!entry->assign_list.is_empty()) {
> +      if (!entry->assign_list.empty()) {
>   /* Remove all the dead assignments to the variable we found.
>    * Don't do so if it's a shader or function output, though.
>    */
> @@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool
uniform_locations_assigned)
>               entry->var->data.mode != ir_var_shader_out &&
>               entry->var->data.mode != ir_var_shader_storage) {
>
> -            while (!entry->assign_list.is_empty()) {
> -               struct assignment_entry *assignment_entry =
> -                  exec_node_data(struct assignment_entry,
> -                                 entry->assign_list.get_head_raw(),
link);
> -
> -       assignment_entry->assign->remove();
> -
> +            for(ir_assignment *assign : entry->assign_list) {
> +       assign->remove();
>         if (debug) {
>            printf("Removed assignment to %s@%p\n",
>           entry->var->name, (void *) entry->var);
>                 }
> -
> -               assignment_entry->link.remove();
> -               free(assignment_entry);
>              }
> +            entry->assign_list.clear();
>              progress = true;
>   }
>        }
>
> -      if (entry->assign_list.is_empty()) {
> +      if (entry->assign_list.empty()) {
>   /* If there are no assignments or references to the variable left,
>    * then we can remove its declaration.
>    */
> diff --git a/src/util/fast_list.h b/src/util/fast_list.h
> new file mode 100644
> index 0000000..4abd965
> --- /dev/null
> +++ b/src/util/fast_list.h
> @@ -0,0 +1,167 @@
> +/*
> + * Copyright © 2016 Jan Ziak
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
a
> + * copy of this software and associated documentation files (the
"Software"),
> + * to deal in the Software without restriction, including without
limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
next
> + * paragraph) shall be included in all copies or substantial portions of
the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0a9b at gmail.com>
> + */
> +
> +/**
> + * @file fast_list.h
> + *
> + * High-performance C++ list implementation.
> + */
> +
> +#ifndef _UTIL_FAST_LIST_H
> +#define _UTIL_FAST_LIST_H
> +
> +#include <new>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +/**
> + * A high-performance array list.

Please rename it. Its not a list since it doesn't fullfil list requirement
(O(1) insertion, iterators are not invalidated on insertion or removal,
etc...).

> + *
> + * The list doesn't support elements whose behavior depends on the
memory address of the element.
> + *
> + * @param T  List element type.
> + *
> + * @param N  The number of statically allocated elements to achieve the
highest overall performance.
> + *           N is typically 0, 2, 4 or 8 depending on the runtime usage
pattern of a particular list
> + *           variable in the source code.
> + */
> +template<typename T, size_t N>
> +struct arraylist
> +{
> +   size_t len;  // Length
> +   size_t cap;  // Capacity (size of memory allocated to 'elem')
> +   T *elem;     // List elements

elem data member is not necessary. You can derive this information from len.

> +   uint8_t local[N*sizeof(T)];  // Fast local preallocated storage for
lists with length 0..N

With current design minimum number of elements in internal storage cannot
be 0 - zero-sized array are not supported in C++
I suggest increasing the minimum to 1, as it will simplify code (a couple
of conditionals can be removed).

Also you should use std::aligned_storage to get properly aligned buffer.

> +
> +   arraylist() {
> +      len = 0;
> +      cap = N;
> +      elem = (N ? (T*)local : NULL);
> +   }
> +
> +   ~arraylist() {
> +      if(!__has_trivial_destructor(T)) {

__has_trivial_destructor is gcc specific, use
std::is_trivially_destructible<T>

> +         for(size_t i=0; i<len; i++) {
> +            elem[i].~T();
> +         }
> +      }
> +      if((N && elem != (T*)local) && elem) {
> +         free(elem);
> +      }
> +   }
> +
> +   arraylist(const arraylist<T,N> &a) = delete;
> +   void operator=(const arraylist<T,N> &a) = delete;
> +
> +   bool empty() const {
> +      return len==0;
> +   }
> +
> +   void clear() {
> +      if(!__has_trivial_destructor(T)) {
> +         for(size_t i=0; i<len; i++) {
> +            elem[i].~T();
> +         }
> +      }
> +      len = 0;
> +   }
> +
> +   arraylist<T,N>& add(const T &e) {
> +      if(len == cap) {
> +         enlarge1();
> +      }
> +      new (&elem[len++]) T(e);
> +      return *this;
> +   }

Why return *this? I see no use for call chaining of add().

> +
> +   T& operator[](size_t i) { return elem[i]; }
> +   const T& operator[](size_t i) const { return elem[i]; }
> +
> +   /*
> +    * C++ iterators:
> +    */
> +
> +   struct const_iterator_t {
> +      const T *const elem;
> +      size_t i;
> +      const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {}
> +      const T& operator*() const { return elem[i]; }
> +      void operator++() { i++; }
> +      bool operator!=(const const_iterator_t &b) const { return i < b.i;
}

Shouldn't this be 'return i != b.i'?
Same for iterator_t.

> +   };
> +   struct iterator_t {
> +      T *const elem;
> +      size_t i;
> +      iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {}
> +      T& operator*() const { return elem[i]; }
> +      void operator++() { i++; }
> +      bool operator!=(const iterator_t &b) const { return i < b.i; }
> +   };
> +
> +   const_iterator_t const_begin() const { return const_iterator_t(elem,
0); }
> +   const_iterator_t const_end() const { return const_iterator_t(elem,
len); }
> +
> +   const_iterator_t begin() const { return const_iterator_t(elem, 0); }
> +   const_iterator_t end() const { return const_iterator_t(elem, len); }
> +
> +   iterator_t begin() { return iterator_t(elem, 0); }
> +   iterator_t end() { return iterator_t(elem, len); }
> +
> +   /*
> +    * Methods for std::vector<T> compatibility:
> +    */
> +
> +   arraylist<T,N>& push_back(const T &e) { return add(e); }

std::vector<T>::push_back return type is void

> +
> +private:
> +   void enlarge1() {
> +      if(N) {
> +         cap *= 2;
> +         if(elem == (T*)local) {
> +            elem = (T*)malloc(cap*sizeof(T));
> +            if(!elem) {
> +               abort();
> +            }
> +            memcpy(elem, local, len*sizeof(T));

This works only for trivially copyable types. If that was intentional, then
please add a comment at the top,
and a proper static_assert so it doesn't blow up in someone's face later.

PS. Please keep my e-mail address in a loop since I'm not subscribed to
this list.

Regards,
Maciej

> +         }
> +         else {
> +            elem = (T*)realloc(elem, cap*sizeof(T));
> +            if(!elem) {
> +               abort();
> +            }
> +         }
> +      }
> +      else {
> +         cap = (cap ? 2*cap : 4);
> +         elem = (T*)realloc(elem, cap*sizeof(T));
> +         if(!elem) {
> +            abort();
> +         }
> +      }
> +   }
> +};
> +
> +#endif /* _UTIL_FAST_LIST_H */
> \ No newline at end of file
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161018/d5c60c6a/attachment-0001.html>


More information about the mesa-dev mailing list