[Mesa-dev] [PATCH] glsl: optimize list handling in opt_dead_code

Jan Ziak 0xe2.0x9a.0x9b at gmail.com
Tue Oct 18 13:44:29 UTC 2016


Hi Michael,

thanks for the suggestions about code formatting. I formatted the
whole fast_list.h file in the Netbeans editor and uploaded a new
revision of the patch.

Jan

On Tue, Oct 18, 2016 at 1:42 PM, Michael Schellenberger Costa
<mschellenbergercosta at googlemail.com> wrote:
> Hi Jan,
>
> On 18.10.2016 00:07, Jan Ziak wrote:
>>
>> This patch replaces the ir_variable_refcount_entry's linked-list
>> with an array-list.
>>
>> The array-list has local storage which does not require ANY additional
>> allocations if the list has small number of elements. The size of this
>> storage is configurable for each variable.
>>
>> Benchmark results for "./run -1 shaders" from shader-db[1]:
>>
>> - The total number of executed instructions goes down from 64.184 to
>> 63.797
>>    giga-instructions when Mesa is compiled with "gcc -O0 ..."
>> - In the call tree starting at function do_dead_code():
>>    - the number of calls to malloc() is reduced by about 10%
>>    - the number of calls to free() is reduced by about 30%
>>
>> [1] git://anongit.freedesktop.org/mesa/shader-db
>>
>> Signed-off-by: Jan Ziak (http://atom-symbol.net)
>> <0xe2.0x9a.0x9b at gmail.com>
>> ---
>>   src/compiler/glsl/ir_variable_refcount.cpp |  14 +--
>>   src/compiler/glsl/ir_variable_refcount.h   |   8 +-
>>   src/compiler/glsl/opt_dead_code.cpp        |  19 ++--
>>   src/util/fast_list.h                       | 167
>> +++++++++++++++++++++++++++++
>>   4 files changed, 176 insertions(+), 32 deletions(-)
>>
>> diff --git a/src/compiler/glsl/ir_variable_refcount.cpp
>> b/src/compiler/glsl/ir_variable_refcount.cpp
>> index 8306be1..94d6edc 100644
>> --- a/src/compiler/glsl/ir_variable_refcount.cpp
>> +++ b/src/compiler/glsl/ir_variable_refcount.cpp
>> @@ -46,15 +46,6 @@ static void
>>   free_entry(struct hash_entry *entry)
>>   {
>>      ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *)
>> entry->data;
>> -
>> -   /* Free assignment list */
>> -   exec_node *n;
>> -   while ((n = ivre->assign_list.pop_head()) != NULL) {
>> -      struct assignment_entry *assignment_entry =
>> -         exec_node_data(struct assignment_entry, n, link);
>> -      free(assignment_entry);
>> -   }
>> -
>>      delete ivre;
>>   }
>>   @@ -142,10 +133,7 @@
>> ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
>>          */
>>         assert(entry->referenced_count >= entry->assigned_count);
>>         if (entry->referenced_count == entry->assigned_count) {
>> -         struct assignment_entry *assignment_entry =
>> -            (struct assignment_entry *)calloc(1,
>> sizeof(*assignment_entry));
>> -         assignment_entry->assign = ir;
>> -         entry->assign_list.push_head(&assignment_entry->link);
>> +         entry->assign_list.add(ir);
>>         }
>>      }
>>   diff --git a/src/compiler/glsl/ir_variable_refcount.h
>> b/src/compiler/glsl/ir_variable_refcount.h
>> index 08a11c0..c3ec5fe 100644
>> --- a/src/compiler/glsl/ir_variable_refcount.h
>> +++ b/src/compiler/glsl/ir_variable_refcount.h
>> @@ -32,11 +32,7 @@
>>   #include "ir.h"
>>   #include "ir_visitor.h"
>>   #include "compiler/glsl_types.h"
>> -
>> -struct assignment_entry {
>> -   exec_node link;
>> -   ir_assignment *assign;
>> -};
>> +#include "util/fast_list.h"
>>     class ir_variable_refcount_entry
>>   {
>> @@ -50,7 +46,7 @@ public:
>>       * This is intended to be used for dead code optimisation and may
>>       * not be a complete list.
>>       */
>> -   exec_list assign_list;
>> +   arraylist<ir_assignment*,4> assign_list;
>>        /** Number of times the variable is referenced, including
>> assignments. */
>>      unsigned referenced_count;
>> diff --git a/src/compiler/glsl/opt_dead_code.cpp
>> b/src/compiler/glsl/opt_dead_code.cpp
>> index 75e668a..06e8c3d 100644
>> --- a/src/compiler/glsl/opt_dead_code.cpp
>> +++ b/src/compiler/glsl/opt_dead_code.cpp
>> @@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>>        struct hash_entry *e;
>>      hash_table_foreach(v.ht, e) {
>> -      ir_variable_refcount_entry *entry = (ir_variable_refcount_entry
>> *)e->data;
>> +      ir_variable_refcount_entry *const entry =
>> (ir_variable_refcount_entry *)e->data;
>>           /* Since each assignment is a reference, the refereneced count
>> must be
>>          * greater than or equal to the assignment count.  If they are
>> equal,
>> @@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>>         if (entry->var->data.always_active_io)
>>            continue;
>>   -      if (!entry->assign_list.is_empty()) {
>> +      if (!entry->assign_list.empty()) {
>>          /* Remove all the dead assignments to the variable we found.
>>           * Don't do so if it's a shader or function output, though.
>>           */
>> @@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>>                entry->var->data.mode != ir_var_shader_out &&
>>                entry->var->data.mode != ir_var_shader_storage) {
>>   -            while (!entry->assign_list.is_empty()) {
>> -               struct assignment_entry *assignment_entry =
>> -                  exec_node_data(struct assignment_entry,
>> -                                 entry->assign_list.get_head_raw(),
>> link);
>> -
>> -              assignment_entry->assign->remove();
>> -
>> +            for(ir_assignment *assign : entry->assign_list) {
>
> The original code separates control flow instructions as for or while with a
> space before the brace, aka "for (...". This applies for all the code.
>
>> +              assign->remove();
>>                if (debug) {
>>                   printf("Removed assignment to %s@%p\n",
>>                          entry->var->name, (void *) entry->var);
>>                  }
>> -
>> -               assignment_entry->link.remove();
>> -               free(assignment_entry);
>>               }
>> +            entry->assign_list.clear();
>>               progress = true;
>>          }
>>         }
>>   -      if (entry->assign_list.is_empty()) {
>> +      if (entry->assign_list.empty()) {
>>          /* If there are no assignments or references to the variable
>> left,
>>           * then we can remove its declaration.
>>           */
>> diff --git a/src/util/fast_list.h b/src/util/fast_list.h
>> new file mode 100644
>> index 0000000..4abd965
>> --- /dev/null
>> +++ b/src/util/fast_list.h
>> @@ -0,0 +1,167 @@
>> +/*
>> + * Copyright © 2016 Jan Ziak
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0a9b at gmail.com>
>> + */
>> +
>> +/**
>> + * @file fast_list.h
>> + *
>> + * High-performance C++ list implementation.
>> + */
>> +
>> +#ifndef _UTIL_FAST_LIST_H
>> +#define _UTIL_FAST_LIST_H
>> +
>> +#include <new>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +
>> +/**
>> + * A high-performance array list.
>> + *
>> + * The list doesn't support elements whose behavior depends on the memory
>> address of the element.
>> + *
>> + * @param T  List element type.
>> + *
>> + * @param N  The number of statically allocated elements to achieve the
>> highest overall performance.
>> + *           N is typically 0, 2, 4 or 8 depending on the runtime usage
>> pattern of a particular list
>> + *           variable in the source code.
>> + */
>> +template<typename T, size_t N>
>> +struct arraylist
>> +{
>> +   size_t len;  // Length
>> +   size_t cap;  // Capacity (size of memory allocated to 'elem')
>> +   T *elem;     // List elements
>> +   uint8_t local[N*sizeof(T)];  // Fast local preallocated storage for
>> lists with length 0..N
>> +
>> +   arraylist() {
>> +      len = 0;
>> +      cap = N;
>> +      elem = (N ? (T*)local : NULL);
>
> You might want to use nullptr if you go for C++11 anyway (As i assume from
> the range based loops above)
>>
>> +   }
>> +
>> +   ~arraylist() {
>> +      if(!__has_trivial_destructor(T)) {
>> +         for(size_t i=0; i<len; i++) {
>
> Please separate operators by whitespace aka "i < len"
>>
>> +            elem[i].~T();
>> +         }
>> +      }
>> +      if((N && elem != (T*)local) && elem) {
>> +         free(elem);
>> +      }
>> +   }
>> +
>> +   arraylist(const arraylist<T,N> &a) = delete;
>> +   void operator=(const arraylist<T,N> &a) = delete;
>> +
>> +   bool empty() const {
>> +      return len==0;
>
> same
>>
>> +   }
>> +
>> +   void clear() {
>> +      if(!__has_trivial_destructor(T)) {
>> +         for(size_t i=0; i<len; i++) {
>
> same
>
>> +            elem[i].~T();
>> +         }
>> +      }
>> +      len = 0;
>> +   }
>> +
>> +   arraylist<T,N>& add(const T &e) {
>> +      if(len == cap) {
>> +         enlarge1();
>> +      }
>> +      new (&elem[len++]) T(e);
>> +      return *this;
>> +   }
>> +
>> +   T& operator[](size_t i) { return elem[i]; }
>> +   const T& operator[](size_t i) const { return elem[i]; }
>> +
>> +   /*
>> +    * C++ iterators:
>> +    */
>> +
>> +   struct const_iterator_t {
>> +      const T *const elem;
>> +      size_t i;
>> +      const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {}
>> +      const T& operator*() const { return elem[i]; }
>> +      void operator++() { i++; }
>> +      bool operator!=(const const_iterator_t &b) const { return i < b.i;
>> }
>> +   };
>> +   struct iterator_t {
>> +      T *const elem;
>> +      size_t i;
>> +      iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {}
>> +      T& operator*() const { return elem[i]; }
>> +      void operator++() { i++; }
>> +      bool operator!=(const iterator_t &b) const { return i < b.i; }
>> +   };
>> +
>> +   const_iterator_t const_begin() const { return const_iterator_t(elem,
>> 0); }
>> +   const_iterator_t const_end() const { return const_iterator_t(elem,
>> len); }
>
> The stl knows those as cbegin() and cend(), maybe stick with the standard
> names?
> --Michael
>>
>> +
>> +   const_iterator_t begin() const { return const_iterator_t(elem, 0); }
>> +   const_iterator_t end() const { return const_iterator_t(elem, len); }
>> +
>> +   iterator_t begin() { return iterator_t(elem, 0); }
>> +   iterator_t end() { return iterator_t(elem, len); }
>> +
>> +   /*
>> +    * Methods for std::vector<T> compatibility:
>> +    */
>> +
>> +   arraylist<T,N>& push_back(const T &e) { return add(e); }
>> +
>> +private:
>> +   void enlarge1() {
>> +      if(N) {
>> +         cap *= 2;
>> +         if(elem == (T*)local) {
>> +            elem = (T*)malloc(cap*sizeof(T));
>> +            if(!elem) {
>> +               abort();
>> +            }
>> +            memcpy(elem, local, len*sizeof(T));
>> +         }
>> +         else {
>> +            elem = (T*)realloc(elem, cap*sizeof(T));
>> +            if(!elem) {
>> +               abort();
>> +            }
>> +         }
>> +      }
>> +      else {
>> +         cap = (cap ? 2*cap : 4);
>> +         elem = (T*)realloc(elem, cap*sizeof(T));
>> +         if(!elem) {
>> +            abort();
>> +         }
>> +      }
>> +   }
>> +};
>> +
>> +#endif /* _UTIL_FAST_LIST_H */
>> \ No newline at end of file
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list