[Mesa-dev] [PATCH] glsl: optimize list handling in opt_dead_code
Jan Ziak
0xe2.0x9a.0x9b at gmail.com
Tue Oct 18 13:44:29 UTC 2016
Hi Michael,
thanks for the suggestions about code formatting. I formatted the
whole fast_list.h file in the Netbeans editor and uploaded a new
revision of the patch.
Jan
On Tue, Oct 18, 2016 at 1:42 PM, Michael Schellenberger Costa
<mschellenbergercosta at googlemail.com> wrote:
> Hi Jan,
>
> On 18.10.2016 00:07, Jan Ziak wrote:
>>
>> This patch replaces the ir_variable_refcount_entry's linked-list
>> with an array-list.
>>
>> The array-list has local storage which does not require ANY additional
>> allocations if the list has small number of elements. The size of this
>> storage is configurable for each variable.
>>
>> Benchmark results for "./run -1 shaders" from shader-db[1]:
>>
>> - The total number of executed instructions goes down from 64.184 to
>> 63.797
>> giga-instructions when Mesa is compiled with "gcc -O0 ..."
>> - In the call tree starting at function do_dead_code():
>> - the number of calls to malloc() is reduced by about 10%
>> - the number of calls to free() is reduced by about 30%
>>
>> [1] git://anongit.freedesktop.org/mesa/shader-db
>>
>> Signed-off-by: Jan Ziak (http://atom-symbol.net)
>> <0xe2.0x9a.0x9b at gmail.com>
>> ---
>> src/compiler/glsl/ir_variable_refcount.cpp | 14 +--
>> src/compiler/glsl/ir_variable_refcount.h | 8 +-
>> src/compiler/glsl/opt_dead_code.cpp | 19 ++--
>> src/util/fast_list.h | 167
>> +++++++++++++++++++++++++++++
>> 4 files changed, 176 insertions(+), 32 deletions(-)
>>
>> diff --git a/src/compiler/glsl/ir_variable_refcount.cpp
>> b/src/compiler/glsl/ir_variable_refcount.cpp
>> index 8306be1..94d6edc 100644
>> --- a/src/compiler/glsl/ir_variable_refcount.cpp
>> +++ b/src/compiler/glsl/ir_variable_refcount.cpp
>> @@ -46,15 +46,6 @@ static void
>> free_entry(struct hash_entry *entry)
>> {
>> ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *)
>> entry->data;
>> -
>> - /* Free assignment list */
>> - exec_node *n;
>> - while ((n = ivre->assign_list.pop_head()) != NULL) {
>> - struct assignment_entry *assignment_entry =
>> - exec_node_data(struct assignment_entry, n, link);
>> - free(assignment_entry);
>> - }
>> -
>> delete ivre;
>> }
>> @@ -142,10 +133,7 @@
>> ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
>> */
>> assert(entry->referenced_count >= entry->assigned_count);
>> if (entry->referenced_count == entry->assigned_count) {
>> - struct assignment_entry *assignment_entry =
>> - (struct assignment_entry *)calloc(1,
>> sizeof(*assignment_entry));
>> - assignment_entry->assign = ir;
>> - entry->assign_list.push_head(&assignment_entry->link);
>> + entry->assign_list.add(ir);
>> }
>> }
>> diff --git a/src/compiler/glsl/ir_variable_refcount.h
>> b/src/compiler/glsl/ir_variable_refcount.h
>> index 08a11c0..c3ec5fe 100644
>> --- a/src/compiler/glsl/ir_variable_refcount.h
>> +++ b/src/compiler/glsl/ir_variable_refcount.h
>> @@ -32,11 +32,7 @@
>> #include "ir.h"
>> #include "ir_visitor.h"
>> #include "compiler/glsl_types.h"
>> -
>> -struct assignment_entry {
>> - exec_node link;
>> - ir_assignment *assign;
>> -};
>> +#include "util/fast_list.h"
>> class ir_variable_refcount_entry
>> {
>> @@ -50,7 +46,7 @@ public:
>> * This is intended to be used for dead code optimisation and may
>> * not be a complete list.
>> */
>> - exec_list assign_list;
>> + arraylist<ir_assignment*,4> assign_list;
>> /** Number of times the variable is referenced, including
>> assignments. */
>> unsigned referenced_count;
>> diff --git a/src/compiler/glsl/opt_dead_code.cpp
>> b/src/compiler/glsl/opt_dead_code.cpp
>> index 75e668a..06e8c3d 100644
>> --- a/src/compiler/glsl/opt_dead_code.cpp
>> +++ b/src/compiler/glsl/opt_dead_code.cpp
>> @@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>> struct hash_entry *e;
>> hash_table_foreach(v.ht, e) {
>> - ir_variable_refcount_entry *entry = (ir_variable_refcount_entry
>> *)e->data;
>> + ir_variable_refcount_entry *const entry =
>> (ir_variable_refcount_entry *)e->data;
>> /* Since each assignment is a reference, the refereneced count
>> must be
>> * greater than or equal to the assignment count. If they are
>> equal,
>> @@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>> if (entry->var->data.always_active_io)
>> continue;
>> - if (!entry->assign_list.is_empty()) {
>> + if (!entry->assign_list.empty()) {
>> /* Remove all the dead assignments to the variable we found.
>> * Don't do so if it's a shader or function output, though.
>> */
>> @@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool
>> uniform_locations_assigned)
>> entry->var->data.mode != ir_var_shader_out &&
>> entry->var->data.mode != ir_var_shader_storage) {
>> - while (!entry->assign_list.is_empty()) {
>> - struct assignment_entry *assignment_entry =
>> - exec_node_data(struct assignment_entry,
>> - entry->assign_list.get_head_raw(),
>> link);
>> -
>> - assignment_entry->assign->remove();
>> -
>> + for(ir_assignment *assign : entry->assign_list) {
>
> The original code separates control flow instructions as for or while with a
> space before the brace, aka "for (...". This applies for all the code.
>
>> + assign->remove();
>> if (debug) {
>> printf("Removed assignment to %s@%p\n",
>> entry->var->name, (void *) entry->var);
>> }
>> -
>> - assignment_entry->link.remove();
>> - free(assignment_entry);
>> }
>> + entry->assign_list.clear();
>> progress = true;
>> }
>> }
>> - if (entry->assign_list.is_empty()) {
>> + if (entry->assign_list.empty()) {
>> /* If there are no assignments or references to the variable
>> left,
>> * then we can remove its declaration.
>> */
>> diff --git a/src/util/fast_list.h b/src/util/fast_list.h
>> new file mode 100644
>> index 0000000..4abd965
>> --- /dev/null
>> +++ b/src/util/fast_list.h
>> @@ -0,0 +1,167 @@
>> +/*
>> + * Copyright © 2016 Jan Ziak
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + * Jan Ziak (http://atom-symbol.net) <0xe2.0x9x.0a9b at gmail.com>
>> + */
>> +
>> +/**
>> + * @file fast_list.h
>> + *
>> + * High-performance C++ list implementation.
>> + */
>> +
>> +#ifndef _UTIL_FAST_LIST_H
>> +#define _UTIL_FAST_LIST_H
>> +
>> +#include <new>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +
>> +/**
>> + * A high-performance array list.
>> + *
>> + * The list doesn't support elements whose behavior depends on the memory
>> address of the element.
>> + *
>> + * @param T List element type.
>> + *
>> + * @param N The number of statically allocated elements to achieve the
>> highest overall performance.
>> + * N is typically 0, 2, 4 or 8 depending on the runtime usage
>> pattern of a particular list
>> + * variable in the source code.
>> + */
>> +template<typename T, size_t N>
>> +struct arraylist
>> +{
>> + size_t len; // Length
>> + size_t cap; // Capacity (size of memory allocated to 'elem')
>> + T *elem; // List elements
>> + uint8_t local[N*sizeof(T)]; // Fast local preallocated storage for
>> lists with length 0..N
>> +
>> + arraylist() {
>> + len = 0;
>> + cap = N;
>> + elem = (N ? (T*)local : NULL);
>
> You might want to use nullptr if you go for C++11 anyway (As i assume from
> the range based loops above)
>>
>> + }
>> +
>> + ~arraylist() {
>> + if(!__has_trivial_destructor(T)) {
>> + for(size_t i=0; i<len; i++) {
>
> Please separate operators by whitespace aka "i < len"
>>
>> + elem[i].~T();
>> + }
>> + }
>> + if((N && elem != (T*)local) && elem) {
>> + free(elem);
>> + }
>> + }
>> +
>> + arraylist(const arraylist<T,N> &a) = delete;
>> + void operator=(const arraylist<T,N> &a) = delete;
>> +
>> + bool empty() const {
>> + return len==0;
>
> same
>>
>> + }
>> +
>> + void clear() {
>> + if(!__has_trivial_destructor(T)) {
>> + for(size_t i=0; i<len; i++) {
>
> same
>
>> + elem[i].~T();
>> + }
>> + }
>> + len = 0;
>> + }
>> +
>> + arraylist<T,N>& add(const T &e) {
>> + if(len == cap) {
>> + enlarge1();
>> + }
>> + new (&elem[len++]) T(e);
>> + return *this;
>> + }
>> +
>> + T& operator[](size_t i) { return elem[i]; }
>> + const T& operator[](size_t i) const { return elem[i]; }
>> +
>> + /*
>> + * C++ iterators:
>> + */
>> +
>> + struct const_iterator_t {
>> + const T *const elem;
>> + size_t i;
>> + const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {}
>> + const T& operator*() const { return elem[i]; }
>> + void operator++() { i++; }
>> + bool operator!=(const const_iterator_t &b) const { return i < b.i;
>> }
>> + };
>> + struct iterator_t {
>> + T *const elem;
>> + size_t i;
>> + iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {}
>> + T& operator*() const { return elem[i]; }
>> + void operator++() { i++; }
>> + bool operator!=(const iterator_t &b) const { return i < b.i; }
>> + };
>> +
>> + const_iterator_t const_begin() const { return const_iterator_t(elem,
>> 0); }
>> + const_iterator_t const_end() const { return const_iterator_t(elem,
>> len); }
>
> The stl knows those as cbegin() and cend(), maybe stick with the standard
> names?
> --Michael
>>
>> +
>> + const_iterator_t begin() const { return const_iterator_t(elem, 0); }
>> + const_iterator_t end() const { return const_iterator_t(elem, len); }
>> +
>> + iterator_t begin() { return iterator_t(elem, 0); }
>> + iterator_t end() { return iterator_t(elem, len); }
>> +
>> + /*
>> + * Methods for std::vector<T> compatibility:
>> + */
>> +
>> + arraylist<T,N>& push_back(const T &e) { return add(e); }
>> +
>> +private:
>> + void enlarge1() {
>> + if(N) {
>> + cap *= 2;
>> + if(elem == (T*)local) {
>> + elem = (T*)malloc(cap*sizeof(T));
>> + if(!elem) {
>> + abort();
>> + }
>> + memcpy(elem, local, len*sizeof(T));
>> + }
>> + else {
>> + elem = (T*)realloc(elem, cap*sizeof(T));
>> + if(!elem) {
>> + abort();
>> + }
>> + }
>> + }
>> + else {
>> + cap = (cap ? 2*cap : 4);
>> + elem = (T*)realloc(elem, cap*sizeof(T));
>> + if(!elem) {
>> + abort();
>> + }
>> + }
>> + }
>> +};
>> +
>> +#endif /* _UTIL_FAST_LIST_H */
>> \ No newline at end of file
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list