Mesa (main): pan/bi: Add nodearray datastructure

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jun 2 17:30:01 UTC 2022


Module: Mesa
Branch: main
Commit: c70daa74f03aff5985710d87fc209279d115db7e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c70daa74f03aff5985710d87fc209279d115db7e

Author: Icecream95 <ixn at disroot.org>
Date:   Thu Jun  2 11:56:27 2022 -0400

pan/bi: Add nodearray datastructure

This is an array which can either be sparse or dense, and was designed
to be used to track liveness and interference information.

Either a sparse array with sorted indices or dense array is used.
Other data structures were tried, such as red-black trees or hash
tables, but they were slower. When used for storing constraints, the
indices do not have to be sorted as duplicating elements is okay, but
the speedup from that was not enough to justify the extra complexity.

v2: Add a comment about how to potentially speed it up. But it seems
  fast enough even without this change.
v3: Use a custom struct rather than relying on util_dynarray.
v4: Split out functions only used for liveness analysis, rather than the simpler
  data structure needed for the register interference matrix. If we need to
  optimize liveness, that can follow on after. Also make it for vec8 (Alyssa).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16780>

---

 src/panfrost/bifrost/nodearray.h | 247 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 247 insertions(+)

diff --git a/src/panfrost/bifrost/nodearray.h b/src/panfrost/bifrost/nodearray.h
new file mode 100644
index 00000000000..ed852e0c56d
--- /dev/null
+++ b/src/panfrost/bifrost/nodearray.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2021 Icecream95
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* A nodearray is an array type that is either sparse or dense, depending on
+ * the number of elements.
+ *
+ * When the number of elements is over a threshold (max_sparse), the dense mode
+ * is used, and the nodearray is simply a container for an array.
+ *
+ * In sparse mode, the array has elements with a 24-bit node index and a value.
+ * The nodes are always sorted, so that a binary search can be used to find
+ * elements. Nonexistent elements are treated as zero.
+ *
+ * Function names follow ARM instruction names: orr does *elem |= value.
+ *
+ * Although it's probably already fast enough, the datastructure could be sped
+ * up a lot, especially when NEON is available, by making the sparse mode store
+ * sixteen adjacent values, so that adding new keys also allocates nearby keys,
+ * and to allow for vectorising iteration, as can be done when in the dense
+ * mode.
+ */
+
+#ifndef __BIFROST_NODEARRAY_H
+#define __BIFROST_NODEARRAY_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A value that may be stored in a nodearray element, used directly for dense
+ * elements and included into sparse elements.
+ */
+typedef uint16_t nodearray_value;
+
+#define NODEARRAY_MAX_VALUE 0xffff
+
+/* Type storing sparse nodearray elements, consisting of a nodearray_value at
+ * the bottom and a nodearray_key at the top.
+ */
+typedef uint64_t nodearray_sparse;
+
+typedef struct {
+        union {
+                nodearray_sparse *sparse;
+                nodearray_value *dense;
+        };
+        unsigned size;
+        unsigned sparse_capacity;
+} nodearray;
+
+/* Align sizes to 16-bytes for SIMD purposes */
+#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
+
+#define nodearray_sparse_foreach(buf, elem) \
+   for (nodearray_sparse *elem = (buf)->sparse; \
+        elem < (buf)->sparse + (buf)->size; elem++)
+
+#define nodearray_dense_foreach(buf, elem) \
+   for (nodearray_value *elem = (buf)->dense; \
+        elem < (buf)->dense + (buf)->size; elem++)
+
+#define nodearray_dense_foreach_64(buf, elem) \
+   for (uint64_t *elem = (uint64_t *)(buf)->dense; \
+        (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
+
+static inline bool
+nodearray_is_sparse(const nodearray *a)
+{
+        return a->sparse_capacity != ~0U;
+}
+
+static inline void
+nodearray_init(nodearray *a)
+{
+        memset(a, 0, sizeof(nodearray));
+}
+
+static inline void
+nodearray_reset(nodearray *a)
+{
+        free(a->sparse);
+        nodearray_init(a);
+}
+
+static inline nodearray_sparse
+nodearray_encode(unsigned key, nodearray_value value)
+{
+        static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
+        return ((nodearray_sparse) key << 16) | value;
+}
+
+static inline unsigned
+nodearray_sparse_key(const nodearray_sparse *elem)
+{
+        static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
+        return *elem >> 16;
+}
+
+static inline nodearray_value
+nodearray_sparse_value(const nodearray_sparse *elem)
+{
+        return *elem & NODEARRAY_MAX_VALUE;
+}
+
+static inline unsigned
+nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
+{
+        assert(nodearray_is_sparse(a) && a->size);
+
+        nodearray_sparse *data = a->sparse;
+
+        /* Encode the key using the highest possible value, so that the
+         * matching node must be encoded lower than this
+         */
+        nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
+
+        unsigned left = 0;
+        unsigned right = a->size - 1;
+
+        if (data[right] <= skey)
+                left = right;
+
+        while (left != right) {
+                /* No need to worry about overflow, we couldn't have more than
+                 * 2^24 elements */
+                unsigned probe = (left + right + 1) / 2;
+
+                if (data[probe] > skey)
+                        right = probe - 1;
+                else
+                        left = probe;
+        }
+
+        *elem = data + left;
+        return left;
+}
+
+static inline void
+nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
+              unsigned max_sparse, unsigned max)
+{
+        assert(key < (1 << 24));
+        assert(key < max);
+
+        if (!value)
+                return;
+
+        if (nodearray_is_sparse(a)) {
+                unsigned size = a->size;
+                unsigned left = 0;
+
+                if (size) {
+                        /* First, binary search for key */
+                        nodearray_sparse *elem;
+                        left = nodearray_sparse_search(a, key, &elem);
+
+                        if (nodearray_sparse_key(elem) == key) {
+                                *elem |= value;
+                                return;
+                        }
+
+                        /* We insert before `left`, so increment it if it's
+                         * out of order */
+                        if (nodearray_sparse_key(elem) < key)
+                                ++left;
+                }
+
+                if (size < max_sparse && (size + 1) < max / 4) {
+                        /* We didn't find it, but we know where to insert it. */
+
+                        nodearray_sparse *data = a->sparse;
+                        nodearray_sparse *data_move = data + left;
+
+                        bool realloc = (++a->size) > a->sparse_capacity;
+
+                        if (realloc) {
+                                a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
+
+                                a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
+
+                                if (left)
+                                        memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
+                        }
+
+                        nodearray_sparse *elem = a->sparse + left;
+
+                        if (left != size)
+                                memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
+
+                        *elem = nodearray_encode(key, value);
+
+                        if (realloc)
+                                free(data);
+
+                        return;
+                }
+
+                /* There are too many elements, so convert to a dense array */
+                nodearray old = *a;
+
+                a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
+                a->size = max;
+                a->sparse_capacity = ~0U;
+
+                nodearray_value *data = a->dense;
+
+                nodearray_sparse_foreach(&old, x) {
+                        unsigned key = nodearray_sparse_key(x);
+                        nodearray_value value = nodearray_sparse_value(x);
+
+                        assert(key < max);
+                        data[key] = value;
+                }
+
+                free(old.sparse);
+        }
+
+        a->dense[key] |= value;
+}
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif



More information about the mesa-commit mailing list