[Mesa-dev] [PATCH 2/2] SQUASH: intel/fs/bank_conflicts: Roll back to the nineties.
Francisco Jerez
currojerez at riseup.net
Thu Jun 22 19:20:12 UTC 2017
---
src/intel/compiler/brw_fs_bank_conflicts.cpp | 274 ++++++++++++++++++---------
1 file changed, 188 insertions(+), 86 deletions(-)
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index 0225c70..dc88cac 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -51,9 +51,6 @@
#include "brw_fs.h"
#include "brw_cfg.h"
-#include <vector>
-#include <array>
-
#ifdef __SSE2__
#include <emmintrin.h>
@@ -72,7 +69,9 @@ namespace {
/**
* SIMD integer vector data type.
*/
- typedef std::array<__m128i, 2> vector_type;
+ struct vector_type {
+ __m128i v[2];
+ };
/**
* Scalar data type matching the representation of a single component of \p
@@ -88,8 +87,7 @@ namespace {
/**
* Number of components of a \p vector_type.
*/
- const unsigned vector_width = 2 * sizeof(vector_type::value_type) /
- sizeof(scalar_type);
+ const unsigned vector_width = 2 * sizeof(__m128i) / sizeof(scalar_type);
/**
* Set the i-th component of vector \p v to \p x.
@@ -98,7 +96,7 @@ namespace {
set(vector_type &v, unsigned i, scalar_type x)
{
assert(i < vector_width);
- memcpy((char *)v.data() + i * sizeof(x), &x, sizeof(x));
+ memcpy((char *)v.v + i * sizeof(x), &x, sizeof(x));
}
/**
@@ -109,7 +107,7 @@ namespace {
{
assert(i < vector_width);
scalar_type x;
- memcpy(&x, (char *)v.data() + i * sizeof(x), sizeof(x));
+ memcpy(&x, (char *)v.v + i * sizeof(x), sizeof(x));
return x;
}
@@ -119,10 +117,10 @@ namespace {
vector_type
adds(const vector_type &v, const vector_type &w)
{
- const vector_type u = {
- _mm_adds_epi16(v[0], w[0]),
- _mm_adds_epi16(v[1], w[1])
- };
+ const vector_type u = {{
+ _mm_adds_epi16(v.v[0], w.v[0]),
+ _mm_adds_epi16(v.v[1], w.v[1])
+ }};
return u;
}
@@ -132,10 +130,10 @@ namespace {
vector_type
subs(const vector_type &v, const vector_type &w)
{
- const vector_type u = {
- _mm_subs_epi16(v[0], w[0]),
- _mm_subs_epi16(v[1], w[1])
- };
+ const vector_type u = {{
+ _mm_subs_epi16(v.v[0], w.v[0]),
+ _mm_subs_epi16(v.v[1], w.v[1])
+ }};
return u;
}
@@ -145,10 +143,10 @@ namespace {
vector_type
mask(const vector_type &v, const vector_type &w)
{
- const vector_type u = {
- _mm_and_si128(v[0], w[0]),
- _mm_and_si128(v[1], w[1])
- };
+ const vector_type u = {{
+ _mm_and_si128(v.v[0], w.v[0]),
+ _mm_and_si128(v.v[1], w.v[1])
+ }};
return u;
}
@@ -158,7 +156,7 @@ namespace {
scalar_type
sums(const vector_type &v)
{
- const __m128i v8 = _mm_adds_epi16(v[0], v[1]);
+ const __m128i v8 = _mm_adds_epi16(v.v[0], v.v[1]);
const __m128i v4 = _mm_adds_epi16(v8, _mm_shuffle_epi32(v8, 0x4e));
const __m128i v2 = _mm_adds_epi16(v4, _mm_shuffle_epi32(v4, 0xb1));
const __m128i v1 = _mm_adds_epi16(v2, _mm_shufflelo_epi16(v2, 0xb1));
@@ -225,7 +223,7 @@ namespace {
vector_type
adds(vector_type v, vector_type w)
{
- return std::max(INT16_MIN, std::min(INT16_MAX, int(v) + w));
+ return MAX2(INT16_MIN, MIN2(INT16_MAX, int(v) + w));
}
/**
@@ -234,7 +232,7 @@ namespace {
vector_type
subs(vector_type v, vector_type w)
{
- return std::max(INT16_MIN, std::min(INT16_MAX, int(v) - w));
+ return MAX2(INT16_MIN, MIN2(INT16_MAX, int(v) - w));
}
/**
@@ -258,6 +256,15 @@ namespace {
#endif
+/**
+ * Swap \p x and \p y.
+ */
+#define SWAP(x, y) do { \
+ __typeof(y) _swap_tmp = y; \
+ y = x; \
+ x = _swap_tmp; \
+ } while (0)
+
namespace {
/**
* Variable-length vector type intended to represent cycle-count costs for
@@ -267,7 +274,37 @@ namespace {
* atoms are assigned the same bank b or opposite-parity banks b and b^1).
* \sa shader_conflict_weight_matrix()
*/
- typedef std::vector<vector_type> weight_vector_type;
+ struct weight_vector_type {
+ weight_vector_type() : v(NULL), size(0) {}
+
+ weight_vector_type(unsigned n) :
+ v(new vector_type[DIV_ROUND_UP(n, vector_width)]()),
+ size(n) {}
+
+ weight_vector_type(const weight_vector_type &u) :
+ v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()),
+ size(u.size)
+ {
+ memcpy(v, u.v,
+ DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type));
+ }
+
+ ~weight_vector_type()
+ {
+ delete[] v;
+ }
+
+ weight_vector_type &
+ operator=(weight_vector_type u)
+ {
+ SWAP(v, u.v);
+ SWAP(size, u.size);
+ return *this;
+ }
+
+ vector_type *v;
+ unsigned size;
+ };
/**
* Set the (i, p)-th component of weight vector \p v to \p x.
@@ -275,7 +312,7 @@ namespace {
void
set(weight_vector_type &v, unsigned i, unsigned p, scalar_type x)
{
- set(v[(2 * i + p) / vector_width], (2 * i + p) % vector_width, x);
+ set(v.v[(2 * i + p) / vector_width], (2 * i + p) % vector_width, x);
}
/**
@@ -284,7 +321,7 @@ namespace {
scalar_type
get(const weight_vector_type &v, unsigned i, unsigned p)
{
- return get(v[(2 * i + p) / vector_width], (2 * i + p) % vector_width);
+ return get(v.v[(2 * i + p) / vector_width], (2 * i + p) % vector_width);
}
/**
@@ -316,13 +353,43 @@ namespace {
* Create a (for the moment unrestricted) partitioning of a register
* file of size \p n. The units are arbitrary.
*/
- partitioning(unsigned n) {
+ partitioning(unsigned n) :
+ max_reg(n),
+ offsets(new unsigned[n + num_terminator_atoms]),
+ atoms(new unsigned[n + num_terminator_atoms])
+ {
for (unsigned i = 0; i < n + num_terminator_atoms; i++) {
- offsets.push_back(i);
- atoms.push_back(i);
+ offsets[i] = i;
+ atoms[i] = i;
}
}
+ partitioning(const partitioning &p) :
+ max_reg(p.max_reg),
+ offsets(new unsigned[p.num_atoms() + num_terminator_atoms]),
+ atoms(new unsigned[p.max_reg + num_terminator_atoms])
+ {
+ memcpy(offsets, p.offsets,
+ sizeof(unsigned) * (p.num_atoms() + num_terminator_atoms));
+ memcpy(atoms, p.atoms,
+ sizeof(unsigned) * (p.max_reg + num_terminator_atoms));
+ }
+
+ ~partitioning()
+ {
+ delete[] offsets;
+ delete[] atoms;
+ }
+
+ partitioning &
+ operator=(partitioning p)
+ {
+ SWAP(max_reg, p.max_reg);
+ SWAP(offsets, p.offsets);
+ SWAP(atoms, p.atoms);
+ return *this;
+ }
+
/**
* Require register range [reg, reg + n[ to be considered part of the
* same atom.
@@ -336,7 +403,7 @@ namespace {
* case that the specified contiguity requirement leads to the fusion
* (yay) of one or more existing atoms.
*/
- for (unsigned reg1 = reg + 1; reg1 < atoms.size(); reg1++) {
+ for (unsigned reg1 = reg + 1; reg1 <= max_reg; reg1++) {
if (offsets[atoms[reg1]] < reg + n) {
atoms[reg1] = r;
} else {
@@ -347,11 +414,6 @@ namespace {
atoms[reg1] = r;
}
}
-
- /* Clean up the scraps if we ended up with less atoms than we started
- * with.
- */
- offsets.erase(offsets.begin() + r + 1, offsets.end());
}
/**
@@ -388,7 +450,7 @@ namespace {
unsigned
num_atoms() const
{
- return offsets.size() - num_terminator_atoms;
+ return atoms[max_reg];
}
private:
@@ -398,8 +460,9 @@ namespace {
* size_of_atom().
*/
static const unsigned num_terminator_atoms = 1;
- std::vector<unsigned> offsets;
- std::vector<unsigned> atoms;
+ unsigned max_reg;
+ unsigned *offsets;
+ unsigned *atoms;
};
/**
@@ -455,10 +518,10 @@ namespace {
* Return the set of GRF atoms that should be left untouched at their
* original location to avoid violating hardware or software assumptions.
*/
- std::vector<bool>
+ bool *
shader_reg_constraints(const fs_visitor *v, const partitioning &p)
{
- std::vector<bool> constrained(p.num_atoms());
+ bool *constrained = new bool[p.num_atoms()]();
/* These are read implicitly by some send-message instructions without
* any indication at the IR level. Assume they are unsafe to move
@@ -520,12 +583,13 @@ namespace {
* meantime optimizing based on Gen9 weights is likely to be more
* helpful than not optimizing at all.
*/
- std::vector<weight_vector_type>
+ weight_vector_type *
shader_conflict_weight_matrix(const fs_visitor *v, const partitioning &p)
{
- std::vector<weight_vector_type> conflicts(p.num_atoms(),
- weight_vector_type(DIV_ROUND_UP(2 * p.num_atoms(),
- vector_width)));
+ weight_vector_type *conflicts = new weight_vector_type[p.num_atoms()];
+ for (unsigned r = 0; r < p.num_atoms(); r++)
+ conflicts[r] = weight_vector_type(2 * p.num_atoms());
+
/* Crude approximation of the number of times the current basic block
* will be executed at run-time.
*/
@@ -575,8 +639,8 @@ namespace {
* between atoms r and s. Note that the weight matrix is
* symmetric with respect to indices r and s by construction.
*/
- const scalar_type w = std::min(unsigned(max_scalar),
- get(conflicts[r], s, p) + cycle_scale);
+ const scalar_type w = MIN2(unsigned(max_scalar),
+ get(conflicts[r], s, p) + cycle_scale);
set(conflicts[r], s, p, w);
set(conflicts[s], r, p, w);
}
@@ -592,14 +656,16 @@ namespace {
* the specified \p conflicts matrix (\sa
* shader_conflict_weight_matrix()).
*/
- std::vector<bool>
- have_any_conflicts(const std::vector<weight_vector_type> &conflicts)
+ bool *
+ have_any_conflicts(const partitioning &p,
+ const weight_vector_type *conflicts)
{
- std::vector<bool> any_conflicts(conflicts.size());
+ bool *any_conflicts = new bool[p.num_atoms()]();
- for (unsigned r = 0; r < conflicts.size(); r++) {
- for (unsigned s = 0; s < conflicts[r].size(); s++)
- any_conflicts[r] = any_conflicts[r] || sums(conflicts[r][s]);
+ for (unsigned r = 0; r < p.num_atoms(); r++) {
+ const unsigned m = DIV_ROUND_UP(conflicts[r].size, vector_width);
+ for (unsigned s = 0; s < m; s++)
+ any_conflicts[r] |= sums(conflicts[r].v[s]);
}
return any_conflicts;
@@ -627,27 +693,60 @@ namespace {
const weight_vector_type &bank_mask_n,
const weight_vector_type &conflicts)
{
+ const unsigned m = DIV_ROUND_UP(conflicts.size, vector_width);
vector_type s_p = {}, s_n = {};
- for (unsigned r = 0; r < conflicts.size(); r++) {
- s_p = adds(s_p, mask(bank_mask_p[r], conflicts[r]));
- s_n = adds(s_n, mask(bank_mask_n[r], conflicts[r]));
+ for (unsigned r = 0; r < m; r++) {
+ s_p = adds(s_p, mask(bank_mask_p.v[r], conflicts.v[r]));
+ s_n = adds(s_n, mask(bank_mask_n.v[r], conflicts.v[r]));
}
return sums(subs(s_p, s_n));
}
/**
- * Return an identity permutation of GRF atoms, represented as the start GRF
- * offset each atom is mapped into.
+ * Register atom permutation, represented as the start GRF offset each atom
+ * is mapped into.
+ */
+ struct permutation {
+ permutation() : v(NULL), size(0) {}
+
+ permutation(unsigned n) :
+ v(new unsigned[n]()), size(n) {}
+
+ permutation(const permutation &p) :
+ v(new unsigned[p.size]), size(p.size)
+ {
+ memcpy(v, p.v, p.size * sizeof(unsigned));
+ }
+
+ ~permutation()
+ {
+ delete[] v;
+ }
+
+ permutation &
+ operator=(permutation p)
+ {
+ SWAP(v, p.v);
+ SWAP(size, p.size);
+ return *this;
+ }
+
+ unsigned *v;
+ unsigned size;
+ };
+
+ /**
+ * Return an identity permutation of GRF atoms.
*/
- std::vector<unsigned>
+ permutation
identity_reg_permutation(const partitioning &p)
{
- std::vector<unsigned> map(p.num_atoms());
+ permutation map(p.num_atoms());
- for (unsigned r = 0; r < map.size(); r++)
- map[r] = p.reg_of_atom(r);
+ for (unsigned r = 0; r < map.size; r++)
+ map.v[r] = p.reg_of_atom(r);
return map;
}
@@ -671,18 +770,18 @@ namespace {
* characteristic function of each bank, if you regard it as a set
* containing all atoms assigned to it according to the \p map array.
*/
- std::array<weight_vector_type, 4>
- bank_characteristics(const std::vector<unsigned> &map)
+ weight_vector_type *
+ bank_characteristics(const permutation &map)
{
- std::array<weight_vector_type, 4> banks;
+ weight_vector_type *banks = new weight_vector_type[4];
- for (unsigned b = 0; b < banks.size(); b++) {
- banks[b].resize(DIV_ROUND_UP(2 * map.size(), vector_width));
+ for (unsigned b = 0; b < 4; b++) {
+ banks[b] = weight_vector_type(2 * map.size);
- for (unsigned j = 0; j < map.size(); j++) {
+ for (unsigned j = 0; j < map.size; j++) {
for (unsigned p = 0; p < 2; p++)
set(banks[b], j, p,
- (b ^ p) == bank_of(map[j]) ? -1 : 0);
+ (b ^ p) == bank_of(map.v[j]) ? -1 : 0);
}
}
@@ -697,24 +796,24 @@ namespace {
* may allow it to do a better job in some cases -- It simply reorders
* existing atoms in the GRF space without affecting their identity.
*/
- std::vector<unsigned>
+ permutation
optimize_reg_permutation(const partitioning &p,
- const std::vector<bool> &constrained,
- const std::vector<weight_vector_type> &conflicts,
- std::vector<unsigned> map)
+ const bool *constrained,
+ const weight_vector_type *conflicts,
+ permutation map)
{
- const std::vector<bool> any_conflicts = have_any_conflicts(conflicts);
- std::array<weight_vector_type, 4> banks = bank_characteristics(map);
+ const bool *any_conflicts = have_any_conflicts(p, conflicts);
+ weight_vector_type *banks = bank_characteristics(map);
- for (unsigned r = 0; r < map.size(); r++) {
- const unsigned bank_r = bank_of(map[r]);
+ for (unsigned r = 0; r < map.size; r++) {
+ const unsigned bank_r = bank_of(map.v[r]);
if (!constrained[r]) {
unsigned best_s = r;
int best_benefit = 0;
- for (unsigned s = 0; s < map.size(); s++) {
- const unsigned bank_s = bank_of(map[s]);
+ for (unsigned s = 0; s < map.size; s++) {
+ const unsigned bank_s = bank_of(map.v[s]);
if (bank_r != bank_s && !constrained[s] &&
p.size_of_atom(r) == p.size_of_atom(s) &&
@@ -731,16 +830,18 @@ namespace {
}
if (best_s != r) {
- for (unsigned b = 0; b < banks.size(); b++) {
+ for (unsigned b = 0; b < 4; b++) {
for (unsigned p = 0; p < 2; p++)
swap(banks[b], r, p, best_s, p);
}
- std::swap(map[r], map[best_s]);
+ SWAP(map.v[r], map.v[best_s]);
}
}
}
+ delete[] banks;
+ delete[] any_conflicts;
return map;
}
@@ -749,13 +850,12 @@ namespace {
* return the result.
*/
fs_reg
- transform(const partitioning &p, const std::vector<unsigned> &map,
- fs_reg r)
+ transform(const partitioning &p, const permutation &map, fs_reg r)
{
if (r.file == VGRF) {
const unsigned reg = reg_of(r);
const unsigned s = p.atom_of_reg(reg);
- r.nr = map[s] + reg - p.reg_of_atom(s);
+ r.nr = map.v[s] + reg - p.reg_of_atom(s);
r.offset = r.offset % REG_SIZE;
}
@@ -773,10 +873,10 @@ fs_visitor::opt_bank_conflicts()
return false;
const partitioning p = shader_reg_partitioning(this);
- const std::vector<bool> constrained = shader_reg_constraints(this, p);
- const std::vector<weight_vector_type> conflicts =
+ const bool *constrained = shader_reg_constraints(this, p);
+ const weight_vector_type *conflicts =
shader_conflict_weight_matrix(this, p);
- const std::vector<unsigned> map =
+ const permutation map =
optimize_reg_permutation(p, constrained, conflicts,
identity_reg_permutation(p));
@@ -787,5 +887,7 @@ fs_visitor::opt_bank_conflicts()
inst->src[i] = transform(p, map, inst->src[i]);
}
+ delete[] conflicts;
+ delete[] constrained;
return true;
}
--
2.10.2
More information about the mesa-dev mailing list