Mesa (main): pan/bi: Calculate dependency graph when bundling
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jul 12 23:44:38 UTC 2021
Module: Mesa
Branch: main
Commit: 599662205020df1bc919d48a9809251dbd366714
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=599662205020df1bc919d48a9809251dbd366714
Author: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Date: Wed Jan 20 21:02:12 2021 -0500
pan/bi: Calculate dependency graph when bundling
Code is ported from Midgard, modified to be scalar, post-RA, and to put
the arrays on the worklist instead of the instruction to save memory.
This enables out-of-order scheduling.
total tuples in shared programs: 128691 -> 125304 (-2.63%)
tuples in affected programs: 114091 -> 110704 (-2.97%)
helped: 844
HURT: 377
helped stats (abs) min: 1.0 max: 150.0 x̄: 4.88 x̃: 3
helped stats (rel) min: 0.30% max: 26.42% x̄: 5.56% x̃: 4.35%
HURT stats (abs) min: 1.0 max: 8.0 x̄: 1.94 x̃: 1
HURT stats (rel) min: 0.20% max: 33.33% x̄: 6.84% x̃: 3.23%
95% mean confidence interval for tuples value: -3.16 -2.38
95% mean confidence interval for tuples %-change: -2.19% -1.27%
Tuples are helped.
total clauses in shared programs: 27579 -> 26059 (-5.51%)
clauses in affected programs: 20606 -> 19086 (-7.38%)
helped: 941
HURT: 39
helped stats (abs) min: 1.0 max: 21.0 x̄: 1.66 x̃: 1
helped stats (rel) min: 0.69% max: 44.44% x̄: 10.48% x̃: 9.09%
HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.15 x̃: 1
HURT stats (rel) min: 1.89% max: 10.00% x̄: 4.73% x̃: 4.55%
95% mean confidence interval for clauses value: -1.63 -1.47
95% mean confidence interval for clauses %-change: -10.27% -9.48%
Clauses are helped.
total cycles in shared programs: 12262.54 -> 12154.79 (-0.88%)
cycles in affected programs: 2210.54 -> 2102.79 (-4.87%)
helped: 374
HURT: 56
helped stats (abs) min: 0.041665999999999315 max: 6.25 x̄: 0.30 x̃: 0
helped stats (rel) min: 0.42% max: 26.00% x̄: 6.90% x̃: 7.14%
HURT stats (abs) min: 0.041665999999999315 max: 0.5833319999999986 x̄: 0.11 x̃: 0
HURT stats (rel) min: 0.16% max: 100.00% x̄: 55.17% x̃: 50.00%
95% mean confidence interval for cycles value: -0.29 -0.21
95% mean confidence interval for cycles %-change: -1.37% 3.73%
Inconclusive result (%-change mean confidence interval includes 0).
total arith in shared programs: 4852.29 -> 4658.13 (-4.00%)
arith in affected programs: 4525.17 -> 4331 (-4.29%)
helped: 1112
HURT: 166
helped stats (abs) min: 0.041665999999999315 max: 6.25 x̄: 0.19 x̃: 0
helped stats (rel) min: 0.42% max: 33.33% x̄: 6.59% x̃: 5.36%
HURT stats (abs) min: 0.041665999999999315 max: 0.5833319999999986 x̄: 0.07 x̃: 0
HURT stats (rel) min: 0.16% max: 100.00% x̄: 25.05% x̃: 2.40%
95% mean confidence interval for arith value: -0.17 -0.14
95% mean confidence interval for arith %-change: -3.44% -1.51%
Arith are helped.
total quadwords in shared programs: 117141 -> 111394 (-4.91%)
quadwords in affected programs: 104390 -> 98643 (-5.51%)
helped: 1245
HURT: 76
helped stats (abs) min: 1.0 max: 69.0 x̄: 4.74 x̃: 4
helped stats (rel) min: 0.28% max: 35.00% x̄: 7.88% x̃: 6.45%
HURT stats (abs) min: 1.0 max: 8.0 x̄: 2.01 x̃: 1
HURT stats (rel) min: 0.20% max: 10.00% x̄: 3.52% x̃: 4.25%
95% mean confidence interval for quadwords value: -4.61 -4.09
95% mean confidence interval for quadwords %-change: -7.56% -6.88%
Quadwords are helped.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>
---
src/panfrost/bifrost/bi_schedule.c | 181 +++++++++++++++++++++++++++++++--
src/panfrost/bifrost/bifrost.h | 1 +
src/panfrost/bifrost/bifrost_compile.c | 3 +-
3 files changed, 177 insertions(+), 8 deletions(-)
diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c
index e4099174004..468a9868085 100644
--- a/src/panfrost/bifrost/bi_schedule.c
+++ b/src/panfrost/bifrost/bi_schedule.c
@@ -38,6 +38,13 @@ struct bi_worklist {
/* Bitset of instructions in the block ready for scheduling */
BITSET_WORD *worklist;
+
+ /* The backwards dependency graph. nr_dependencies is the number of
+ * unscheduled instructions that must still be scheduled after (before)
+ * this instruction. dependents are which instructions need to be
+ * scheduled before (after) this instruction. */
+ unsigned *dep_counts;
+ BITSET_WORD **dependents;
};
/* State of a single tuple and clause under construction */
@@ -149,6 +156,138 @@ bi_supports_dtsel(bi_instr *ins)
}
}
+/* Adds an edge to the dependency graph */
+
+static void
+bi_push_dependency(unsigned parent, unsigned child,
+ BITSET_WORD **dependents, unsigned *dep_counts)
+{
+ if (!BITSET_TEST(dependents[parent], child)) {
+ BITSET_SET(dependents[parent], child);
+ dep_counts[child]++;
+ }
+}
+
+static void
+add_dependency(struct util_dynarray *table, unsigned index, unsigned child,
+ BITSET_WORD **dependents, unsigned *dep_counts)
+{
+ assert(index < 64);
+ util_dynarray_foreach(table + index, unsigned, parent)
+ bi_push_dependency(*parent, child, dependents, dep_counts);
+}
+
+static void
+mark_access(struct util_dynarray *table, unsigned index, unsigned parent)
+{
+ assert(index < 64);
+ util_dynarray_append(&table[index], unsigned, parent);
+}
+
+static bool
+bi_is_sched_barrier(bi_instr *I)
+{
+ switch (I->op) {
+ case BI_OPCODE_BARRIER:
+ case BI_OPCODE_DISCARD_F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void
+bi_create_dependency_graph(struct bi_worklist st, bool inorder)
+{
+ struct util_dynarray last_read[64], last_write[64];
+
+ for (unsigned i = 0; i < 64; ++i) {
+ util_dynarray_init(&last_read[i], NULL);
+ util_dynarray_init(&last_write[i], NULL);
+ }
+
+ /* Initialize dependency graph */
+ for (unsigned i = 0; i < st.count; ++i) {
+ st.dependents[i] =
+ calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
+
+ st.dep_counts[i] = 0;
+ }
+
+ unsigned prev_msg = ~0;
+
+ /* Populate dependency graph */
+ for (signed i = st.count - 1; i >= 0; --i) {
+ bi_instr *ins = st.instructions[i];
+
+ bi_foreach_src(ins, s) {
+ if (ins->src[s].type != BI_INDEX_REGISTER) continue;
+ unsigned count = bi_count_read_registers(ins, s);
+
+ for (unsigned c = 0; c < count; ++c)
+ add_dependency(last_write, ins->src[s].value + c, i, st.dependents, st.dep_counts);
+ }
+
+ /* Keep message-passing ops in order. (This pass only cares
+ * about bundling; reordering of message-passing instructions
+ * happens during earlier scheduling.) */
+
+ if (bi_message_type_for_instr(ins)) {
+ if (prev_msg != ~0)
+ bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts);
+
+ prev_msg = i;
+ }
+
+ /* Handle schedule barriers, adding All the deps */
+ if (inorder || bi_is_sched_barrier(ins)) {
+ for (unsigned j = 0; j < st.count; ++j) {
+ if (i == j) continue;
+
+ bi_push_dependency(MAX2(i, j), MIN2(i, j),
+ st.dependents, st.dep_counts);
+ }
+ }
+
+ bi_foreach_dest(ins, d) {
+ if (ins->dest[d].type != BI_INDEX_REGISTER) continue;
+ unsigned dest = ins->dest[d].value;
+
+ unsigned count = bi_count_write_registers(ins, d);
+
+ for (unsigned c = 0; c < count; ++c) {
+ add_dependency(last_read, dest + c, i, st.dependents, st.dep_counts);
+ add_dependency(last_write, dest + c, i, st.dependents, st.dep_counts);
+ mark_access(last_write, dest + c, i);
+ }
+ }
+
+ bi_foreach_src(ins, s) {
+ if (ins->src[s].type != BI_INDEX_REGISTER) continue;
+
+ unsigned count = bi_count_read_registers(ins, s);
+
+ for (unsigned c = 0; c < count; ++c)
+ mark_access(last_read, ins->src[s].value + c, i);
+ }
+ }
+
+ /* If there is a branch, all instructions depend on it, as interblock
+ * execution must be purely in-order */
+
+ bi_instr *last = st.instructions[st.count - 1];
+ if (last->branch_target || last->op == BI_OPCODE_JUMP) {
+ for (signed i = st.count - 2; i >= 0; --i)
+ bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts);
+ }
+
+ /* Free the intermediate structures */
+ for (unsigned i = 0; i < 64; ++i) {
+ util_dynarray_fini(&last_read[i]);
+ util_dynarray_fini(&last_write[i]);
+ }
+}
+
/* Scheduler pseudoinstruction lowerings to enable instruction pairings.
* Currently only support CUBEFACE -> *CUBEFACE1/+CUBEFACE2
*/
@@ -275,14 +414,23 @@ bi_flatten_block(bi_block *block, unsigned *len)
*/
static struct bi_worklist
-bi_initialize_worklist(bi_block *block)
+bi_initialize_worklist(bi_block *block, bool inorder)
{
struct bi_worklist st = { };
st.instructions = bi_flatten_block(block, &st.count);
- if (st.count) {
- st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
- BITSET_SET(st.worklist, st.count - 1);
+ if (!st.count)
+ return st;
+
+ st.dependents = calloc(st.count, sizeof(st.dependents[0]));
+ st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0]));
+
+ bi_create_dependency_graph(st, inorder);
+ st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
+
+ for (unsigned i = 0; i < st.count; ++i) {
+ if (st.dep_counts[i] == 0)
+ BITSET_SET(st.worklist, i);
}
return st;
@@ -291,6 +439,8 @@ bi_initialize_worklist(bi_block *block)
static void
bi_free_worklist(struct bi_worklist st)
{
+ free(st.dep_counts);
+ free(st.dependents);
free(st.instructions);
free(st.worklist);
}
@@ -298,8 +448,24 @@ bi_free_worklist(struct bi_worklist st)
static void
bi_update_worklist(struct bi_worklist st, unsigned idx)
{
- if (idx >= 1)
- BITSET_SET(st.worklist, idx - 1);
+ assert(st.dep_counts[idx] == 0);
+
+ if (!st.dependents[idx])
+ return;
+
+ /* Iterate each dependent to remove one dependency (`done`),
+ * adding dependents to the worklist where possible. */
+
+ unsigned i;
+ BITSET_FOREACH_SET(i, st.dependents[idx], st.count) {
+ assert(st.dep_counts[i] != 0);
+ unsigned new_deps = --st.dep_counts[i];
+
+ if (new_deps == 0)
+ BITSET_SET(st.worklist, i);
+ }
+
+ free(st.dependents[idx]);
}
/* To work out the back-to-back flag, we need to detect branches and
@@ -1568,7 +1734,8 @@ bi_schedule_block(bi_context *ctx, bi_block *block)
list_inithead(&block->clauses);
/* Copy list to dynamic array */
- struct bi_worklist st = bi_initialize_worklist(block);
+ struct bi_worklist st = bi_initialize_worklist(block,
+ bifrost_debug & BIFROST_DBG_INORDER);
if (!st.count) {
bi_free_worklist(st);
diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h
index be3a2d0cb33..436404f0be0 100644
--- a/src/panfrost/bifrost/bifrost.h
+++ b/src/panfrost/bifrost/bifrost.h
@@ -36,6 +36,7 @@
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
+#define BIFROST_DBG_INORDER 0x0040
extern int bifrost_debug;
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 3b577944a77..594d02d9b46 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -43,7 +43,8 @@ static const struct debug_named_value bifrost_debug_options[] = {
{"shaderdb", BIFROST_DBG_SHADERDB, "Print statistics"},
{"verbose", BIFROST_DBG_VERBOSE, "Disassemble verbosely"},
{"internal", BIFROST_DBG_INTERNAL, "Dump even internal shaders"},
- {"nosched", BIFROST_DBG_NOSCHED, "Force trivial scheduling"},
+ {"nosched", BIFROST_DBG_NOSCHED, "Force trivial bundling"},
+ {"inorder", BIFROST_DBG_INORDER, "Force in-order bundling"},
DEBUG_NAMED_VALUE_END
};
More information about the mesa-commit
mailing list