Mesa (main): freedreno/ir3: Reduce choose_instr_dec() and _inc() overhead.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jul 19 23:33:08 UTC 2021
Module: Mesa
Branch: main
Commit: bda26dfcfc5f9012ab1bd22f2bbaa664315e2671
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bda26dfcfc5f9012ab1bd22f2bbaa664315e2671
Author: Emma Anholt <emma at anholt.net>
Date: Tue Jul 13 13:39:36 2021 -0700
freedreno/ir3: Reduce choose_instr_dec() and _inc() overhead.
If no freed+ready instruction was available, the live_effect() and
check_instr() logic was redone multiple times per instr, once in each
successive fallback loop. Replace the multiple loops in each function
with a single loop that ranks the candidates, which I think is more
readable, reducing the overhead in the process.
debugoptimized dEQP-GLES31.functional.ubo.random.all_per_block_buffers.20
runtime goes from ~3.5s -> ~3.0s on my lazor. No shader-db change.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11855>
---
src/freedreno/ir3/ir3_sched.c | 185 +++++++++++++++++++-----------------------
1 file changed, 82 insertions(+), 103 deletions(-)
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index 7b452d630cc..12ef6954821 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -593,6 +593,30 @@ static struct ir3_sched_node *choose_instr_inc(struct ir3_sched_ctx *ctx,
struct ir3_sched_notes *notes,
bool defer, bool avoid_output);
+enum choose_instr_dec_rank {
+ DEC_NEUTRAL,
+ DEC_NEUTRAL_READY,
+ DEC_FREED,
+ DEC_FREED_READY,
+};
+
+static const char *
+dec_rank_name(enum choose_instr_dec_rank rank)
+{
+ switch (rank) {
+ case DEC_NEUTRAL:
+ return "neutral";
+ case DEC_NEUTRAL_READY:
+ return "neutral+ready";
+ case DEC_FREED:
+ return "freed";
+ case DEC_FREED_READY:
+ return "freed+ready";
+ default:
+ return NULL;
+ }
+}
+
/**
* Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
* Scheduling for Register pressure) heuristic.
@@ -606,8 +630,8 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
{
const char *mode = defer ? "-d" : "";
struct ir3_sched_node *chosen = NULL;
+ enum choose_instr_dec_rank chosen_rank = DEC_NEUTRAL;
- /* Find a ready inst with regs freed and pick the one with max cost. */
foreach_sched_node (n, &ctx->dag->heads) {
if (defer && should_defer(ctx, n->instr))
continue;
@@ -615,97 +639,70 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
/* Note: mergedregs is only used post-RA, just set it to false */
unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
- if (d > 0)
- continue;
-
- if (live_effect(n->instr) > -1)
+ int live = live_effect(n->instr);
+ if (live > 0)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
- if (!chosen || chosen->max_delay < n->max_delay) {
- chosen = n;
+ enum choose_instr_dec_rank rank;
+ if (live < 0) {
+ /* Prioritize instrs which free up regs and can be scheduled with no
+ * delay.
+ */
+ if (d == 0)
+ rank = DEC_FREED_READY;
+ else
+ rank = DEC_FREED;
+ } else {
+ /* Contra the paper, pick a leader with no effect on used regs. This
+ * may open up new opportunities, as otherwise a single-operand instr
+ * consuming a value will tend to block finding freeing that value.
+ * This had a massive effect on reducing spilling on V3D.
+ *
+ * XXX: Should this prioritize ready?
+ */
+ if (d == 0)
+ rank = DEC_NEUTRAL_READY;
+ else
+ rank = DEC_NEUTRAL;
}
- }
-
- if (chosen) {
- di(chosen->instr, "dec%s: chose (freed+ready)", mode);
- return chosen;
- }
-
- /* Find a leader with regs freed and pick the one with max cost. */
- foreach_sched_node (n, &ctx->dag->heads) {
- if (defer && should_defer(ctx, n->instr))
- continue;
-
- if (live_effect(n->instr) > -1)
- continue;
- if (!check_instr(ctx, notes, n->instr))
- continue;
-
- if (!chosen || chosen->max_delay < n->max_delay) {
+ /* Prefer higher-ranked instructions, or in the case of a rank tie, the
+ * highest latency-to-end-of-program instruction.
+ */
+ if (!chosen || rank > chosen_rank ||
+ (rank == chosen_rank && chosen->max_delay < n->max_delay)) {
chosen = n;
+ chosen_rank = rank;
}
}
if (chosen) {
- di(chosen->instr, "dec%s: chose (freed)", mode);
+ di(chosen->instr, "dec%s: chose (%s)", mode, dec_rank_name(chosen_rank));
return chosen;
}
- /* Contra the paper, pick a leader with no effect on used regs. This may
- * open up new opportunities, as otherwise a single-operand instr consuming
- * a value will tend to block finding freeing that value. This had a
- * massive effect on reducing spilling on V3D.
- *
- * XXX: Should this prioritize ready?
- */
- foreach_sched_node (n, &ctx->dag->heads) {
- if (defer && should_defer(ctx, n->instr))
- continue;
-
- unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
-
- if (d > 0)
- continue;
-
- if (live_effect(n->instr) > 0)
- continue;
-
- if (!check_instr(ctx, notes, n->instr))
- continue;
-
- if (!chosen || chosen->max_delay < n->max_delay)
- chosen = n;
- }
-
- if (chosen) {
- di(chosen->instr, "dec%s: chose (neutral+ready)", mode);
- return chosen;
- }
-
- foreach_sched_node (n, &ctx->dag->heads) {
- if (defer && should_defer(ctx, n->instr))
- continue;
-
- if (live_effect(n->instr) > 0)
- continue;
-
- if (!check_instr(ctx, notes, n->instr))
- continue;
+ return choose_instr_inc(ctx, notes, defer, true);
+}
- if (!chosen || chosen->max_delay < n->max_delay)
- chosen = n;
- }
+enum choose_instr_inc_rank {
+ INC_DISTANCE,
+ INC_DISTANCE_READY,
+};
- if (chosen) {
- di(chosen->instr, "dec%s: chose (neutral)", mode);
- return chosen;
+static const char *
+inc_rank_name(enum choose_instr_inc_rank rank)
+{
+ switch (rank) {
+ case INC_DISTANCE:
+ return "distance";
+ case INC_DISTANCE_READY:
+ return "distance+ready";
+ default:
+ return NULL;
}
-
- return choose_instr_inc(ctx, notes, defer, true);
}
/**
@@ -718,6 +715,7 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
{
const char *mode = defer ? "-d" : "";
struct ir3_sched_node *chosen = NULL;
+ enum choose_instr_inc_rank chosen_rank = INC_DISTANCE;
/*
* From hear on out, we are picking something that increases
@@ -734,48 +732,29 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
if (defer && should_defer(ctx, n->instr))
continue;
- unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
-
- if (d > 0)
- continue;
-
if (!check_instr(ctx, notes, n->instr))
continue;
- unsigned distance = nearest_use(n->instr);
-
- if (!chosen || distance < chosen_distance) {
- chosen = n;
- chosen_distance = distance;
- }
- }
-
- if (chosen) {
- di(chosen->instr, "inc%s: chose (distance+ready)", mode);
- return chosen;
- }
-
- /* Pick the max delay of the remaining leaders. */
- foreach_sched_node (n, &ctx->dag->heads) {
- if (avoid_output && n->output)
- continue;
-
- if (defer && should_defer(ctx, n->instr))
- continue;
+ unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
- if (!check_instr(ctx, notes, n->instr))
- continue;
+ enum choose_instr_inc_rank rank;
+ if (d == 0)
+ rank = INC_DISTANCE_READY;
+ else
+ rank = INC_DISTANCE;
unsigned distance = nearest_use(n->instr);
- if (!chosen || distance < chosen_distance) {
+ if (!chosen || rank > chosen_rank ||
+ (rank == chosen_rank && distance < chosen_distance)) {
chosen = n;
chosen_distance = distance;
+ chosen_rank = rank;
}
}
if (chosen) {
- di(chosen->instr, "inc%s: chose (distance)", mode);
+ di(chosen->instr, "inc%s: chose (%s)", mode, inc_rank_name(chosen_rank));
return chosen;
}
More information about the mesa-commit
mailing list