Mesa (master): freedreno/ir3/sched: avoid scheduling outputs

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed May 13 03:56:40 UTC 2020


Module: Mesa
Branch: master
Commit: d95a6e3a0ca2d4a420306dd078cea05d3f21c865
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d95a6e3a0ca2d4a420306dd078cea05d3f21c865

Author: Rob Clark <robdclark at chromium.org>
Date:   Wed May  6 10:20:14 2020 -0700

freedreno/ir3/sched: avoid scheduling outputs

If an instruction's only use is as an output, and it increases register
pressure, then try to avoid scheduling it until there are no other
options.

A semi-common pattern is `fragcolN.a = 1.0`; this change pushes all of
these immed loads to the end of the shader.

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4923>

---

 src/freedreno/ir3/ir3.h       | 14 +++++++
 src/freedreno/ir3/ir3_ra.h    | 14 -------
 src/freedreno/ir3/ir3_sched.c | 95 +++++++++++++++++++++++++++++++++++++++----
 3 files changed, 101 insertions(+), 22 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 9ec324e4e4a..247dca19564 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -846,6 +846,20 @@ static inline unsigned dest_regs(struct ir3_instruction *instr)
 	return util_last_bit(instr->regs[0]->wrmask);
 }
 
+static inline bool
+writes_gpr(struct ir3_instruction *instr)
+{
+	if (dest_regs(instr) == 0)
+		return false;
+	/* is dest a normal temp register: */
+	struct ir3_register *reg = instr->regs[0];
+	debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
+	if ((reg_num(reg) == REG_A0) ||
+			(reg->num == regid(REG_P0, 0)))
+		return false;
+	return true;
+}
+
 static inline bool writes_addr0(struct ir3_instruction *instr)
 {
 	if (instr->regs_count > 0) {
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
index 35fb618c49a..7acfdd0443d 100644
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -205,20 +205,6 @@ scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
 	return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
 }
 
-static inline bool
-writes_gpr(struct ir3_instruction *instr)
-{
-	if (dest_regs(instr) == 0)
-		return false;
-	/* is dest a normal temp register: */
-	struct ir3_register *reg = instr->regs[0];
-	debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
-	if ((reg_num(reg) == REG_A0) ||
-			(reg->num == regid(REG_P0, 0)))
-		return false;
-	return true;
-}
-
 #define NO_NAME ~0
 
 /*
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index 46448c10b4b..b4f1bc0a187 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -124,6 +124,18 @@ struct ir3_sched_node {
 	 * If so, we should prioritize it when possible
 	 */
 	bool kill_path;
+
+	/* This node represents a shader output.  A semi-common pattern in
+	 * shaders is something along the lines of:
+	 *
+	 *    fragcolor.w = 1.0
+	 *
+	 * Which we'd prefer to schedule as late as possible, since it
+	 * produces a live value that is never killed/consumed.  So detect
+	 * outputs up-front, and avoid scheduling them unless they reduce
+	 * register pressure (or at least are neutral)
+	 */
+	bool output;
 };
 
 #define foreach_sched_node(__n, __list) \
@@ -394,12 +406,18 @@ live_effect(struct ir3_instruction *instr)
 	return new_live - freed_live;
 }
 
+static struct ir3_sched_node * choose_instr_inc(struct ir3_sched_ctx *ctx,
+		struct ir3_sched_notes *notes, bool avoid_output);
+
 /**
  * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
  * Scheduling for Register pressure) heuristic.
+ *
+ * Only handles the case of choosing instructions that reduce register pressure
+ * or are even.
  */
 static struct ir3_sched_node *
-choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
+choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 {
 	struct ir3_sched_node *chosen = NULL;
 
@@ -422,7 +440,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (freed+ready)");
+		di(chosen->instr, "dec: chose (freed+ready)");
 		return chosen;
 	}
 
@@ -440,7 +458,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (freed)");
+		di(chosen->instr, "dec: chose (freed)");
 		return chosen;
 	}
 
@@ -468,7 +486,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (neutral+ready)");
+		di(chosen->instr, "dec: chose (neutral+ready)");
 		return chosen;
 	}
 
@@ -484,10 +502,23 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (neutral)");
+		di(chosen->instr, "dec: chose (neutral)");
 		return chosen;
 	}
 
+	return choose_instr_inc(ctx, notes, true);
+}
+
+/**
+ * When we can't choose an instruction that reduces register pressure or
+ * is neutral, we end up here to try and pick the least bad option.
+ */
+static struct ir3_sched_node *
+choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+		bool avoid_output)
+{
+	struct ir3_sched_node *chosen = NULL;
+
 	/*
 	 * From hear on out, we are picking something that increases
 	 * register pressure.  So try to pick something which will
@@ -497,6 +528,9 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 
 	/* Pick the max delay of the remaining ready set. */
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_output && n->output)
+			continue;
+
 		unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);
 
 		if (d > 0)
@@ -514,12 +548,15 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (distance+ready)");
+		di(chosen->instr, "inc: chose (distance+ready)");
 		return chosen;
 	}
 
 	/* Pick the max delay of the remaining leaders. */
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_output && n->output)
+			continue;
+
 		if (!check_instr(ctx, notes, n->instr))
 			continue;
 
@@ -532,7 +569,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "csr: chose (distance)");
+		di(chosen->instr, "inc: chose (distance)");
 		return chosen;
 	}
 
@@ -594,7 +631,11 @@ choose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	if (chosen)
 		return chosen->instr;
 
-	chosen = choose_instr_csr(ctx, notes);
+	chosen = choose_instr_dec(ctx, notes);
+	if (chosen)
+		return chosen->instr;
+
+	chosen = choose_instr_inc(ctx, notes, false);
 	if (chosen)
 		return chosen->instr;
 
@@ -759,6 +800,39 @@ mark_kill_path(struct ir3_instruction *instr)
 	}
 }
 
+/* Is it an output? */
+static bool
+is_output_collect(struct ir3_instruction *instr)
+{
+	struct ir3 *ir = instr->block->shader;
+
+	for (unsigned i = 0; i < ir->outputs_count; i++) {
+		struct ir3_instruction *collect = ir->outputs[i];
+		assert(collect->opc == OPC_META_COLLECT);
+		if (instr == collect)
+			return true;
+	}
+
+	return false;
+}
+
+/* Is its only use as output? */
+static bool
+is_output_only(struct ir3_instruction *instr)
+{
+	if (!writes_gpr(instr))
+		return false;
+
+	if (!(instr->regs[0]->flags & IR3_REG_SSA))
+		return false;
+
+	foreach_ssa_use (use, instr)
+		if (!is_output_collect(use))
+			return false;
+
+	return true;
+}
+
 static void
 sched_node_add_deps(struct ir3_instruction *instr)
 {
@@ -777,6 +851,11 @@ sched_node_add_deps(struct ir3_instruction *instr)
 	if (is_kill(instr) || is_input(instr)) {
 		mark_kill_path(instr);
 	}
+
+	if (is_output_only(instr)) {
+		struct ir3_sched_node *n = instr->data;
+		n->output = true;
+	}
 }
 
 static void



More information about the mesa-commit mailing list