Mesa (master): freedreno/ir3: update SFU delay

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Feb 28 17:12:35 UTC 2020


Module: Mesa
Branch: master
Commit: 56565b7bba54b8298d2c14c66bb87c59930b09ee
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=56565b7bba54b8298d2c14c66bb87c59930b09ee

Author: Rob Clark <robdclark at chromium.org>
Date:   Tue Feb 25 10:44:26 2020 -0800

freedreno/ir3: update SFU delay

1) empirically, 10 seems like a more accurate # than 4
2) push "soft" delay handling into ir3_delayslots(), as
   we should also be using it to calculate the costs
   that the schedulers use

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3989>

---

 src/freedreno/ir3/ir3.h           |  2 +-
 src/freedreno/ir3/ir3_delay.c     | 26 ++++++++++++++++----------
 src/freedreno/ir3/ir3_depth.c     |  2 +-
 src/freedreno/ir3/ir3_postsched.c |  2 +-
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 21fd8c602b9..b66d8e2d6fd 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1157,7 +1157,7 @@ void ir3_print_instr(struct ir3_instruction *instr);
 
 /* delay calculation: */
 int ir3_delayslots(struct ir3_instruction *assigner,
-		struct ir3_instruction *consumer, unsigned n);
+		struct ir3_instruction *consumer, unsigned n, bool soft);
 unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
 		bool soft, bool pred);
 void ir3_remove_nops(struct ir3 *ir);
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c
index 0b796a4183a..5839128a4c6 100644
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner,
  */
 int
 ir3_delayslots(struct ir3_instruction *assigner,
-		struct ir3_instruction *consumer, unsigned n)
+		struct ir3_instruction *consumer, unsigned n, bool soft)
 {
 	if (ignore_dep(assigner, consumer, n))
 		return 0;
@@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner,
 	if (writes_addr(assigner))
 		return 6;
 
+	/* On a6xx, the number of delay slots needed to get an SFU result
+	 * back (ie. using nop's instead of (ss)) is:
+	 *
+	 *     8 - single warp
+	 *     9 - two warps
+	 *    10 - four warps
+	 *
+	 * and so on.  Not quite sure where it tapers out (ie. how many
+	 * warps share an SFU unit).  But 10 seems like a reasonable #
+	 * to choose:
+	 */
+	if (soft && is_sfu(assigner))
+		return 10;
+
 	/* handled via sync flags: */
 	if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
 		return 0;
@@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block,
 			delay = MAX2(delay, d);
 		}
 	} else {
-		if (soft) {
-			if (is_sfu(assigner)) {
-				delay = 4;
-			} else {
-				delay = ir3_delayslots(assigner, consumer, srcn);
-			}
-		} else {
-			delay = ir3_delayslots(assigner, consumer, srcn);
-		}
+		delay = ir3_delayslots(assigner, consumer, srcn, soft);
 		delay -= distance(block, assigner, delay, pred);
 	}
 
diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c
index 135d4365d2e..6bb946871e5 100644
--- a/src/freedreno/ir3/ir3_depth.c
+++ b/src/freedreno/ir3/ir3_depth.c
@@ -89,7 +89,7 @@ ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
 		if (i == 0)
 			continue;
 
-		sd = ir3_delayslots(src, instr, i) + src->depth;
+		sd = ir3_delayslots(src, instr, i, true) + src->depth;
 		sd += boost;
 
 		instr->depth = MAX2(instr->depth, sd);
diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c
index 4290e882249..47a8e52fdeb 100644
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@@ -380,7 +380,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 
 				struct ir3_postsched_node *dep = dep_reg(state, reg->num + b);
 				if (dep && (state->direction == F)) {
-					unsigned d = ir3_delayslots(dep->instr, node->instr, i);
+					unsigned d = ir3_delayslots(dep->instr, node->instr, i, true);
 					node->delay = MAX2(node->delay, d);
 				}
 			}



More information about the mesa-commit mailing list