Mesa (master): freedreno/ir3: Better sstall estimation

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jan 13 18:53:08 UTC 2021


Module: Mesa
Branch: master
Commit: 3e15ba5ccc4e7b8af80ea84a44906a2ffa895490
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e15ba5ccc4e7b8af80ea84a44906a2ffa895490

Author: Rob Clark <robdclark at chromium.org>
Date:   Sat Jan  9 12:12:37 2021 -0800

freedreno/ir3: Better sstall estimation

1) Take repeat/nop cycles into account when counting down sfu_delay
2) Clear sfu_delay after an (ss) sync, so the same delay is not added
   to sstall again by a later (ss)

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7997>

---

 ...w.indexed.indirect_draw_count.triangle_list.log | 48 +++++++--------
 src/freedreno/.gitlab-ci/reference/fd-clouds.log   | 72 +++++++++++-----------
 .../.gitlab-ci/reference/glxgears-a420.log         | 40 ++++++------
 src/freedreno/ir3/ir3.c                            |  8 ++-
 4 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
index 8900e61ebe4..3c4e006b4a1 100644
--- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
+++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
@@ -816,13 +816,13 @@ t4					write SP_VS_OBJ_START_LO (a81c)
 							- used (full): 4-11 (cnt=8, max=11)
 							- input (half): 8-19 (cnt=12, max=19)
 							- input (full): 4-9 (cnt=6, max=9)
-							- max const: 5
-
 							- output (half): 16-23 (cnt=8, max=23)  (estimated)
 							- output (full): 8-11 (cnt=4, max=11)  (estimated)
-							- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+							- shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+							- shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
 							- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-							- shaderdb: 0 (ss), 0 (sy)
+							- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 00000000010541a4:					0000: 48a81c02 01054000 00000000
 t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -843,13 +843,13 @@ t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						- used (full): 4-11 (cnt=8, max=11)
 						- input (half): 8-19 (cnt=12, max=19)
 						- input (full): 4-9 (cnt=6, max=9)
-						- max const: 5
-
 						- output (half): 16-23 (cnt=8, max=23)  (estimated)
 						- output (full): 8-11 (cnt=4, max=11)  (estimated)
-						- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+						- shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+						- shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
 						- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-						- shaderdb: 0 (ss), 0 (sy)
+						- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 00000000010541b0:					0000: 70328003 00620000 01054000 00000000
 t7					opcode: CP_LOAD_STATE6_GEOM (32) (8 dwords)
 						{ DST_OFF = 1 | STATE_TYPE = ST6_CONSTANTS | STATE_SRC = SS6_DIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -908,13 +908,13 @@ t4					write SP_FS_OBJ_START_LO (a983)
 							- used (full): 0 2-5 (cnt=5, max=5)
 							- input (half): 0-1 (cnt=2, max=1)
 							- input (full): 0 (cnt=1, max=0)
-							- max const: 0
-
 							- output (half): 4-11 (cnt=8, max=11)  (estimated)
 							- output (full): 2-5 (cnt=4, max=5)  (estimated)
-							- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+							- shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+							- shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
 							- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-							- shaderdb: 0 (ss), 0 (sy)
+							- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 000000000105422c:					0000: 40a98302 01054080 00000000
 t7					opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
 						{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 1 }
@@ -934,13 +934,13 @@ t7					opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
 						- used (full): 0 2-5 (cnt=5, max=5)
 						- input (half): 0-1 (cnt=2, max=1)
 						- input (full): 0 (cnt=1, max=0)
-						- max const: 0
-
 						- output (half): 4-11 (cnt=8, max=11)  (estimated)
 						- output (full): 2-5 (cnt=4, max=5)  (estimated)
-						- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+						- shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+						- shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
 						- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-						- shaderdb: 0 (ss), 0 (sy)
+						- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001054238:					0000: 70348003 00720000 01054080 00000000
 t4					write SP_CS_CONFIG (a9bb)
 						SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -1502,13 +1502,13 @@ t7			opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
 				- used (full): 4-11 (cnt=8, max=11)
 				- input (half): 8-19 (cnt=12, max=19)
 				- input (full): 4-9 (cnt=6, max=9)
-				- max const: 5
-
 				- output (half): 16-23 (cnt=8, max=23)  (estimated)
 				- output (full): 8-11 (cnt=4, max=11)  (estimated)
-				- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+				- shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+				- shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
 				- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+	00000100			SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+	00000001			SP_VS_INSTRLEN: 1
  +	00000000			SP_HS_WAVE_INPUT_SIZE: 0
@@ -1537,13 +1537,13 @@ t7			opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
 				- used (full): 0 2-5 (cnt=5, max=5)
 				- input (half): 0-1 (cnt=2, max=1)
 				- input (full): 0 (cnt=1, max=0)
-				- max const: 0
-
 				- output (half): 4-11 (cnt=8, max=11)  (estimated)
 				- output (full): 2-5 (cnt=4, max=5)  (estimated)
-				- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+				- shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+				- shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
 				- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+	00000100			SP_BLEND_CNTL: { UNK8 }
  +	00000000			SP_SRGB_CNTL: { 0 }
 !+	0000000f			SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
index 874f1085c78..a3cfde19efa 100644
--- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log
+++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
@@ -639,13 +639,13 @@ t4					write SP_VS_OBJ_START_LO (a81c)
 							- used (full): (cnt=0, max=0)
 							- input (half): (cnt=0, max=0)
 							- input (full): (cnt=0, max=0)
-							- max const: 0
-
 							- output (half): (cnt=0, max=0)  (estimated)
 							- output (full): (cnt=0, max=0)  (estimated)
-							- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+							- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+							- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 							- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-							- shaderdb: 0 (ss), 0 (sy)
+							- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001121038:					0000: 48a81c02 01011000 00000000
 t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -661,13 +661,13 @@ t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						- used (full): (cnt=0, max=0)
 						- input (half): (cnt=0, max=0)
 						- input (full): (cnt=0, max=0)
-						- max const: 0
-
 						- output (half): (cnt=0, max=0)  (estimated)
 						- output (full): (cnt=0, max=0)  (estimated)
-						- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+						- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+						- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 						- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-						- shaderdb: 0 (ss), 0 (sy)
+						- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001121044:					0000: 70328003 00620000 01011000 00000000
 t4					write VPC_VAR[0].DISABLE (9212)
 						VPC_VAR[0].DISABLE: 0xffffffff
@@ -1109,13 +1109,13 @@ t7			opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 				- used (full): (cnt=0, max=0)
 				- input (half): (cnt=0, max=0)
 				- input (full): (cnt=0, max=0)
-				- max const: 0
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): (cnt=0, max=0)  (estimated)
-				- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+				- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+				- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 				- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+	00000100			SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+	00000001			SP_VS_INSTRLEN: 1
  +	00000000			SP_HS_WAVE_INPUT_SIZE: 0
@@ -1954,13 +1954,13 @@ t4					write SP_VS_OBJ_START_LO (a81c)
 							- used (full): (cnt=0, max=0)
 							- input (half): (cnt=0, max=0)
 							- input (full): (cnt=0, max=0)
-							- max const: 0
-
 							- output (half): (cnt=0, max=0)  (estimated)
 							- output (full): (cnt=0, max=0)  (estimated)
-							- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+							- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+							- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 							- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-							- shaderdb: 0 (ss), 0 (sy)
+							- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001120038:					0000: 48a81c02 01012000 00000000
 t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -1976,13 +1976,13 @@ t7					opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
 						- used (full): (cnt=0, max=0)
 						- input (half): (cnt=0, max=0)
 						- input (full): (cnt=0, max=0)
-						- max const: 0
-
 						- output (half): (cnt=0, max=0)  (estimated)
 						- output (full): (cnt=0, max=0)  (estimated)
-						- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+						- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+						- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 						- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-						- shaderdb: 0 (ss), 0 (sy)
+						- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001120044:					0000: 70328003 00620000 01012000 00000000
 t4					write VPC_VAR[0].DISABLE (9212)
 						VPC_VAR[0].DISABLE: 0xffffffff
@@ -3497,13 +3497,13 @@ t4					write SP_FS_OBJ_START_LO (a983)
 							- used (full): 0-73 (cnt=74, max=73)
 							- input (half): 38-41 (cnt=4, max=41)
 							- input (full): 19-20 (cnt=2, max=20)
-							- max const: 113
-
 							- output (half): 8-15 (cnt=8, max=15)  (estimated)
 							- output (full): 4-7 (cnt=4, max=7)  (estimated)
-							- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+							- shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+							- shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
 							- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-							- shaderdb: 140 (ss), 0 (sy)
+							- shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 0000000001120158:					0000: 40a98302 01013000 00000000
 t7					opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
 						{ DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 88 }
@@ -4920,13 +4920,13 @@ t7					opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
 						- used (full): 0-73 (cnt=74, max=73)
 						- input (half): 38-41 (cnt=4, max=41)
 						- input (full): 19-20 (cnt=2, max=20)
-						- max const: 113
-
 						- output (half): 8-15 (cnt=8, max=15)  (estimated)
 						- output (full): 4-7 (cnt=4, max=7)  (estimated)
-						- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+						- shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+						- shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
 						- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-						- shaderdb: 140 (ss), 0 (sy)
+						- shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 0000000001120164:					0000: 70348003 16320000 01013000 00000000
 t4					write VFD_CONTROL_1 (a001)
 						VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
@@ -5334,13 +5334,13 @@ t7			opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 				- used (full): (cnt=0, max=0)
 				- input (half): (cnt=0, max=0)
 				- input (full): (cnt=0, max=0)
-				- max const: 0
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): (cnt=0, max=0)  (estimated)
-				- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+				- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+				- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 				- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
  +	00000100			SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +	00000001			SP_VS_INSTRLEN: 1
  +	00000000			SP_HS_WAVE_INPUT_SIZE: 0
@@ -6772,13 +6772,13 @@ t7			opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 				- used (full): 0-73 (cnt=74, max=73)
 				- input (half): 38-41 (cnt=4, max=41)
 				- input (full): 19-20 (cnt=2, max=20)
-				- max const: 113
-
 				- output (half): 8-15 (cnt=8, max=15)  (estimated)
 				- output (full): 4-7 (cnt=4, max=7)  (estimated)
-				- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+				- shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+				- shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
 				- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 140 (ss), 0 (sy)
+				- shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 !+	00000100			SP_BLEND_CNTL: { UNK8 }
  +	fcfcfc00			SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
 !+	00000001			SP_FS_OUTPUT_CNTL1: { MRT = 1 }
diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
index efed6fcdcfd..c5ef966f48e 100644
--- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
+++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log
@@ -429,13 +429,13 @@ t3			opcode: CP_LOAD_STATE4 (30) (35 dwords)
 				- used (full): (cnt=0, max=0)
 				- input (half): (cnt=0, max=0)
 				- input (full): (cnt=0, max=0)
-				- max const: 0
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): (cnt=0, max=0)  (estimated)
-				- shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+				- shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+				- shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
 				- shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 109ce1f0:			0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000
 *
 t3			opcode: CP_LOAD_STATE4 (30) (35 dwords)
@@ -455,13 +455,13 @@ t3			opcode: CP_LOAD_STATE4 (30) (35 dwords)
 				- used (full): 0-3 (cnt=4, max=3)
 				- input (half): (cnt=0, max=0)
 				- input (full): (cnt=0, max=0)
-				- max const: 3
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): 0-3 (cnt=4, max=3)  (estimated)
-				- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full
+
+				- shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov
+				- shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen
 				- shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 0 (ss), 0 (sy)
+				- shaderdb: 0 sstall, 0 (ss), 0 (sy)
 109ce27c:			0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002
 109ce29c:			0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000
 *
@@ -1043,13 +1043,13 @@ t3			opcode: CP_LOAD_STATE4 (30) (131 dwords)
 				- used (full): 0-13 (cnt=14, max=13)
 				- input (half): (cnt=0, max=0)
 				- input (full): 2-5 (cnt=4, max=5)
-				- max const: 52
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): 6-13 (cnt=8, max=13)  (estimated)
-				- shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full
+
+				- shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov
+				- shaderdb: 0 last-baryf, 0 half, 4 full, 14 constlen
 				- shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 1 (ss), 0 (sy)
+				- shaderdb: 10 sstall, 1 (ss), 0 (sy)
 109ce66c:			0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
 109ce68c:			0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
 109ce6ac:			0040: 63828006 10010002 40700000 0001100f 63828009 00001005 63818000 00000010
@@ -1085,13 +1085,13 @@ t3			opcode: CP_LOAD_STATE4 (30) (35 dwords)
 				- used (full): 0-3 (cnt=4, max=3)
 				- input (half): (cnt=0, max=0)
 				- input (full): 0-3 (cnt=4, max=3)
-				- max const: 0
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): (cnt=0, max=0)  (estimated)
-				- shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full
+
+				- shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov
+				- shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen
 				- shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7
-				- shaderdb: 1 (ss), 0 (sy)
+				- shaderdb: 65531 sstall, 1 (ss), 0 (sy)
 109ce878:			0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
 109ce898:			0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000
 109ce8b8:			0040: 03000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
@@ -1675,13 +1675,13 @@ t3			opcode: CP_LOAD_STATE4 (30) (131 dwords)
 				- used (full): 0-8 10-17 (cnt=17, max=17)
 				- input (half): (cnt=0, max=0)
 				- input (full): 2-8 (cnt=7, max=8)
-				- max const: 52
-
 				- output (half): (cnt=0, max=0)  (estimated)
 				- output (full): 10-17 (cnt=8, max=17)  (estimated)
-				- shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full
+
+				- shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov
+				- shaderdb: 0 last-baryf, 0 half, 5 full, 14 constlen
 				- shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
-				- shaderdb: 1 (ss), 0 (sy)
+				- shaderdb: 10 sstall, 1 (ss), 0 (sy)
 109cee34:			0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
 109cee54:			0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
 109cee74:			0040: 6382800a 10010002 40700000 0001100f 6382800d 00001005 63818000 00000010
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 0188da9657a..61d3c7bab5a 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1002,7 +1002,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
 	info->sizedwords = info->size / 4;
 
 	foreach_block (block, &shader->block_list) {
-		unsigned sfu_delay = 0;
+		int sfu_delay = 0;
 
 		foreach_instr (instr, &block->instr_list) {
 
@@ -1050,6 +1050,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
 			if (instr->flags & IR3_INSTR_SS) {
 				info->ss++;
 				info->sstall += sfu_delay;
+				sfu_delay = 0;
 			}
 
 			if (instr->flags & IR3_INSTR_SY)
@@ -1057,8 +1058,9 @@ ir3_collect_info(struct ir3_shader_variant *v)
 
 			if (is_sfu(instr)) {
 				sfu_delay = 10;
-			} else if (sfu_delay > 0) {
-				sfu_delay--;
+			} else {
+				int n = MIN2(sfu_delay, 1 + instr->repeat + instr->nop);
+				sfu_delay -= n;
 			}
 		}
 	}



More information about the mesa-commit mailing list