[Nouveau] [PATCH v4] nv110/exa: update sched codes

Aaryaman Vasishta jem456.vasishta at gmail.com
Tue Jun 27 15:16:03 UTC 2017


v4: Updated the wait dependency bars based on tex component masks.

This patch adds proper delays to the Maxwell EXA shaders. Tested with
rendercheck -f a8r8g8b8.

I am still wondering whether the rd bars are required. We could
wait on the write bars instead; e.g., see
"sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp

Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays

Signed-off-by: Aaryaman Vasishta <jem456.vasishta at gmail.com>
---
 src/shader/exac8nv110.fp  | 10 +++++-----
 src/shader/exac8nv110.fpc | 18 +++++++++---------
 src/shader/exacanv110.fp  | 10 +++++-----
 src/shader/exacanv110.fpc | 18 +++++++++---------
 src/shader/exacmnv110.fp  | 10 +++++-----
 src/shader/exacmnv110.fpc | 18 +++++++++---------
 src/shader/exas8nv110.fp  |  6 +++---
 src/shader/exas8nv110.fpc | 12 ++++++------
 src/shader/exasanv110.fp  | 10 +++++-----
 src/shader/exasanv110.fpc | 18 +++++++++---------
 src/shader/exascnv110.fp  |  6 +++---
 src/shader/exascnv110.fpc | 10 +++++-----
 src/shader/videonv110.fp  | 14 +++++++-------
 src/shader/videonv110.fpc | 26 +++++++++++++-------------
 14 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..101b67f 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r1 $r2 0x0 0x1 t2d 0x8
 ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r2 0x0 0x0 t2d 0x8
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r0 $r1
 mov $r2 $r3 0xf
 mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 mov $r0 $r3 0xf
 exit
 #endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index 4aa1368..1f7d649 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x005cbc03,
 0x0007ff02,
 0xe043ff89,
 0x2ff70201,
 0xc03a0014,
 0x4007ff03,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0074f,
+0x001fbc06,
 0x0007ff02,
 0xe043ff88,
 0x2ff70200,
 0xc03a0004,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe6,
+0x001f8400,
 0x00170003,
 0x5c681000,
 0x00370002,
 0x5c980780,
 0x00370001,
 0x5c980780,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00370000,
 0x5c980780,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index a70d5c5..fe55fcd 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x1 t2d 0xf
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r7
 fmul ftz $r2 $r2 $r6
 fmul ftz $r1 $r1 $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 7c0ca5e..7c8ebbd 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
 0x0007ff02,
 0xe043ff89,
 0xaff70204,
 0xc03a0017,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
 0x00770303,
 0x5c681000,
 0x00670202,
 0x5c681000,
 0x00570101,
 0x5c681000,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index fe5c294..7113ab3 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x1 t2d 0x8
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r4
 fmul ftz $r2 $r2 $r4
 fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
index 9d62c1a..60352a8 100644
--- a/src/shader/exacmnv110.fpc
+++ b/src/shader/exacmnv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
 0x0007ff02,
 0xe043ff89,
 0x2ff70204,
 0xc03a0014,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
 0x00470303,
 0x5c681000,
 0x00470202,
 0x5c681000,
 0x00470101,
 0x5c681000,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
index 4fe2e19..a555beb 100644
--- a/src/shader/exas8nv110.fp
+++ b/src/shader/exas8nv110.fp
@@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0x8
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
 mov $r3 $r0 0xf
 mov $r2 $r0 0xf
 mov $r1 $r0 0xf
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
index 1181c41..e58d168 100644
--- a/src/shader/exas8nv110.fpc
+++ b/src/shader/exas8nv110.fpc
@@ -1,21 +1,21 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001fbc03,
 0x0007ff00,
 0xe043ff88,
 0x2ff70000,
 0xc03a0004,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe1,
+0x001f8400,
 0x00070003,
 0x5c980780,
 0x00070002,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
index 61374a6..ee818cd 100644
--- a/src/shader/exasanv110.fp
+++ b/src/shader/exasanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x0 t2d 0x8
 ipa $r1 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
 ipa $r0 a[0x90] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x1 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r4
 fmul ftz $r2 $r2 $r4
 fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
index 5516a03..604bf9a 100644
--- a/src/shader/exasanv110.fpc
+++ b/src/shader/exasanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
 0x0007ff02,
 0xe043ff88,
 0x2ff70204,
 0xc03a0004,
 0x4007ff01,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
 0x0007ff00,
 0xe043ff89,
 0xaff70000,
 0xc03a0017,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
 0x00470303,
 0x5c681000,
 0x00470202,
 0x5c681000,
 0x00470101,
 0x5c681000,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
index 90bbb55..86e14e8 100644
--- a/src/shader/exascnv110.fp
+++ b/src/shader/exascnv110.fp
@@ -25,14 +25,14 @@ NV110FP_Source[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
index 2dba15d..1fef5d2 100644
--- a/src/shader/exascnv110.fpc
+++ b/src/shader/exascnv110.fpc
@@ -1,20 +1,20 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xfde0072f,
+0x001fbc03,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe3000000,
diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
index 2728311..773aad5 100644
--- a/src/shader/videonv110.fp
+++ b/src/shader/videonv110.fp
@@ -25,30 +25,30 @@ NV110FP_NV12[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r2 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r2 $r2
 ipa $r0 a[0x80] $r2 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
 ipa $r1 a[0x84] $r2 0x0 0x1
 tex nodep $r4 $r0 0x0 0x0 t2d 0x8
 tex nodep $r0 $r0 0x0 0x1 t2d 0xc
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x6) (st 0x1)
 depbar le 0x5 0x1 0x1
 fmul ftz $r5 $r4 c0[0x0]
 fadd ftz $r3 $r5 c0[0x4]
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0x6) (st 0xf)
 fadd ftz $r4 $r5 c0[0x8]
 fadd ftz $r5 $r5 c0[0xc]
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
 ffma ftz $r3 $r0 c0[0x10] $r3
 ffma ftz $r4 $r0 c0[0x14] $r4
 ffma ftz $r5 $r0 c0[0x18] $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x6)
 ffma ftz $r0 $r1 c0[0x1c] $r3
 ffma ftz $r2 $r1 c0[0x24] $r5
 ffma ftz $r1 $r1 c0[0x20] $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
index 31d745a..8e7bedf 100644
--- a/src/shader/videonv110.fpc
+++ b/src/shader/videonv110.fpc
@@ -1,52 +1,52 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff02,
 0xe003ff87,
 0x00470202,
 0x50800000,
 0x0027ff00,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x003c3c03,
 0x4027ff01,
 0xe043ff88,
 0x2ff70004,
 0xc03a0004,
 0x2ff70000,
 0xc03a0016,
-0xfc0007e0,
-0x001f8000,
+0xfcc007ef,
+0x001f8400,
 0x34170001,
 0xf0f00000,
 0x00070405,
 0x4c681000,
 0x00170503,
 0x4c581000,
-0xfc0007e0,
-0x001f8000,
+0xfcc007e6,
+0x001fbc00,
 0x00270504,
 0x4c581000,
 0x00370505,
 0x4c581000,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe6,
+0x001f8400,
 0x00470003,
 0x49a00180,
 0x00570004,
 0x49a00200,
 0x00670005,
 0x49a00280,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f9800,
 0x00770100,
 0x49a00180,
 0x00970102,
 0x49a00280,
 0x00870101,
 0x49a00200,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe3000000,
-- 
2.11.0



More information about the Nouveau mailing list