[Nouveau] [PATCH v4] nv110/exa: update sched codes
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Jun 29 21:26:33 UTC 2017
Do you still have some glitches or does it work correctly now?
Did you also remove the spurious wait dep bars between v3 and v4?
On 06/27/2017 05:16 PM, Aaryaman Vasishta wrote:
> v4: Updated the wait dependency bars based on tex component masks.
>
> This patch adds proper delays to maxwell exa shaders. Tested with
> rendercheck -f a8r8g8b8.
>
> I am still wondering whether the rd's are required. We could
> still wait on the write bars instead, e.g. see
> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
>
> Trello:
> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>
> Signed-off-by: Aaryaman Vasishta <jem456.vasishta at gmail.com>
> ---
> src/shader/exac8nv110.fp | 10 +++++-----
> src/shader/exac8nv110.fpc | 18 +++++++++---------
> src/shader/exacanv110.fp | 10 +++++-----
> src/shader/exacanv110.fpc | 18 +++++++++---------
> src/shader/exacmnv110.fp | 10 +++++-----
> src/shader/exacmnv110.fpc | 18 +++++++++---------
> src/shader/exas8nv110.fp | 6 +++---
> src/shader/exas8nv110.fpc | 12 ++++++------
> src/shader/exasanv110.fp | 10 +++++-----
> src/shader/exasanv110.fpc | 18 +++++++++---------
> src/shader/exascnv110.fp | 6 +++---
> src/shader/exascnv110.fpc | 10 +++++-----
> src/shader/videonv110.fp | 14 +++++++-------
> src/shader/videonv110.fpc | 26 +++++++++++++-------------
> 14 files changed, 93 insertions(+), 93 deletions(-)
>
> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> index ce78036..101b67f 100644
> --- a/src/shader/exac8nv110.fp
> +++ b/src/shader/exac8nv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0 $r3 0xf
> exit
> #endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> index 4aa1368..1f7d649 100644
> --- a/src/shader/exac8nv110.fpc
> +++ b/src/shader/exac8nv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0x21e0072f,
> +0x005cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70201,
> 0xc03a0014,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0074f,
> +0x001fbc06,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70200,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe6,
> +0x001f8400,
> 0x00170003,
> 0x5c681000,
> 0x00370002,
> 0x5c980780,
> 0x00370001,
> 0x5c980780,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00370000,
> 0x5c980780,
> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> index a70d5c5..fe55fcd 100644
> --- a/src/shader/exacanv110.fp
> +++ b/src/shader/exacanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r7
> fmul ftz $r2 $r2 $r6
> fmul ftz $r1 $r1 $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> index 7c0ca5e..7c8ebbd 100644
> --- a/src/shader/exacanv110.fpc
> +++ b/src/shader/exacanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0xaff70204,
> 0xc03a0017,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00770303,
> 0x5c681000,
> 0x00670202,
> 0x5c681000,
> 0x00570101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> index fe5c294..7113ab3 100644
> --- a/src/shader/exacmnv110.fp
> +++ b/src/shader/exacmnv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> index 9d62c1a..60352a8 100644
> --- a/src/shader/exacmnv110.fpc
> +++ b/src/shader/exacmnv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70204,
> 0xc03a0014,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> index 4fe2e19..a555beb 100644
> --- a/src/shader/exas8nv110.fp
> +++ b/src/shader/exas8nv110.fp
> @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
> mov $r3 $r0 0xf
> mov $r2 $r0 0xf
> mov $r1 $r0 0xf
> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> index 1181c41..e58d168 100644
> --- a/src/shader/exas8nv110.fpc
> +++ b/src/shader/exas8nv110.fpc
> @@ -1,21 +1,21 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0x2ff70000,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe1,
> +0x001f8400,
> 0x00070003,
> 0x5c980780,
> 0x00070002,
> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> index 61374a6..ee818cd 100644
> --- a/src/shader/exasanv110.fp
> +++ b/src/shader/exasanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> ipa $r1 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x90] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> index 5516a03..604bf9a 100644
> --- a/src/shader/exasanv110.fpc
> +++ b/src/shader/exasanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70204,
> 0xc03a0004,
> 0x4007ff01,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff89,
> 0xaff70000,
> 0xc03a0017,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> index 90bbb55..86e14e8 100644
> --- a/src/shader/exascnv110.fp
> +++ b/src/shader/exascnv110.fp
> @@ -25,14 +25,14 @@ NV110FP_Source[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> index 2dba15d..1fef5d2 100644
> --- a/src/shader/exascnv110.fpc
> +++ b/src/shader/exascnv110.fpc
> @@ -1,20 +1,20 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xfde0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> index 2728311..773aad5 100644
> --- a/src/shader/videonv110.fp
> +++ b/src/shader/videonv110.fp
> @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r2 $r2
> ipa $r0 a[0x80] $r2 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
> ipa $r1 a[0x84] $r2 0x0 0x1
> tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x6) (st 0x1)
> depbar le 0x5 0x1 0x1
> fmul ftz $r5 $r4 c0[0x0]
> fadd ftz $r3 $r5 c0[0x4]
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6) (st 0x6) (st 0xf)
> fadd ftz $r4 $r5 c0[0x8]
> fadd ftz $r5 $r5 c0[0xc]
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
> ffma ftz $r3 $r0 c0[0x10] $r3
> ffma ftz $r4 $r0 c0[0x14] $r4
> ffma ftz $r5 $r0 c0[0x18] $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0x1) (st 0x6)
> ffma ftz $r0 $r1 c0[0x1c] $r3
> ffma ftz $r2 $r1 c0[0x24] $r5
> ffma ftz $r1 $r1 c0[0x20] $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> index 31d745a..8e7bedf 100644
> --- a/src/shader/videonv110.fpc
> +++ b/src/shader/videonv110.fpc
> @@ -1,52 +1,52 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff02,
> 0xe003ff87,
> 0x00470202,
> 0x50800000,
> 0x0027ff00,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x003c3c03,
> 0x4027ff01,
> 0xe043ff88,
> 0x2ff70004,
> 0xc03a0004,
> 0x2ff70000,
> 0xc03a0016,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007ef,
> +0x001f8400,
> 0x34170001,
> 0xf0f00000,
> 0x00070405,
> 0x4c681000,
> 0x00170503,
> 0x4c581000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007e6,
> +0x001fbc00,
> 0x00270504,
> 0x4c581000,
> 0x00370505,
> 0x4c581000,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe6,
> +0x001f8400,
> 0x00470003,
> 0x49a00180,
> 0x00570004,
> 0x49a00200,
> 0x00670005,
> 0x49a00280,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2007e1,
> +0x001f9800,
> 0x00770100,
> 0x49a00180,
> 0x00970102,
> 0x49a00280,
> 0x00870101,
> 0x49a00200,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
>
More information about the Nouveau
mailing list