Mesa (master): nir,radeonsi: move ffma fusing to late optimizations for better codegen

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Sep 16 02:53:06 UTC 2020


Module: Mesa
Branch: master
Commit: 57bf4c2028cffe24ffb55b96592f7e33aa18f1ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=57bf4c2028cffe24ffb55b96592f7e33aa18f1ce

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Sep  4 05:55:25 2020 -0400

nir,radeonsi: move ffma fusing to late optimizations for better codegen

The freedreno trace changes were suggested by Rob Clark.

ALU performance is higher because ffma is used more often, but register
usage is also higher, because ternary opcodes (such as ffma) usually need
at least 3 live registers.

54793 shaders in 33659 tests
Totals:
SGPRS: 2639746 -> 2642938 (0.12 %)
VGPRS: 1534120 -> 1536392 (0.15 %)
Spilled SGPRs: 3541 -> 3618 (2.17 %)
Spilled VGPRs: 33 -> 44 (33.33 %)
Scratch size: 292 -> 312 (6.85 %) dwords per thread
Code Size: 55639836 -> 55620116 (-0.04 %) bytes
Max Waves: 964785 -> 963977 (-0.08 %)

Totals from affected shaders:
SGPRS: 1105800 -> 1108992 (0.29 %)
VGPRS: 635292 -> 637564 (0.36 %)
Spilled SGPRs: 3193 -> 3270 (2.41 %)
Spilled VGPRs: 33 -> 44 (33.33 %)
Scratch size: 36 -> 56 (55.56 %) dwords per thread
Code Size: 31568708 -> 31548988 (-0.06 %) bytes
Max Waves: 319991 -> 319183 (-0.25 %)

Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6596>

---

 .gitlab-ci/traces-freedreno.yml              | 19 +++++++------------
 .gitlab-ci/traces-radeonsi.yml               | 18 +++++++++---------
 src/compiler/nir/nir_opt_algebraic.py        |  4 +++-
 src/gallium/drivers/radeonsi/si_shader_nir.c | 11 +++++++++++
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml
index 163833af111..daa72ee01a1 100644
--- a/.gitlab-ci/traces-freedreno.yml
+++ b/.gitlab-ci/traces-freedreno.yml
@@ -11,12 +11,12 @@ traces:
   - path: gputest/furmark.trace
     expectations:
       - device: freedreno-a630
-        checksum: de674022e53fc9e0a9eb217f8bf0fe03
+        checksum: af6e1faf11407a7e7c416f2c532de029
 # Note: Requires GL3.3
   - path: gputest/gimark.trace
     expectations:
       - device: freedreno-a630
-        checksum: 2cae8e2104356e2b3017cbd953cf7b4a
+        checksum: 47419914b87422b267e20b6981a7eb43
   - path: gputest/pixmark-julia-fp32.trace
     expectations:
       - device: freedreno-a630
@@ -37,16 +37,16 @@ traces:
     expectations:
       # Looks fine, but totally different shape from the rendering on i965.
       - device: freedreno-a630
-        checksum: 86d678c70b8adf27095ace1a6bbfe2d2
+        checksum: 9ee5a036510be0f506705eacc1516bf3
   - path: gputest/plot3d.trace
     expectations:
       - device: freedreno-a630
-        checksum: 67a9eb692e694b11107860bbcd47d493
+        checksum: 42aba3ab943dae2fe952cae1ff91c354
 # Note: Requires GL4 for tess.
   - path: gputest/tessmark.trace
     expectations:
       - device: freedreno-a630
-        checksum: 985e231b58b7dc4b6da34ff32f8ebb82
+        checksum: 8688b3904b6b2bc591d8b669ecae4d53
   - path: gputest/triangle.trace
     expectations:
       - device: freedreno-a630
@@ -149,7 +149,7 @@ traces:
   - path: glmark2/effect2d-kernel=1,1,1,1,1;1,1,1,1,1;1,1,1,1,1;.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 2346a6597f4d1f20b493e8d6a8f7e592
+        checksum: 2964d37446db126a5fe462b1ba4542cd
   - path: glmark2/function-fragment-complexity=low:fragment-steps=5.rdc
     expectations:
       # Incorrect rendering, a bunch of the area is uniform gray when it should
@@ -215,7 +215,7 @@ traces:
   - path: glmark2/shading-shading=gouraud.rdc
     expectations:
       - device: freedreno-a630
-        checksum: fcc26fca31375b216382e69bc5f113fb
+        checksum: bd9058f041bd2d59c039cccdb7d50bf7
   - path: glmark2/shading-shading=phong.rdc
     # Some speckling on the main specular highlight that may just be
     # mediump artifacts
@@ -226,11 +226,6 @@ traces:
     expectations:
       - device: freedreno-a630
         checksum: d8b5931669733240797f1acf5d98db25
-    # Very yellow terrain compared to i965, may just be mediump artifacts.
-  - path: glmark2/terrain.rdc
-    expectations:
-      - device: freedreno-a630
-        checksum: 114f7dfe97768d9c565a29f656c8f9cf
   - path: glmark2/texture-texture-filter=linear.rdc
     expectations:
       - device: freedreno-a630
diff --git a/.gitlab-ci/traces-radeonsi.yml b/.gitlab-ci/traces-radeonsi.yml
index 111fc635e36..13b1da593ce 100644
--- a/.gitlab-ci/traces-radeonsi.yml
+++ b/.gitlab-ci/traces-radeonsi.yml
@@ -33,11 +33,11 @@ traces:
   - path: gputest/furmark.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 1c569668d608c644f353caa177d577c6
+        checksum: d71c0d8e6c46c8f29d1aa8d0ed7d3c87
   - path: gputest/pixmark-piano.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: a0e1d6358f76666603b08eab383af080
+        checksum: 777d48e82cabceef6d9489189f91d266
   - path: gputest/triangle.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -153,7 +153,7 @@ traces:
   - path: glmark2/shadow.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 4bf5ca9ce641de1031eb8125d80a3005
+        checksum: 03dfbf026a0f0ab643e5a6ef19623e81
   - path: glmark2/terrain.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -173,7 +173,7 @@ traces:
   - path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 5164e238381e7d77a64e3de771cc005f
+        checksum: 990abd360dc380c95ee2645f8b402d47
   - path: gputest/gimark.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -189,15 +189,15 @@ traces:
   - path: gputest/pixmark-piano.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: a0e1d6358f76666603b08eab383af080
+        checksum: 777d48e82cabceef6d9489189f91d266
   - path: gputest/pixmark-volplosion.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 2fba173643c014bcfa4b31eb55a514b9
+        checksum: 708f92a8ac8aef23a4a544cc5ec755d6
   - path: gputest/plot3d.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: fd367551aa74e2903e0590a893da01a6
+        checksum: f9e6c1cb70add69cf2a4724800d48b25
   - path: gputest/tessmark.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -229,7 +229,7 @@ traces:
   - path: supertuxkart/supertuxkart-antediluvian-abyss.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 17f4039392a65ad23133cb2cac82dba4
+        checksum: a2c4c127873f93b7db4ef48ea9fb7689
   - path: supertuxkart/supertuxkart-menu.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -237,4 +237,4 @@ traces:
   - path: supertuxkart/supertuxkart-ravenbridge-mansion.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 46f08af5c49d711b41d4082f8a5cf6d6
+        checksum: c8f9eae92c67c7d53db4d69a703e3914
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f2ef598c912..39c07ce0b7f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -194,7 +194,8 @@ optimizations.extend([
    (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
    (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
-   (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
+   # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
+   (('~ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma'),
 
    (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a at bool')), ('fmul', b, c)), '#d'), '#e'),
     ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
@@ -2027,6 +2028,7 @@ late_optimizations = [
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
+   (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
 
    # These are duplicated from the main optimizations table.  The late
    # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 534973bcf49..4b879bff13a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -698,6 +698,17 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
    if (changed)
       si_nir_opts(nir, false);
 
+   /* Run late optimizations to fuse ffma. */
+   bool more_late_algebraic = true;
+   while (more_late_algebraic) {
+      more_late_algebraic = false;
+      NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
+      NIR_PASS_V(nir, nir_opt_constant_folding);
+      NIR_PASS_V(nir, nir_copy_prop);
+      NIR_PASS_V(nir, nir_opt_dce);
+      NIR_PASS_V(nir, nir_opt_cse);
+   }
+
    NIR_PASS_V(nir, nir_lower_bool_to_int32);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
 



More information about the mesa-commit mailing list