Mesa (master): nir: scalarize fdot in reverse

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Nov 3 15:07:46 UTC 2020


Module: Mesa
Branch: master
Commit: 24a18b1a4b99c5dc97bfb2cf9a2b0c2211839b3a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=24a18b1a4b99c5dc97bfb2cf9a2b0c2211839b3a

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Jun 16 16:04:09 2020 +0100

nir: scalarize fdot in reverse

This will create code that is easier to combine into MADs/FMA when the
last component is 1.0.

nir_opt_algebraic_late has an optimization that does something similar,
but it only works for inexact code, and only if the multiplication-by-1
optimization is done before it and the backend enables fuse_ffma.

fossil-db (Navi):
Totals from 85583 (74.64% of 114665) affected shaders:
SGPRs: 4556060 -> 4558596 (+0.06%); split: -0.07%, +0.12%
VGPRs: 3315060 -> 3312984 (-0.06%); split: -0.23%, +0.17%
SpillSGPRs: 13552 -> 13553 (+0.01%)
CodeSize: 184962756 -> 184431388 (-0.29%); split: -0.32%, +0.03%
MaxWaves: 1208693 -> 1209361 (+0.06%); split: +0.17%, -0.11%
Instrs: 35678819 -> 35361617 (-0.89%); split: -0.91%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5631>

---

 .gitlab-ci/traces-freedreno.yml            | 42 +++++++++++++++---------------
 .gitlab-ci/traces-llvmpipe.yml             | 40 ++++++++++++++--------------
 .gitlab-ci/traces-radeonsi.yml             | 42 +++++++++++++++---------------
 .gitlab-ci/traces-virgl.yml                |  6 ++---
 src/compiler/nir/nir_lower_alu_to_scalar.c |  4 +--
 src/compiler/nir/nir_opcodes.py            |  4 +--
 6 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml
index 8d47fcd103e..3482e31ba73 100644
--- a/.gitlab-ci/traces-freedreno.yml
+++ b/.gitlab-ci/traces-freedreno.yml
@@ -5,7 +5,7 @@ traces:
   - path: 0ad/0ad.trace
     expectations:
       - device: freedreno-a630
-        checksum: 09eb7a4cfe73cb77043f2ac09db96d91
+        checksum: ea2b03e0768e8400c0badd46e4d26087
   - path: glmark2/buffer-columns=200:interleave=false:update-dispersion=0.9:update-fraction=0.5:update-method=map.rdc
     expectations:
       - device: freedreno-a630
@@ -21,15 +21,15 @@ traces:
   - path: glmark2/bump-bump-render=height.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 6d06bb78d918ef6376aee4d417c9e9c1
+        checksum: bfa8557cd352b832e915c3c553b14c1f
   - path: glmark2/bump-bump-render=high-poly.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 55312be6739a7142958bdef174e90269
+        checksum: 737a729713c894596b7cb4c1726239af
   - path: glmark2/bump-bump-render=normals.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 9deea36f896b9aea01169839e1913e9b
+        checksum: 8f81c0d4aa531277784872935523efd8
   - path: glmark2/conditionals-fragment-steps=0:vertex-steps=0.rdc
     expectations:
       # Incorrect rendering, a bunch of the area is uniform gray when it should
@@ -87,11 +87,11 @@ traces:
   - path: glmark2/ideas-speed=duration.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 4f51ce282835db86fd8dc42b81755ef8
+        checksum: e2c3f5163ef85818b85f1af634077f30
   - path: glmark2/jellyfish.rdc
     expectations:
       - device: freedreno-a630
-        checksum: b50568db464034da63a536a9fe89499b
+        checksum: 117212f2d89cbf09f0d81ca0d6b9e3f3
   - path: glmark2/loop-fragment-loop=false:fragment-steps=5:vertex-steps=5.rdc
     expectations:
       # Incorrect rendering, a bunch of the area is uniform gray when it should
@@ -117,15 +117,15 @@ traces:
   - path: glmark2/refract.rdc
     expectations:
       - device: freedreno-a630
-        checksum: a7b60492d31ca93f1506dfa59b462570
+        checksum: 48a9bdb712ad04476ffb397e9a63cd1c
   - path: glmark2/shading-shading=blinn-phong-inf.rdc
     expectations:
       - device: freedreno-a630
-        checksum: b9af27ae43986f6441235044a7f18e2a
+        checksum: 562772bffd90b5e85375dfe4eff28d81
   - path: glmark2/shading-shading=cel.rdc
     expectations:
       - device: freedreno-a630
-        checksum: fc2c2a68a89ad2ee145819e2a6edcbc5
+        checksum: 168fd41e4a8c1064d8e37353e2b2a887
   - path: glmark2/shading-shading=gouraud.rdc
     expectations:
       - device: freedreno-a630
@@ -135,7 +135,7 @@ traces:
     # mediump artifacts
     expectations:
       - device: freedreno-a630
-        checksum: ba29b12892f5cb9f9c506db9cc76f987
+        checksum: f227a5d0471b5bf2de636f519e38f1cb
   - path: glmark2/shadow.rdc
     expectations:
       - device: freedreno-a630
@@ -151,7 +151,7 @@ traces:
   - path: glmark2/texture-texture-filter=nearest.rdc
     expectations:
       - device: freedreno-a630
-        checksum: 0b0114e48cf37efc43d8b27b926eaadf
+        checksum: 84409b1b3c75e52ef43fa331f60c549f
   - path: glxgears/glxgears.trace
     expectations:
       - device: freedreno-a630
@@ -159,12 +159,12 @@ traces:
   - path: gputest/furmark.trace
     expectations:
       - device: freedreno-a630
-        checksum: af6e1faf11407a7e7c416f2c532de029
+        checksum: 751e0e784ba2f003cfc456fe8699f1fa
 # Note: Requires GL3.3
   - path: gputest/gimark.trace
     expectations:
       - device: freedreno-a630
-        checksum: 47419914b87422b267e20b6981a7eb43
+        checksum: 2cf40180a1315795389d0dfc18aad988
   - path: gputest/pixmark-julia-fp32.trace
     expectations:
       - device: freedreno-a630
@@ -185,7 +185,7 @@ traces:
     expectations:
       # Looks fine, but totally different shape from the rendering on i965.
       - device: freedreno-a630
-        checksum: 9ee5a036510be0f506705eacc1516bf3
+        checksum: 2d1cf314b5ef2e0a79f5f98eee8de446
   - path: gputest/plot3d.trace
     expectations:
       - device: freedreno-a630
@@ -194,7 +194,7 @@ traces:
   - path: gputest/tessmark.trace
     expectations:
       - device: freedreno-a630
-        checksum: 8688b3904b6b2bc591d8b669ecae4d53
+        checksum: 5d04b8d71517238b9bc8a527574e884b
   - path: gputest/triangle.trace
     expectations:
       - device: freedreno-a630
@@ -202,7 +202,7 @@ traces:
   - path: humus/AmbientAperture.trace
     expectations:
       - device: freedreno-a630
-        checksum: 894a9398f597258cb634bedfb8f2d31e
+        checksum: 83fd7bce0fc1e1f30bd143b7d30ca890
   - path: humus/CelShading.trace
     expectations:
       - device: freedreno-a630
@@ -219,15 +219,15 @@ traces:
     expectations:
       # Bad rendering -- some sort of gap in the upper right of the image.
       - device: freedreno-a630
-        checksum: 38bcb0a348010c7591efe41b3cc0a9e6
+        checksum: 773787656bdb83df21ff42cf9d98d7d2
   - path: humus/RaytracedShadows.trace
     expectations:
       - device: freedreno-a630
-        checksum: fa2d20bf642cd413341d83c4ec5ef1d4
+        checksum: 5f1a655e62eab99d53dab88b634afed3
   - path: humus/VolumetricFogging2.trace
     expectations:
       - device: freedreno-a630
-        checksum: 6027f10cbffa8c68ffe7d8208f84d32d
+        checksum: e5d2b54f49ba1c24279d5ff84e0f002b
 # Fails trying to use a GLX symbol
 #  - path: itoral-gl-terrain-demo/demo.trace
 #    expectations:
@@ -236,7 +236,7 @@ traces:
   - path: neverball/neverball.trace
     expectations:
       - device: freedreno-a630
-        checksum: 3e0a972c2a2180b349cb1c529d3ceca5
+        checksum: 57fd57aef14d37043a7b56a16dbf50b6
   - path: pathfinder/canvas_moire.trace
     expectations:
       - device: freedreno-a630
@@ -252,7 +252,7 @@ traces:
   - path: xonotic/xonotic-keybench-high.trace
     expectations:
       - device: freedreno-a630
-        checksum: 62fa3bb64f5c38142d155da85afb1312
+        checksum: 0efd30250af93d0b5f03eeabd079e8ee
 # Throws errors on freedreno
 #  - path: supertuxkart/supertuxkart-antediluvian-abyss.rdc
 #    expectations:
diff --git a/.gitlab-ci/traces-llvmpipe.yml b/.gitlab-ci/traces-llvmpipe.yml
index 73602ef3b59..e2d0e789af9 100644
--- a/.gitlab-ci/traces-llvmpipe.yml
+++ b/.gitlab-ci/traces-llvmpipe.yml
@@ -9,7 +9,7 @@ traces:
   - path: glmark2/jellyfish.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: e0fe979fee129c0ed42a3059d1a4e1c9
+        checksum: ebaa1e2d04d7dfe5a91499510722c46e
   - path: glxgears/glxgears-2.trace
     expectations:
       - device: gl-vmware-llvmpipe
@@ -17,7 +17,7 @@ traces:
   - path: 0ad/0ad.trace
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: d76d0f5b5b064bba3e6a2a122c0799cf
+        checksum: b244fb65c77f5cca2c49bb256874e132
   - path: pathfinder/demo.trace
     expectations:
       - device: gl-vmware-llvmpipe
@@ -37,7 +37,7 @@ traces:
   - path: gputest/pixmark-piano.trace
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 022b43b7fff9e8623dcfcd91e6bbe7aa
+        checksum: 3bb42312c7d7d694f7e186b480e16314
   - path: gputest/triangle.trace
     expectations:
       - device: gl-vmware-llvmpipe
@@ -45,7 +45,7 @@ traces:
   - path: humus/Portals.trace
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: d6eb6aee7a8a1c85d504a498cbf94659
+        checksum: 80e88e6be47138c18370d84c6426544f
   - path: bgfx/01-cubes.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -53,15 +53,15 @@ traces:
   - path: bgfx/02-metaballs.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 7e0d9320adb1cc660f9e10f4991bb6eb
+        checksum: 21425e0f19bd5e65ff8fd3a5f0b5c62d
   - path: bgfx/03-raymarch.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 0dc99a6fd61e6613f5153d066c9c6cd6
+        checksum: ae1e1558eee7108ba0254d785ac9687b
   - path: bgfx/04-mesh.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 79e669f1e41ed144d22151be7fdc74a4
+        checksum: 9ee7518f354ec4372fff84849416e864
   - path: bgfx/05-instancing.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -69,15 +69,15 @@ traces:
   - path: bgfx/06-bump.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: e6b325ef1650bd10d943900c468b8c63
+        checksum: 83f782ae968a682e82f31f2daae96d77
   - path: bgfx/07-callback.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 85ecb06e767fd3123e10ce67ac1f109b
+        checksum: 77381f3eb028306e028ab54267a1926a
   - path: bgfx/09-hdr.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 6bd4f501dfbee14fcea6912ced5a068d
+        checksum: 4f98347342ec1d349410774292e4f5c5
   - path: bgfx/10-font.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -89,27 +89,27 @@ traces:
   - path: bgfx/12-lod.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 9d03427c2a51f79861b03cb3b6eaf6ba
+        checksum: c86610a12170cd41f1f771001aceb9ac
   - path: bgfx/13-stencil.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: a6b0e3b8c79a2177a991f141d0c03af6
+        checksum: 23d1d46e5fbb6212fde10a9a27ba9b42
   - path: bgfx/14-shadowvolumes.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: b0a3e0897c43f874ced825c464d8ea34
+        checksum: 072e019d2ea1eab16c9258c42576a3e5
   - path: bgfx/15-shadowmaps-simple.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 06d568886371d68d3836d5c647aae1e0
+        checksum: d3497adc3a91f2fa89950da384391fe7
   - path: bgfx/16-shadowmaps.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 9b7efb87ab939f9c955a9e3b006d8c3e
+        checksum: 4b1942b6c218c83de66687782f2cfd3b
   - path: bgfx/18-ibl.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 905e0b702bd936f65186e8ecfe903182
+        checksum: 8b7be0c59b4be560dba4ca7583ed1130
   - path: bgfx/19-oit.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -137,7 +137,7 @@ traces:
   - path: bgfx/31-rsm.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: a67afc372d2783f123dd2e2c0e32dfaf
+        checksum: 559e9fabc82a7979599ab1153700f018
   - path: bgfx/32-particles.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -145,7 +145,7 @@ traces:
   - path: bgfx/33-pom.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: ebee76b92d1c7b8b71b53998ed20843f
+        checksum: 1816d243de0e945913360b78d04c01ca
   - path: bgfx/34-mvs.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -157,7 +157,7 @@ traces:
   - path: bgfx/36-sky.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 914356a89e1d00bb6389d60e3b0f19e5
+        checksum: 9971a794fe1b12fc21b8655ccb3b6bac
   - path: bgfx/37-gpudrivenrendering.rdc
     expectations:
       - device: gl-vmware-llvmpipe
@@ -169,7 +169,7 @@ traces:
   - path: bgfx/39-assao.rdc
     expectations:
       - device: gl-vmware-llvmpipe
-        checksum: 0845a76ade386b7896807313229d4afe
+        checksum: dfe7796f4bd2b758baf253714e92c8da
   - path: bgfx/40-svt.rdc
     expectations:
       - device: gl-vmware-llvmpipe
diff --git a/.gitlab-ci/traces-radeonsi.yml b/.gitlab-ci/traces-radeonsi.yml
index d9eccb1f49b..66c30c5e792 100644
--- a/.gitlab-ci/traces-radeonsi.yml
+++ b/.gitlab-ci/traces-radeonsi.yml
@@ -17,7 +17,7 @@ traces:
   - path: 0ad/0ad.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: c6261e56f37cc5d73b83d493561df646
+        checksum: 79634d7a6d48d36165ef12fde2901dfc
   - path: pathfinder/demo.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -33,11 +33,11 @@ traces:
   - path: gputest/furmark.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: d71c0d8e6c46c8f29d1aa8d0ed7d3c87
+        checksum: 84c499203944cdc59e70450c324bb8df
   - path: gputest/pixmark-piano.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 014b88f3357d32af0c306d4a14eb36ef
+        checksum: 7bfbac352caf8a83a09687b2a93933e5
   - path: gputest/triangle.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -45,7 +45,7 @@ traces:
   - path: humus/Portals.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: b6f00f39566152c3c8462903b7a1f258
+        checksum: fc7d00efe380cacbd4e9ef9b231aea2f
   - path: glmark2/buffer-columns=200:interleave=false:update-dispersion=0.9:update-fraction=0.5:update-method=map.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -65,7 +65,7 @@ traces:
   - path: glmark2/bump-bump-render=high-poly.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: e2e83a59d4eb6763d2fe922932aa14b5
+        checksum: 0793212ea545bccd39c89d9a6680d9b1
   - path: glmark2/bump-bump-render=normals.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -113,7 +113,7 @@ traces:
   - path: glmark2/ideas-speed=duration.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 966191b52dc4885cfc56c0e7c80856b2
+        checksum: 74f9323bd703e9d3f6440dc3d141fc9f
   - path: glmark2/loop-fragment-loop=false:fragment-steps=5:vertex-steps=5.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -133,11 +133,11 @@ traces:
   - path: glmark2/refract.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: db300d3a8afb6cecc19ae8e38c7b87c6
+        checksum: 6184a3ce8de6605c6ab8a894b80cf5bf
   - path: glmark2/shading-shading=blinn-phong-inf.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 36b193b33127144f8080aa60d582e65e
+        checksum: 1752c09718288a5fc0f1f8f81d7dd558
   - path: glmark2/shading-shading=cel.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -145,7 +145,7 @@ traces:
   - path: glmark2/shading-shading=gouraud.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 22bd04eb9862eb4ecff525ac83b2826d
+        checksum: 3558380167441cd395d010afe533a277
   - path: glmark2/shading-shading=phong.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -157,7 +157,7 @@ traces:
   - path: glmark2/terrain.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 52f19a65bd77ae03b5f51ee0038a2982
+        checksum: 8ccd48a1085a23059573f3541840d295
   - path: glmark2/texture-texture-filter=linear.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -169,15 +169,15 @@ traces:
   - path: glmark2/texture-texture-filter=nearest.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: bd00656bf6683dae40bfa26a6416061d
+        checksum: 555cf351afb193a725afa56204bbcf35
   - path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 41a0b6eddecdf1df645ccf34082aac88
+        checksum: 02f654ad77c0c1106e1b31e1c86c93bb
   - path: gputest/gimark.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 98e742377cef049e98014300a34d7bba
+        checksum: 4442dbd44a9704c499da4817fffce306
   - path: gputest/pixmark-julia-fp32.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -189,7 +189,7 @@ traces:
   - path: gputest/pixmark-volplosion.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 708f92a8ac8aef23a4a544cc5ec755d6
+        checksum: c8321043b7c05156b71f9837b54e7b50
   - path: gputest/plot3d.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -197,11 +197,11 @@ traces:
   - path: gputest/tessmark.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: d6597f93caa383c492d8290036c60919
+        checksum: cdcf984e81f2cda9ca7f56e3a4addaa7
   - path: humus/AmbientAperture.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 56291d097c4be9c079fb771c849c6a2e
+        checksum: 664ea58a62b27737b7d0ae9e86ab85c0
   - path: humus/CelShading.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -209,7 +209,7 @@ traces:
   - path: humus/DynamicBranching3.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 0f02743d054c66f04da404add87d67dc
+        checksum: 5a5addfb1c7acc22104c0ecb29f45174
   - path: humus/HDR.trace
     expectations:
       - device: gl-radeonsi-stoney
@@ -217,15 +217,15 @@ traces:
   - path: humus/RaytracedShadows.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 4209605af96a1d7c20aa7324aa93839f
+        checksum: 2e2e58e0b32329d6cf90ab0ec71108fd
   - path: humus/VolumetricFogging2.trace
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: 12ad73ad05b01854da65eb0094612ebf
+        checksum: 1a9b3aa90d2c4a0d3937ecacca2da052
   - path: supertuxkart/supertuxkart-antediluvian-abyss.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: a2c4c127873f93b7db4ef48ea9fb7689
+        checksum: 499e93c37e33cc6430c7a9f94266f2f7
   - path: supertuxkart/supertuxkart-menu.rdc
     expectations:
       - device: gl-radeonsi-stoney
@@ -233,4 +233,4 @@ traces:
   - path: supertuxkart/supertuxkart-ravenbridge-mansion.rdc
     expectations:
       - device: gl-radeonsi-stoney
-        checksum: c8f9eae92c67c7d53db4d69a703e3914
+        checksum: 38a9f26c60a0bc4245b97d32da84ef75
diff --git a/.gitlab-ci/traces-virgl.yml b/.gitlab-ci/traces-virgl.yml
index 5b142925ab5..86154abfbf1 100644
--- a/.gitlab-ci/traces-virgl.yml
+++ b/.gitlab-ci/traces-virgl.yml
@@ -9,7 +9,7 @@ traces:
   - path: glmark2/jellyfish.rdc
     expectations:
       - device: gl-virgl
-        checksum: e0fe979fee129c0ed42a3059d1a4e1c9
+        checksum: ebaa1e2d04d7dfe5a91499510722c46e
   - path: glxgears/glxgears-2.trace
     expectations:
       - device: gl-virgl
@@ -21,7 +21,7 @@ traces:
   - path: gputest/pixmark-piano.trace
     expectations:
       - device: gl-virgl
-        checksum: 24eb95484e5b8288b3d5d65117c98641
+        checksum: 43b2c4db0d6810cca945071b9a645561
   - path: gputest/triangle.trace
     expectations:
       - device: gl-virgl
@@ -29,4 +29,4 @@ traces:
   - path: humus/Portals.trace
     expectations:
       - device: gl-virgl
-        checksum: 7caf9a1b50b7fc312a623be7c45553cb
+        checksum: 6d78eed6749f01cc5625dec0ad129c66
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 5df25012dfd..80d477a5933 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -68,7 +68,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
    unsigned num_components = nir_op_infos[alu->op].input_sizes[0];
 
    nir_ssa_def *last = NULL;
-   for (unsigned i = 0; i < num_components; i++) {
+   for (int i = num_components - 1; i >= 0; i--) {
       nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
       nir_alu_ssa_dest_init(chan, 1, alu->dest.dest.ssa.bit_size);
       nir_alu_src_copy(&chan->src[0], &alu->src[0], chan);
@@ -82,7 +82,7 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op,
 
       nir_builder_instr_insert(builder, &chan->instr);
 
-      if (i == 0) {
+      if (i == num_components - 1) {
          last = &chan->dest.dest.ssa;
       } else {
          last = nir_build_alu(builder, merge_op,
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 4c3a048e24e..8c5045e4a13 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -552,14 +552,14 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
    def pairwise_reduce(start, size):
       if (size == 1):
          return srcs[start]
-      return reduce_(pairwise_reduce(start, size // 2), pairwise_reduce(start + size // 2, size // 2))
+      return reduce_(pairwise_reduce(start + size // 2, size // 2), pairwise_reduce(start, size // 2))
    for size in [2, 4, 8, 16]:
       opcode(name + str(size) + suffix, output_size, output_type,
              [size, size], [src_type, src_type], False, _2src_commutative,
              final(pairwise_reduce(0, size)))
    opcode(name + "3" + suffix, output_size, output_type,
           [3, 3], [src_type, src_type], False, _2src_commutative,
-          final(reduce_(reduce_(srcs[0], srcs[1]), srcs[2])))
+          final(reduce_(reduce_(srcs[2], srcs[1]), srcs[0])))
 
 def binop_reduce_all_sizes(name, output_size, src_type, prereduce_expr,
                            reduce_expr, final_expr):



More information about the mesa-commit mailing list