Mesa (master): nir/algebraic: Mark some logic-joined comparison reductions as exact

Tue Jan 5 02:24:37 UTC 2021

Module: Mesa
Branch: master
Commit: 9167324a86b5c60d13aa1f0b7ec6251293a931c5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9167324a86b5c60d13aa1f0b7ec6251293a931c5

Author: Ian Romanick <ian.d.romanick at intel.com>
Date:   Wed Jul  8 12:53:07 2020 -0700

nir/algebraic: Mark some logic-joined comparison reductions as exact

This also prevents some fossil-db regressions in "spir-v: Mark floating
point comparisons exact".

v2: Mark the fmin / fmax in the replacement exact to prevent other
optimizations from ruining the NaN-cleansing property of the fmin / fmax.
Suggested by Rhys.  Don't assume that constants are not NaN because some
components of a vector might be NaN while others are numbers.  Noticed
by Rhys.  This causes ~8 more shaders in Age of Wonders III (dxvk) to
regress on cycles (not instructions) by less than 1% when "spir-v: Mark
floating point comparisons exact" is applied.  This difference is too
small to care about.

All Intel platforms had similar results. (Tiger Lake shown)
total instructions in shared programs: 20908668 -> 20908670 (<.01%)
instructions in affected programs: 9196 -> 9198 (0.02%)
helped: 10
HURT: 5
helped stats (abs) min: 1 max: 2 x̄: 1.40 x̃: 1
helped stats (rel) min: 0.02% max: 5.41% x̄: 2.20% x̃: 2.16%
HURT stats (abs)   min: 2 max: 6 x̄: 3.20 x̃: 3
HURT stats (rel)   min: 2.44% max: 16.67% x̄: 9.39% x̃: 12.50%
95% mean confidence interval for instructions value: -1.22 1.49
95% mean confidence interval for instructions %-change: -2.08% 5.41%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 473515330 -> 473515288 (<.01%)
cycles in affected programs: 67146 -> 67104 (-0.06%)
helped: 10
HURT: 7
helped stats (abs) min: 1 max: 36 x̄: 15.90 x̃: 17
helped stats (rel) min: 0.01% max: 1.29% x̄: 0.66% x̃: 0.89%
HURT stats (abs)   min: 1 max: 48 x̄: 16.71 x̃: 4
HURT stats (rel)   min: 0.08% max: 1.94% x̄: 0.87% x̃: 0.19%
95% mean confidence interval for cycles value: -13.88 8.94
95% mean confidence interval for cycles %-change: -0.56% 0.49%
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6358>

---

 src/compiler/nir/nir_opt_algebraic.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 063a8579052..7401047563f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -598,14 +598,20 @@ optimizations.extend([
     ('fsat', ('fadd', ('fneg',  a), b)), '!options->lower_fsat'),
 
    (('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
-   (('~ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
-   (('~ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
-   (('~ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
-   (('~ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
-   (('~ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmax', b, c))),
-   (('~ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmin', a, b), c)),
-   (('~ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmin', b, c))),
-   (('~ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmax', a, b), c)),
+
+   # The ior versions are exact because fmin and fmax will always pick a
+   # non-NaN value, if one exists.  Therefore (a < NaN) || (a < c) == a <
+   # fmax(NaN, c) == a < c.  Mark the fmin or fmax in the replacement as exact
+   # to prevent other optimizations from ruining the "NaN cleansing" property
+   # of the fmin or fmax.
+   (('ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('!fmax', b, c))),
+   (('ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('!fmin', a, b), c)),
+   (('ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('!fmin', b, c))),
+   (('ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('!fmax', a, b), c)),
+   (('ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('!fmax', b, c))),
+   (('ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('!fmin', a, b), c)),
+   (('ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('!fmin', b, c))),
+   (('ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('!fmax', a, b), c)),
    (('~iand', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmin', b, c))),
    (('~iand', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmax', a, b), c)),
    (('~iand', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmax', b, c))),
@@ -643,10 +649,10 @@ for s in [16, 32, 64]:
        # used once as this ensures that the number of comparisons will not
        # increase.  The sources to the ior and iand are not symmetric, so the
        # rules have to be duplicated to get this behavior.
-       (('~ior', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
-       (('~ior', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
-       (('~ior', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
-       (('~ior', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
+       (('ior', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
+       (('ior', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))),
+       (('ior', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
+       (('ior', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))),
        (('~iand', ('flt(is_used_once)', 0.0, 'a@{}'.format(s)), ('flt', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
        (('~iand', ('flt', 0.0, 'a@{}'.format(s)), ('flt(is_used_once)', 'b@{}'.format(s), 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))),
        (('~iand', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),