Mesa (master): nir/algebraic: Fix broken NaN and -0.0 behavior

Tue Jan 5 02:24:37 UTC 2021

Module: Mesa
Branch: master
Commit: 9771af5ddec37481042f6109772f15773642104c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9771af5ddec37481042f6109772f15773642104c

Author: Ian Romanick <ian.d.romanick at intel.com>
Date:   Tue Feb 18 12:52:42 2020 -0800

nir/algebraic: Fix broken NaN and -0.0 behavior

No shader-db or fossil-db changes on any Intel platform.

v2: Add a coding line to fix SCons build problems caused by the ±
character.

Fixes: 25bfba3335d ("nir/algebraic: Recognize open-coded copysign(1.0, a)")
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6358>

---

 src/compiler/nir/nir_opt_algebraic.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 11b54bf2555..1a725a74f65 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 # Copyright (C) 2014 Intel Corporation
 #
@@ -641,8 +642,6 @@ optimizations.extend([
 
 # Float sizes
 for s in [16, 32, 64]:
-    fp_one = {16: 0x3c00, 32: 0x3f800000, 64: 0x3ff0000000000000}[s]
-
     optimizations.extend([
        # These derive from the previous patterns with the application of b < 0 <=>
        # 0 < -b.  The transformation should be applied if either comparison is
@@ -658,16 +657,28 @@ for s in [16, 32, 64]:
        (('~iand', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
        (('~iand', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
 
-       # The (i2f32, ...) part is an open-coded fsign.  When that is combined with
-       # the bcsel, it's basically copysign(1.0, a).  There is no copysign in NIR,
-       # so emit an open-coded version of that.
+       # The (i2f32, ...) part is an open-coded fsign.  When that is combined
+       # with the bcsel, it's basically copysign(1.0, a).  There are some
+       # behavior differences between this pattern and copysign w.r.t. ±0 and
+       # NaN.  copysign(x, y) blindly takes the sign bit from y and applies it
+       # to x, regardless of whether either or both values are NaN.
+       #
+       # If a != a: bcsel(False, 1.0, i2f(b2i(False) - b2i(False))) = 0,
+       #            int(NaN >= 0.0) - int(NaN < 0.0) = 0 - 0 = 0
+       # If a == ±0: bcsel(True, 1.0, ...) = 1.0,
+       #            int(±0.0 >= 0.0) - int(±0.0 < 0.0) = 1 - 0 = 1
+       #
+       # For all other values of 'a', the original and replacement behave as
+       # copysign.
        #
-       # If 'a' is NaN, bcsel(False, 1.0, i2f(b2i(False) - b2i(False))) = 0, but
-       # the replacement will produce ±1.
+       # Marking the replacement comparisons as precise prevents any future
+       # optimizations from replacing either of the comparisons with the
+       # logical-not of the other.
        #
-       # The replacement will also produce a different value for -0: -1 vs +1.
+       # Note: Use b2i32 in the replacement because some platforms that
+       # support fp16 don't support int16.
        (('bcsel@{}'.format(s), ('feq', a, 0.0), 1.0, ('i2f{}'.format(s), ('iadd', ('b2i{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))))),
-        ('ior', fp_one, ('iand', a, 1 << (s - 1)))),
+        ('i2f{}'.format(s), ('iadd', ('b2i32', ('!fge', a, 0.0)), ('ineg', ('b2i32', ('!flt', a, 0.0)))))),
 
        (('bcsel', a, ('b2f(is_used_once)', 'b@{}'.format(s)), ('b2f', 'c@{}'.format(s))), ('b2f', ('bcsel', a, b, c))),