[Mesa-dev] [PATCH v3 19/44] nir/algebraic: add lowerings for ldexp with rounding modes

Wed Feb 6 10:44:48 UTC 2019

---
 src/compiler/nir/nir_opt_algebraic.py | 70 +++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 3800db1da20..3384c9c2e67 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -890,10 +890,80 @@ def ldexp(f, exp, bits):
    pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
    return ('fmul', ('fmul', f, pow2_1), pow2_2)
 
+def ldexp_rtne(f, exp, bits):
+   # Lowering of ldexp(f, exp) for a float of the given bit size, built from
+   # two 'fmul_rtne' multiplies by powers of two.
+   #
+   # The exponent is clamped first.  For 32 bits a normal exponent lies in
+   # [-126, 127], or [-149, 127] once denormals are included, so the largest
+   # possible swing is +-276; starting from FLT_MAX it takes
+   # ldexp(FLT_MAX, -278) to flush all the way to zero.  The GLSL spec only
+   # requires handling exponents in [-126, 128].  Clamping to [-252, 254]
+   # is therefore *mostly* correct: any value can be created (including
+   # denorms if the hardware supports them) and the exponent of any normal
+   # value can be moved anywhere.  The 16- and 64-bit limits below are used
+   # the same way.
+   for width, lowest, highest in ((16, -28, 30),
+                                  (32, -252, 254),
+                                  (64, -2044, 2046)):
+      if bits == width:
+         exp = ('imin', ('imax', exp, lowest), highest)
+         break
+   else:
+      assert False
+
+   # ldexp is specified as f * 2.0^exp, but a single multiply does not work
+   # with denormals and cannot cover the full exponent swing of normalized
+   # values.  Multiplying by 2^(exp >> 1) and then by 2^(exp - (exp >> 1))
+   # doubles the effective exponent range.  ishr is not the same as a
+   # division by two for -1, but that case still works because the second
+   # exponent is computed as exp minus the first.
+   first_pow2 = fexp2i(('ishr', exp, 1), bits)
+   second_pow2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
+   return ('fmul_rtne', ('fmul_rtne', f, first_pow2), second_pow2)
+
+def ldexp_rtz(f, exp, bits):
+   # Lowering of ldexp(f, exp) for a float of the given bit size, built from
+   # two 'fmul_rtz' multiplies by powers of two.
+   #
+   # Start by clamping exp.  With 32 bits, normal exponents span
+   # [-126, 127] and denormals stretch that to [-149, 127], which gives a
+   # potential swing of +-276; from FLT_MAX you need ldexp(FLT_MAX, -278)
+   # before the result flushes all the way to zero.  The GLSL spec only
+   # asks for exponents in [-126, 128].  The [-252, 254] clamp is *mostly*
+   # correct: it is enough to create any value (including denorms if the
+   # hardware supports them) and to move the exponent of any normal value
+   # wherever you want.  The 16- and 64-bit limits below are applied in the
+   # same way.
+   for size, exp_min, exp_max in ((16, -28, 30),
+                                  (32, -252, 254),
+                                  (64, -2044, 2046)):
+      if bits == size:
+         exp = ('imin', ('imax', exp, exp_min), exp_max)
+         break
+   else:
+      assert False
+
+   # The spec defines ldexp as f * 2.0^exp, but one multiply breaks down
+   # with denormals and cannot reach the full exponent swing available to
+   # normalized values.  Two multiplies, by 2^(exp >> 1) and then by
+   # 2^(exp - (exp >> 1)), double the effective exponent range.  ishr
+   # differs from a division by two for -1; that case still works because
+   # the second exponent is exp minus the first one.
+   low_pow2 = fexp2i(('ishr', exp, 1), bits)
+   high_pow2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
+   return ('fmul_rtz', ('fmul_rtz', f, low_pow2), high_pow2)
+
 optimizations += [
    (('ldexp@16', 'x', 'exp'), ldexp('x', 'exp', 16), 'options->lower_ldexp'),
    (('ldexp@32', 'x', 'exp'), ldexp('x', 'exp', 32), 'options->lower_ldexp'),
    (('ldexp@64', 'x', 'exp'), ldexp('x', 'exp', 64), 'options->lower_ldexp'),
+   (('ldexp_rtne@16', 'x', 'exp'), ldexp_rtne('x', 'exp', 16), 'options->lower_ldexp'),
+   (('ldexp_rtne@32', 'x', 'exp'), ldexp_rtne('x', 'exp', 32), 'options->lower_ldexp'),
+   (('ldexp_rtne@64', 'x', 'exp'), ldexp_rtne('x', 'exp', 64), 'options->lower_ldexp'),
+   (('ldexp_rtz@16', 'x', 'exp'), ldexp_rtz('x', 'exp', 16), 'options->lower_ldexp'),
+   (('ldexp_rtz@32', 'x', 'exp'), ldexp_rtz('x', 'exp', 32), 'options->lower_ldexp'),
+   (('ldexp_rtz@64', 'x', 'exp'), ldexp_rtz('x', 'exp', 64), 'options->lower_ldexp'),
 ]
 
 # Unreal Engine 4 demo applications open-codes bitfieldReverse()
-- 
2.19.1