[Mesa-dev] [PATCH v3 18/44] nir/algebraic: add optimizations for fadd, fsub and fmul with rounding mode

Samuel Iglesias Gonsálvez siglesias at igalia.com
Wed Feb 6 10:44:47 UTC 2019


Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 73 +++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 71c626e1b3f..3800db1da20 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -89,30 +89,51 @@ optimizations = [
    (('f2b', ('fneg', a)), ('f2b', a)),
    (('i2b', ('ineg', a)), ('i2b', a)),
    (('~fadd', a, 0.0), a),
+   (('~fadd_rtne', a, 0.0), a),
+   (('~fadd_rtz', a, 0.0), a),
    (('iadd', a, 0), a),
    (('usadd_4x8', a, 0), a),
    (('usadd_4x8', a, ~0), ~0),
    (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+   (('~fadd_rtne', ('fmul_rtne', a, b), ('fmul_rtne', a, c)), ('fmul_rtne', a, ('fadd_rtne', b, c))),
+   (('~fadd_rtz', ('fmul_rtz', a, b), ('fmul_rtz', a, c)), ('fmul_rtz', a, ('fadd_rtz', b, c))),
    (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
    (('~fadd', ('fneg', a), a), 0.0),
+   (('~fadd_rtne', ('fneg', a), a), 0.0),
+   (('~fadd_rtz', ('fneg', a), a), 0.0),
    (('iadd', ('ineg', a), a), 0),
    (('iadd', ('ineg', a), ('iadd', a, b)), b),
    (('iadd', a, ('iadd', ('ineg', a), b)), b),
    (('~fadd', ('fneg', a), ('fadd', a, b)), b),
    (('~fadd', a, ('fadd', ('fneg', a), b)), b),
    (('~fmul', a, 0.0), 0.0),
+   (('~fadd_rtne', ('fneg', a), ('fadd_rtne', a, b)), b),
+   (('~fadd_rtne', a, ('fadd_rtne', ('fneg', a), b)), b),
+   (('~fmul_rtne', a, 0.0), 0.0),
+   (('~fadd_rtz', ('fneg', a), ('fadd_rtz', a, b)), b),
+   (('~fadd_rtz', a, ('fadd_rtz', ('fneg', a), b)), b),
+   (('~fmul_rtz', a, 0.0), 0.0),
+
    (('imul', a, 0), 0),
    (('umul_unorm_4x8', a, 0), 0),
    (('umul_unorm_4x8', a, ~0), a),
    (('fmul', a, 1.0), a),
+   (('fmul_rtne', a, 1.0), a),
+   (('fmul_rtz', a, 1.0), a),
    (('imul', a, 1), a),
    (('fmul', a, -1.0), ('fneg', a)),
+   (('fmul_rtne', a, -1.0), ('fneg', a)),
+   (('fmul_rtz', a, -1.0), ('fneg', a)),
    (('imul', a, -1), ('ineg', a)),
    # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a
    # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a
    # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0
    (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
    (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
+   (('fmul_rtne', ('fsign', a), ('fmul_rtne', a, a)), ('fmul_rtne', ('fabs', a), a)),
+   (('fmul_rtne', ('fmul_rtne', ('fsign', a), a), a), ('fmul_rtne', ('fabs', a), a)),
+   (('fmul_rtz', ('fsign', a), ('fmul_rtz', a, a)), ('fmul_rtz', ('fabs', a), a)),
+   (('fmul_rtz', ('fmul_rtz', ('fsign', a), a), a), ('fmul_rtz', ('fabs', a), a)),
    (('~ffma', 0.0, a, b), b),
    (('~ffma', a, 0.0, b), b),
    (('~ffma', a, b, 0.0), ('fmul', a, b)),
@@ -139,6 +160,23 @@ optimizations = [
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
    (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
 
+   (('~fadd_rtne', ('fmul_rtne', a, ('fadd_rtne', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul_rtne', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
+   (('~fadd_rtne@32', ('fmul_rtne', a, ('fadd_rtne', 1.0, ('fneg',         c ))), ('fmul_rtne', b,         c )), ('flrp', a, b, c), '!options->lower_flrp32'),
+   (('~fadd_rtne@64', ('fmul_rtne', a, ('fadd_rtne', 1.0, ('fneg',         c ))), ('fmul_rtne', b,         c )), ('flrp', a, b, c), '!options->lower_flrp64'),
+   (('~fadd_rtne', a, ('fmul_rtne', ('b2f', 'c@1'), ('fadd_rtne', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
+   (('~fadd_rtne@32', a, ('fmul_rtne',         c , ('fadd_rtne', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
+   (('~fadd_rtne@64', a, ('fmul_rtne',         c , ('fadd_rtne', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
+   (('~fadd_rtne', ('fmul_rtne', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
+
+   (('~fadd_rtz', ('fmul_rtz', a, ('fadd_rtz', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul_rtz', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
+   (('~fadd_rtz@32', ('fmul_rtz', a, ('fadd_rtz', 1.0, ('fneg',         c ))), ('fmul_rtz', b,         c )), ('flrp', a, b, c), '!options->lower_flrp32'),
+   (('~fadd_rtz@64', ('fmul_rtz', a, ('fadd_rtz', 1.0, ('fneg',         c ))), ('fmul_rtz', b,         c )), ('flrp', a, b, c), '!options->lower_flrp64'),
+   (('~fadd_rtz', a, ('fmul_rtz', ('b2f', 'c@1'), ('fadd_rtz', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
+   (('~fadd_rtz@32', a, ('fmul_rtz',         c , ('fadd_rtz', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
+   (('~fadd_rtz@64', a, ('fmul_rtz',         c , ('fadd_rtz', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
+   (('~fadd_rtz', ('fmul_rtz', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
+
+
    (('fdot4', ('vec4', a, b,   c,   1.0), d), ('fdph',  ('vec3', a, b, c), d)),
    (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)),
    (('fdot4', ('vec4', a, b,   0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)),
@@ -594,35 +632,56 @@ optimizations = [
 
    # Subtracts
    (('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+   (('~fsub_rtz', a, ('fsub_rtz', 0.0, b)), ('fadd_rtz', a, b)),
+   (('~fsub_rtne', a, ('fsub_rtne', 0.0, b)), ('fadd_rtne', a, b)),
    (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
    (('ussub_4x8', a, 0), a),
    (('ussub_4x8', a, ~0), 0),
    (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+   (('fsub_rtne', a, b), ('fadd_rtne', a, ('fneg', b)), 'options->lower_sub'),
+   (('fsub_rtz', a, b), ('fadd_rtz', a, ('fneg', b)), 'options->lower_sub'),
    (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+   (('~fadd_rtne', a, ('fsub_rtne', 0.0, b)), ('fsub_rtne', a, b)),
+   (('~fadd_rtz', a, ('fsub_rtz', 0.0, b)), ('fsub_rtz', a, b)),
    (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
    (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
    (('iabs', ('isub', 0, a)), ('iabs', a)),
 
    # Propagate negation up multiplication chains
    (('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
+   (('fmul_rtne', ('fneg', a), b), ('fneg', ('fmul_rtne', a, b))),
+   (('fmul_rtz', ('fneg', a), b), ('fneg', ('fmul_rtz', a, b))),
    (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
 
    # Propagate constants up multiplication chains
    (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
+   (('~fmul_rtne(is_used_once)', ('fmul_rtne(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul_rtne', ('fmul_rtne', a, c), b)),
+   (('~fmul_rtz(is_used_once)', ('fmul_rtz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul_rtz', ('fmul_rtz', a, c), b)),
    (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
    (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
+   (('~fadd_rtne(is_used_once)', ('fadd_rtne(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd_rtne', ('fadd_rtne', a, c), b)),
+   (('~fadd_rtz(is_used_once)', ('fadd_rtz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd_rtz', ('fadd_rtz', a, c), b)),
    (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
 
    # Reassociate constants in add/mul chains so they can be folded together.
    # For now, we mostly only handle cases where the constants are separated by
    # a single non-constant.  We could do better eventually.
    (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
+   (('~fmul_rtne', '#a', ('fmul_rtne', b, '#c')), ('fmul_rtne', ('fmul_rtne', a, c), b)),
+   (('~fmul_rtz', '#a', ('fmul_rtz', b, '#c')), ('fmul_rtz', ('fmul_rtz', a, c), b)),
+
    (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
    (('~fadd', '#a',          ('fadd', b, '#c')),  ('fadd', ('fadd', a,          c),           b)),
    (('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
+
+   (('~fadd_rtne', '#a',          ('fadd_rtne', b, '#c')),  ('fadd_rtne', ('fadd_rtne', a,          c),           b)),
+   (('~fadd_rtne', '#a', ('fneg', ('fadd_rtne', b, '#c'))), ('fadd_rtne', ('fadd_rtne', a, ('fneg', c)), ('fneg', b))),
+   (('~fadd_rtz', '#a',          ('fadd_rtz', b, '#c')),  ('fadd_rtz', ('fadd_rtz', a,          c),           b)),
+   (('~fadd_rtz', '#a', ('fneg', ('fadd_rtz', b, '#c'))), ('fadd_rtz', ('fadd_rtz', a, ('fneg', c)), ('fneg', b))),
+
    (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
 
    # By definition...
@@ -901,18 +960,32 @@ for op in ['fadd', 'fmul', 'iadd', 'imul']:
 before_ffma_optimizations = [
    # Propagate constants down multiplication chains
    (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
+   (('~fmul_rtne(is_used_once)', ('fmul_rtne(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul_rtne', ('fmul_rtne', a, c), b)),
+   (('~fmul_rtz(is_used_once)', ('fmul_rtz(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul_rtz', ('fmul_rtz', a, c), b)),
+
    (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
    (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
+   (('~fadd_rtne(is_used_once)', ('fadd_rtne(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd_rtne', ('fadd_rtne', a, c), b)),
+   (('~fadd_rtz(is_used_once)', ('fadd_rtz(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd_rtz', ('fadd_rtz', a, c), b)),
+
    (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
 
    (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+   (('~fadd_rtne', ('fmul_rtne', a, b), ('fmul_rtne', a, c)), ('fmul_rtne', a, ('fadd_rtne', b, c))),
+   (('~fadd_rtz', ('fmul_rtz', a, b), ('fmul_rtz', a, c)), ('fmul_rtz', a, ('fadd_rtz', b, c))),
    (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
    (('~fadd', ('fneg', a), a), 0.0),
+   (('~fadd_rtne', ('fneg', a), a), 0.0),
+   (('~fadd_rtz', ('fneg', a), a), 0.0),
    (('iadd', ('ineg', a), a), 0),
    (('iadd', ('ineg', a), ('iadd', a, b)), b),
    (('iadd', a, ('iadd', ('ineg', a), b)), b),
    (('~fadd', ('fneg', a), ('fadd', a, b)), b),
    (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+   (('~fadd_rtne', ('fneg', a), ('fadd_rtne', a, b)), b),
+   (('~fadd_rtne', a, ('fadd_rtne', ('fneg', a), b)), b),
+   (('~fadd_rtz', ('fneg', a), ('fadd_rtz', a, b)), b),
+   (('~fadd_rtz', a, ('fadd_rtz', ('fneg', a), b)), b),
 ]
 
 # This section contains "late" optimizations that should be run after the
-- 
2.19.1



More information about the mesa-dev mailing list