[Mesa-dev] [PATCH 11/13] nir: Add lrp lowering for doubles in opt_algebraic

Tue Apr 12 08:05:20 UTC 2016

Some hardware (i965 on Broadwell generation, for example) does not support
natively the execution of lrp instruction with double arguments.

Add 'lower_lrp_double' flag to lower this instruction in that case.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/compiler/nir/nir.h                | 2 ++
 src/compiler/nir/nir_opt_algebraic.py | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0d17ce0..0bfc849 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1582,6 +1582,8 @@ typedef struct nir_shader_compiler_options {
    bool lower_fdiv;
    bool lower_ffma;
    bool lower_flrp;
+   /** Lowers flrp when it does not supports doubles */
+   bool lower_flrp_double;
    bool lower_fpow;
    bool lower_fsat;
    bool lower_fsqrt;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index ec8929a..f971585 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -92,11 +92,14 @@ optimizations = [
    (('~flrp', 0.0, a, b), ('fmul', a, b)),
    (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
    (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+   (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), '!options->lower_flrp && options->lower_flrp_double', 64),
    (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
    (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
-   (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp'),
+   (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp', 32),
+   (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp && !options->lower_flrp_double', 64),
    (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
-   (('~fadd', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+   (('~fadd', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp', 32),
+   (('~fadd', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp && !options->lower_flrp_double', 64),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
    (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
    # Comparison simplifications
-- 
2.5.0