[Mesa-dev] [PATCH 1/3] nir/algebraic: Separate ffma lowering from fusing
Jason Ekstrand
jason at jlekstrand.net
Fri May 6 00:51:18 UTC 2016
The i965 driver has its own pass for fusing mul+add combinations that's
much smarter than what nir_opt_algebraic can do so we don't want to get the
nir_opt_algebraic one just because we didn't set lower_ffma.
---
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_opt_algebraic.py | 2 +-
src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 8a616d4..ba9d311 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1624,6 +1624,7 @@ typedef struct nir_function {
typedef struct nir_shader_compiler_options {
bool lower_fdiv;
bool lower_ffma;
+ bool fuse_ffma;
bool lower_flrp32;
/** Lowers flrp when it does not support doubles */
bool lower_flrp64;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 0a95725..f8db2b6 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -108,7 +108,7 @@ optimizations = [
(('~fadd at 32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd at 64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
- (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
# Comparison simplifications
(('~inot', ('flt', a, b)), ('fge', a, b)),
(('~inot', ('fge', a, b)), ('flt', a, b)),
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 99c2ecb..5662927 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -44,6 +44,7 @@ ir3_tgsi_to_nir(const struct tgsi_token *tokens)
.lower_scmp = true,
.lower_flrp32 = true,
.lower_ffract = true,
+ .fuse_ffma = true,
.native_integers = true,
.vertex_id_zero_based = true,
.lower_extract_byte = true,
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list