[Mesa-dev] [PATCH 3/8] nir/i965: add before ffma algebraic opts
Timothy Arceri
tarceri at itsqueeze.com
Tue Apr 18 05:52:20 UTC 2017
From: Timothy Arceri <timothy.arceri at collabora.com>
This shuffles constants down, the reverse of what the previous patch
does, and applies some simplifications that doing so may make possible.
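For illustration (a hand-written example, not part of the patch): the float
rules need the inexact marker ('~', added in v2) because moving a constant
across a multiply can change rounding and overflow behaviour:

   /* Sketch: (a * b) * c vs (a * c) * b with b constant. */
   #include <stdio.h>

   int main(void)
   {
      float a = 1e38f, b = 2e-38f, c = 10.0f;
      printf("%g\n", (a * b) * c);   /* ~2.0f * 10.0f = ~20         */
      printf("%g\n", (a * c) * b);   /* 1e39f overflows to infinity */
      return 0;
   }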
Shader-db results BDW:
total instructions in shared programs: 12980814 -> 12977822 (-0.02%)
instructions in affected programs: 281889 -> 278897 (-1.06%)
helped: 1231
HURT: 128
total cycles in shared programs: 246562852 -> 246567288 (0.00%)
cycles in affected programs: 11271524 -> 11275960 (0.04%)
helped: 1630
HURT: 1378
V2: mark float opts as inexact
---
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_opt_algebraic.py | 23 +++++++++++++++++++++++
src/intel/compiler/brw_nir.c | 6 ++++++
3 files changed, 30 insertions(+)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce5b434..dbbf268 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2591,20 +2591,21 @@ void nir_convert_loop_to_lcssa(nir_loop *loop);
 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
  * registers. If false, convert all values (even those not involved in a phi
  * node) to registers.
  */
 bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
 
 bool nir_lower_phis_to_regs_block(nir_block *block);
 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
 
 bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_before_ffma(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
 
 bool nir_opt_global_to_local(nir_shader *shader);
 
 bool nir_copy_prop(nir_shader *shader);
 
 bool nir_opt_copy_prop_vars(nir_shader *shader);
 
 bool nir_opt_cse(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 974345a..fe6e33d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -523,20 +523,41 @@ for op in ['flt', 'fge', 'feq']:
 # left with just the original variable "a".
 for op in ['flt', 'fge', 'feq', 'fne',
            'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
    optimizations += [
       ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
        ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
       ((op, '#d', ('bcsel', a, '#b', '#c')),
        ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
    ]
 
+# This section contains "late" optimizations that should be run before
+# creating ffmas and calling regular optimizations for the final time.
+# Optimizations should go here if they help code generation and conflict
+# with the regular optimizations.
+before_ffma_optimizations = [
+   # Propagate constants down multiplication chains
+   (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
+   (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
+   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
+   (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
+
+   (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+   (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
+   (('~fadd', ('fneg', a), a), 0.0),
+   (('iadd', ('ineg', a), a), 0),
+   (('iadd', ('ineg', a), ('iadd', a, b)), b),
+   (('iadd', a, ('iadd', ('ineg', a), b)), b),
+   (('~fadd', ('fneg', a), ('fadd', a, b)), b),
+   (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+]
+
 # This section contains "late" optimizations that should be run after the
 # regular optimizations have finished. Optimizations should go here if
 # they help code generation but do not necessarily produce code that is
 # more easily optimizable.
 late_optimizations = [
    # Most of these optimizations aren't quite safe when you get infinity or
    # Nan involved but the first one should be fine.
    (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
    (('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
    (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
@@ -549,12 +570,14 @@ late_optimizations = [
    (('b2f(is_used_more_than_once)', ('inot', a)), ('bcsel', a, 0.0, 1.0)),
    (('fneg(is_used_more_than_once)', ('b2f', ('inot', a))), ('bcsel', a, -0.0, -1.0)),
 
    # we do these late so that we don't get in the way of creating ffmas
    (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
    (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
+                                  before_ffma_optimizations).render()
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
                                   late_optimizations).render()
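A side note on the integer rules above: unlike the float ones they need no
'~' marker, because NIR's iadd/ineg wrap like C unsigned arithmetic, so the
cancellations hold even under overflow. A hand-written check (plain C with
uint32_t standing in for NIR 32-bit integers; not generated from the pass):

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      uint32_t a = 0xdeadbeefu, b = 0x12345678u;
      uint32_t sum = a + b;                  /* wraps; well-defined for unsigned */
      assert((uint32_t)(0u - a) + sum == b); /* iadd(ineg(a), iadd(a, b)) -> b */
      return 0;
   }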
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 36ccdf3..3c0a7ce 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -598,20 +598,26 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
 nir_shader *
 brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
                     bool is_scalar)
 {
    const struct gen_device_info *devinfo = compiler->devinfo;
 
    bool debug_enabled =
       (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
 
    UNUSED bool progress; /* Written by OPT */
+
+   do {
+      progress = false;
+      OPT(nir_opt_algebraic_before_ffma);
+   } while (progress);
+
    nir = nir_optimize(nir, compiler, is_scalar);
 
    if (devinfo->gen >= 6) {
       /* Try and fuse multiply-adds */
       OPT(brw_nir_opt_peephole_ffma);
    }
 
    OPT(nir_opt_algebraic_late);
 
    OPT(nir_lower_to_source_mods);
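The new pass is looped to a fixed point because one rewrite can enable the
next: shuffling a constant down an N-link multiply chain takes N-1
applications of the fmul/imul rules. The shape of the iteration, as a
hypothetical standalone helper (not in the patch; the hunk above uses the
local OPT() macro instead of a pass pointer):

   /* Run a NIR pass until it stops reporting progress. */
   #include <stdbool.h>
   #include "compiler/nir/nir.h"   /* assumed include path in the mesa tree */

   static void
   run_to_fixed_point(nir_shader *nir, bool (*pass)(nir_shader *))
   {
      bool progress;
      do {
         progress = pass(nir);     /* e.g. nir_opt_algebraic_before_ffma */
      } while (progress);
   }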
--
2.9.3