[Mesa-dev] [PATCH 3/8] nir/i965: add before ffma algebraic opts
tournier.elie
tournier.elie at gmail.com
Tue Apr 18 10:14:42 UTC 2017
On 18 April 2017 at 06:52, Timothy Arceri <tarceri at itsqueeze.com> wrote:
> From: Timothy Arceri <timothy.arceri at collabora.com>
>
> This shuffles constants down in the reverse of what the previous
> patch does and applies some simplifications that may be made
> possible from doing so.
>
> Shader-db results BDW:
>
> total instructions in shared programs: 12980814 -> 12977822 (-0.02%)
> instructions in affected programs: 281889 -> 278897 (-1.06%)
> helped: 1231
> HURT: 128
>
> total cycles in shared programs: 246562852 -> 246567288 (0.00%)
> cycles in affected programs: 11271524 -> 11275960 (0.04%)
> helped: 1630
> HURT: 1378
>
> V2: mark float opts as inexact
Patches 1-3 are:
Reviewed-by: Elie Tournier <elie.tournier at collabora.com>
> ---
> src/compiler/nir/nir.h | 1 +
> src/compiler/nir/nir_opt_algebraic.py | 23 +++++++++++++++++++++++
> src/intel/compiler/brw_nir.c | 6 ++++++
> 3 files changed, 30 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index ce5b434..dbbf268 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2591,20 +2591,21 @@ void nir_convert_loop_to_lcssa(nir_loop *loop);
> /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
> * registers. If false, convert all values (even those not involved in a phi
> * node) to registers.
> */
> bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
>
> bool nir_lower_phis_to_regs_block(nir_block *block);
> bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
>
> bool nir_opt_algebraic(nir_shader *shader);
> +bool nir_opt_algebraic_before_ffma(nir_shader *shader);
> bool nir_opt_algebraic_late(nir_shader *shader);
> bool nir_opt_constant_folding(nir_shader *shader);
>
> bool nir_opt_global_to_local(nir_shader *shader);
>
> bool nir_copy_prop(nir_shader *shader);
>
> bool nir_opt_copy_prop_vars(nir_shader *shader);
>
> bool nir_opt_cse(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
> index 974345a..fe6e33d 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -523,20 +523,41 @@ for op in ['flt', 'fge', 'feq']:
> # left with just the original variable "a".
> for op in ['flt', 'fge', 'feq', 'fne',
> 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
> optimizations += [
> ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
> ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
> ((op, '#d', ('bcsel', a, '#b', '#c')),
> ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
> ]
>
> +# This section contains "late" optimizations that should be run before
> +# creating ffmas and calling regular optimizations for the final time.
> +# Optimizations should go here if they help code generation and conflict
> +# with the regular optimizations.
> +before_ffma_optimizations = [
> + # Propagate constants down multiplication chains
> + (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
> + (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
> + (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
> + (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
> +
> + (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
> + (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
> + (('~fadd', ('fneg', a), a), 0.0),
> + (('iadd', ('ineg', a), a), 0),
> + (('iadd', ('ineg', a), ('iadd', a, b)), b),
> + (('iadd', a, ('iadd', ('ineg', a), b)), b),
> + (('~fadd', ('fneg', a), ('fadd', a, b)), b),
> + (('~fadd', a, ('fadd', ('fneg', a), b)), b),
> +]
> +
> # This section contains "late" optimizations that should be run after the
> # regular optimizations have finished. Optimizations should go here if
> # they help code generation but do not necessarily produce code that is
> # more easily optimizable.
> late_optimizations = [
> # Most of these optimizations aren't quite safe when you get infinity or
> # Nan involved but the first one should be fine.
> (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
> (('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
> (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
> @@ -549,12 +570,14 @@ late_optimizations = [
>
> (('b2f(is_used_more_than_once)', ('inot', a)), ('bcsel', a, 0.0, 1.0)),
> (('fneg(is_used_more_than_once)', ('b2f', ('inot', a))), ('bcsel', a, -0.0, -1.0)),
>
> # we do these late so that we don't get in the way of creating ffmas
> (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
> (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
> ]
>
> print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
> +print nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
> + before_ffma_optimizations).render()
> print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
> late_optimizations).render()
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 36ccdf3..3c0a7ce 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -598,20 +598,26 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
> nir_shader *
> brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
> bool is_scalar)
> {
> const struct gen_device_info *devinfo = compiler->devinfo;
> bool debug_enabled =
> (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
>
> UNUSED bool progress; /* Written by OPT */
>
> +
> + do {
> + progress = false;
> + OPT(nir_opt_algebraic_before_ffma);
> + } while (progress);
> +
> nir = nir_optimize(nir, compiler, is_scalar);
>
> if (devinfo->gen >= 6) {
> /* Try and fuse multiply-adds */
> OPT(brw_nir_opt_peephole_ffma);
> }
>
> OPT(nir_opt_algebraic_late);
>
> OPT(nir_lower_to_source_mods);
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list