[Mesa-dev] [PATCH 10/10] i965: use nir unrolling for scalar backend Gen7+

Fri Sep 16 01:06:12 UTC 2016

On Thu, 2016-09-15 at 19:49 -0400, Connor Abbott wrote:
> This seems a little dubious... why restrict it to gen7+?

Because if we don't switch to using nir_lower_indirect_derefs() then
lower_variable_index_to_cond_assign() makes a big mess before we get to
unrolling. I was getting piglit regressions when switching to for gen6
and below, unfortunately I have no hardware to debug it on. 

>  And why only
> scalar? This pass assumes SSA, but so do many other passes in core
> NIR
> that we also run in nir_optimize(), so that shouldn't be a problem.

I need to retest to give you the exact reason but there was a lowering
pass that is not called for the vector backend that meant we still had
to deal with nir registers.

> 
> On Thu, Sep 15, 2016 at 3:03 AM, Timothy Arceri
> <timothy.arceri at collabora.com> wrote:
> > 
> > ---
> >  src/compiler/glsl/glsl_parser_extras.cpp | 12 +++++----
> >  src/mesa/drivers/dri/i965/brw_compiler.c | 42
> > +++++++++++++++++++++++---------
> >  src/mesa/drivers/dri/i965/brw_nir.c      | 23 +++++++++++++----
> >  3 files changed, 55 insertions(+), 22 deletions(-)
> > 
> > diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
> > b/src/compiler/glsl/glsl_parser_extras.cpp
> > index 436ddd0..a5c926a 100644
> > --- a/src/compiler/glsl/glsl_parser_extras.cpp
> > +++ b/src/compiler/glsl/glsl_parser_extras.cpp
> > @@ -2057,12 +2057,14 @@ do_common_optimization(exec_list *ir, bool
> > linked,
> >     OPT(optimize_split_arrays, ir, linked);
> >     OPT(optimize_redundant_jumps, ir);
> > 
> > -   loop_state *ls = analyze_loop_variables(ir);
> > -   if (ls->loop_found) {
> > -      OPT(set_loop_controls, ir, ls);
> > -      OPT(unroll_loops, ir, ls, options);
> > +   if (options->MaxUnrollIterations != 0) {
> > +      loop_state *ls = analyze_loop_variables(ir);
> > +      if (ls->loop_found) {
> > +         OPT(set_loop_controls, ir, ls);
> > +         OPT(unroll_loops, ir, ls, options);
> > +      }
> > +      delete ls;
> >     }
> > -   delete ls;
> > 
> >  #undef OPT
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
> > b/src/mesa/drivers/dri/i965/brw_compiler.c
> > index 86b1eaa..523b554 100644
> > --- a/src/mesa/drivers/dri/i965/brw_compiler.c
> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
> > @@ -43,18 +43,28 @@
> >     .use_interpolated_input_intrinsics =
> > true,                                 \
> >     .vertex_id_zero_based = true
> > 
> > +#define
> > COMMON_SCALAR_OPTIONS                                              
> >    \
> > +   .lower_pack_half_2x16 =
> > true,                                              \
> > +   .lower_pack_snorm_2x16 =
> > true,                                             \
> > +   .lower_pack_snorm_4x8 =
> > true,                                              \
> > +   .lower_pack_unorm_2x16 =
> > true,                                             \
> > +   .lower_pack_unorm_4x8 =
> > true,                                              \
> > +   .lower_unpack_half_2x16 =
> > true,                                            \
> > +   .lower_unpack_snorm_2x16 =
> > true,                                           \
> > +   .lower_unpack_snorm_4x8 =
> > true,                                            \
> > +   .lower_unpack_unorm_2x16 =
> > true,                                           \
> > +   .lower_unpack_unorm_4x8 =
> > true                                             \
> > +
> >  static const struct nir_shader_compiler_options scalar_nir_options
> > = {
> >     COMMON_OPTIONS,
> > -   .lower_pack_half_2x16 = true,
> > -   .lower_pack_snorm_2x16 = true,
> > -   .lower_pack_snorm_4x8 = true,
> > -   .lower_pack_unorm_2x16 = true,
> > -   .lower_pack_unorm_4x8 = true,
> > -   .lower_unpack_half_2x16 = true,
> > -   .lower_unpack_snorm_2x16 = true,
> > -   .lower_unpack_snorm_4x8 = true,
> > -   .lower_unpack_unorm_2x16 = true,
> > -   .lower_unpack_unorm_4x8 = true,
> > +   COMMON_SCALAR_OPTIONS,
> > +   .max_unroll_iterations = 0,
> > +};
> > +
> > +static const struct nir_shader_compiler_options
> > scalar_nir_options_gen7 = {
> > +   COMMON_OPTIONS,
> > +   COMMON_SCALAR_OPTIONS,
> > +   .max_unroll_iterations = 32,
> >  };
> > 
> >  static const struct nir_shader_compiler_options vector_nir_options
> > = {
> > @@ -75,6 +85,7 @@ static const struct nir_shader_compiler_options
> > vector_nir_options = {
> >     .lower_unpack_unorm_2x16 = true,
> >     .lower_extract_byte = true,
> >     .lower_extract_word = true,
> > +   .max_unroll_iterations = 0,
> >  };
> > 
> >  static const struct nir_shader_compiler_options
> > vector_nir_options_gen6 = {
> > @@ -92,6 +103,7 @@ static const struct nir_shader_compiler_options
> > vector_nir_options_gen6 = {
> >     .lower_unpack_unorm_2x16 = true,
> >     .lower_extract_byte = true,
> >     .lower_extract_word = true,
> > +   .max_unroll_iterations = 0,
> >  };
> > 
> >  struct brw_compiler *
> > @@ -119,7 +131,6 @@ brw_compiler_create(void *mem_ctx, const struct
> > gen_device_info *devinfo)
> > 
> >     /* We want the GLSL compiler to emit code that uses condition
> > codes */
> >     for (int i = 0; i < MESA_SHADER_STAGES; i++) {
> > -      compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
> >        compiler->glsl_compiler_options[i].MaxIfDepth =
> >           devinfo->gen < 6 ? 16 : UINT_MAX;
> > 
> > @@ -140,8 +151,15 @@ brw_compiler_create(void *mem_ctx, const
> > struct gen_device_info *devinfo)
> >           compiler->glsl_compiler_options[i].EmitNoIndirectSampler
> > = true;
> > 
> >        if (is_scalar) {
> > -         compiler->glsl_compiler_options[i].NirOptions =
> > &scalar_nir_options;
> > +         if (devinfo->gen > 6) {
> > +            compiler->glsl_compiler_options[i].MaxUnrollIterations 
> > = 0;
> > +         } else {
> > +            compiler->glsl_compiler_options[i].MaxUnrollIterations 
> > = 32;
> > +         }
> > +         compiler->glsl_compiler_options[i].NirOptions =
> > +            devinfo->gen < 7 ? &scalar_nir_options :
> > &scalar_nir_options_gen7;
> >        } else {
> > +         compiler->glsl_compiler_options[i].MaxUnrollIterations =
> > 32;
> >           compiler->glsl_compiler_options[i].NirOptions =
> >              devinfo->gen < 6 ? &vector_nir_options :
> > &vector_nir_options_gen6;
> >        }
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> > b/src/mesa/drivers/dri/i965/brw_nir.c
> > index 0c15b55..15abb77 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -367,8 +367,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
> >  #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
> > 
> >  static nir_shader *
> > -nir_optimize(nir_shader *nir, bool is_scalar)
> > +nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
> > +             bool is_scalar)
> >  {
> > +   nir_variable_mode indirect_mask = 0;
> > +   if (compiler->glsl_compiler_options[nir-
> > >stage].EmitNoIndirectInput)
> > +      indirect_mask |= nir_var_shader_in;
> > +   if (compiler->glsl_compiler_options[nir-
> > >stage].EmitNoIndirectOutput)
> > +      indirect_mask |= nir_var_shader_out;
> > +   if (compiler->glsl_compiler_options[nir-
> > >stage].EmitNoIndirectTemp)
> > +      indirect_mask |= nir_var_local;
> > +
> >     bool progress;
> >     do {
> >        progress = false;
> > @@ -391,6 +400,10 @@ nir_optimize(nir_shader *nir, bool is_scalar)
> >        OPT(nir_opt_algebraic);
> >        OPT(nir_opt_constant_folding);
> >        OPT(nir_opt_dead_cf);
> > +      if (nir->options->max_unroll_iterations != 0) {
> > +         OPT_V(nir_to_lcssa);
> > +         OPT(nir_opt_loop_unroll, indirect_mask);
> > +      }
> >        OPT(nir_opt_remove_phis);
> >        OPT(nir_opt_undef);
> >        OPT_V(nir_lower_doubles, nir_lower_drcp |
> > @@ -444,7 +457,7 @@ brw_preprocess_nir(const struct brw_compiler
> > *compiler, nir_shader *nir)
> > 
> >     OPT(nir_split_var_copies);
> > 
> > -   nir = nir_optimize(nir, is_scalar);
> > +   nir = nir_optimize(nir, compiler, is_scalar);
> > 
> >     if (is_scalar) {
> >        OPT_V(nir_lower_load_const_to_scalar);
> > @@ -466,7 +479,7 @@ brw_preprocess_nir(const struct brw_compiler
> > *compiler, nir_shader *nir)
> >     }
> > 
> >     /* Get rid of split copies */
> > -   nir = nir_optimize(nir, is_scalar);
> > +   nir = nir_optimize(nir, compiler, is_scalar);
> > 
> >     OPT(nir_remove_dead_variables, nir_var_local);
> > 
> > @@ -491,7 +504,7 @@ brw_postprocess_nir(nir_shader *nir, const
> > struct brw_compiler *compiler,
> >     bool progress; /* Written by OPT and OPT_V */
> >     (void)progress;
> > 
> > -   nir = nir_optimize(nir, is_scalar);
> > +   nir = nir_optimize(nir, compiler, is_scalar);
> > 
> >     if (devinfo->gen >= 6) {
> >        /* Try and fuse multiply-adds */
> > @@ -580,7 +593,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
> > 
> >     if (nir_lower_tex(nir, &tex_options)) {
> >        nir_validate_shader(nir);
> > -      nir = nir_optimize(nir, is_scalar);
> > +      nir = nir_optimize(nir, compiler, is_scalar);
> >     }
> > 
> >     return nir;
> > --
> > 2.7.4
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev