[Mesa-dev] [PATCH 10/10] i965: use nir unrolling for scalar backend Gen7+
Connor Abbott
cwabbott0 at gmail.com
Fri Sep 16 01:22:54 UTC 2016
On Thu, Sep 15, 2016 at 9:06 PM, Timothy Arceri
<timothy.arceri at collabora.com> wrote:
> On Thu, 2016-09-15 at 19:49 -0400, Connor Abbott wrote:
>> This seems a little dubious... why restrict it to gen7+?
>
> Because if we don't switch to using nir_lower_indirect_derefs() then
> lower_variable_index_to_cond_assign() makes a big mess before we get to
> unrolling. I was getting piglit regressions when switching to for gen6
> and below, unfortunately I have no hardware to debug it on.
Ok, probably should be in the commit message.
>
>> And why only
>> scalar? This pass assumes SSA, but so do many other passes in core
>> NIR
>> that we also run in nir_optimize(), so that shouldn't be a problem.
>
> I need to retest to give you the exact reason but there was a lowering
> pass that is not called for the vector backend that meant we still had
> to deal with nir registers.
That doesn't quite sound right... we never generate registers in the
frontend (except if they're immediately lowered away), and we don't
lower to registers until pretty late in the process.
>
>
>>
>> On Thu, Sep 15, 2016 at 3:03 AM, Timothy Arceri
>> <timothy.arceri at collabora.com> wrote:
>> >
>> > ---
>> > src/compiler/glsl/glsl_parser_extras.cpp | 12 +++++----
>> > src/mesa/drivers/dri/i965/brw_compiler.c | 42
>> > +++++++++++++++++++++++---------
>> > src/mesa/drivers/dri/i965/brw_nir.c | 23 +++++++++++++----
>> > 3 files changed, 55 insertions(+), 22 deletions(-)
>> >
>> > diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
>> > b/src/compiler/glsl/glsl_parser_extras.cpp
>> > index 436ddd0..a5c926a 100644
>> > --- a/src/compiler/glsl/glsl_parser_extras.cpp
>> > +++ b/src/compiler/glsl/glsl_parser_extras.cpp
>> > @@ -2057,12 +2057,14 @@ do_common_optimization(exec_list *ir, bool
>> > linked,
>> > OPT(optimize_split_arrays, ir, linked);
>> > OPT(optimize_redundant_jumps, ir);
>> >
>> > - loop_state *ls = analyze_loop_variables(ir);
>> > - if (ls->loop_found) {
>> > - OPT(set_loop_controls, ir, ls);
>> > - OPT(unroll_loops, ir, ls, options);
>> > + if (options->MaxUnrollIterations != 0) {
>> > + loop_state *ls = analyze_loop_variables(ir);
>> > + if (ls->loop_found) {
>> > + OPT(set_loop_controls, ir, ls);
>> > + OPT(unroll_loops, ir, ls, options);
>> > + }
>> > + delete ls;
>> > }
>> > - delete ls;
>> >
>> > #undef OPT
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
>> > b/src/mesa/drivers/dri/i965/brw_compiler.c
>> > index 86b1eaa..523b554 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_compiler.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
>> > @@ -43,18 +43,28 @@
>> > .use_interpolated_input_intrinsics =
>> > true, \
>> > .vertex_id_zero_based = true
>> >
>> > +#define
>> > COMMON_SCALAR_OPTIONS
>> > \
>> > + .lower_pack_half_2x16 =
>> > true, \
>> > + .lower_pack_snorm_2x16 =
>> > true, \
>> > + .lower_pack_snorm_4x8 =
>> > true, \
>> > + .lower_pack_unorm_2x16 =
>> > true, \
>> > + .lower_pack_unorm_4x8 =
>> > true, \
>> > + .lower_unpack_half_2x16 =
>> > true, \
>> > + .lower_unpack_snorm_2x16 =
>> > true, \
>> > + .lower_unpack_snorm_4x8 =
>> > true, \
>> > + .lower_unpack_unorm_2x16 =
>> > true, \
>> > + .lower_unpack_unorm_4x8 =
>> > true \
>> > +
>> > static const struct nir_shader_compiler_options scalar_nir_options
>> > = {
>> > COMMON_OPTIONS,
>> > - .lower_pack_half_2x16 = true,
>> > - .lower_pack_snorm_2x16 = true,
>> > - .lower_pack_snorm_4x8 = true,
>> > - .lower_pack_unorm_2x16 = true,
>> > - .lower_pack_unorm_4x8 = true,
>> > - .lower_unpack_half_2x16 = true,
>> > - .lower_unpack_snorm_2x16 = true,
>> > - .lower_unpack_snorm_4x8 = true,
>> > - .lower_unpack_unorm_2x16 = true,
>> > - .lower_unpack_unorm_4x8 = true,
>> > + COMMON_SCALAR_OPTIONS,
>> > + .max_unroll_iterations = 0,
>> > +};
>> > +
>> > +static const struct nir_shader_compiler_options
>> > scalar_nir_options_gen7 = {
>> > + COMMON_OPTIONS,
>> > + COMMON_SCALAR_OPTIONS,
>> > + .max_unroll_iterations = 32,
>> > };
>> >
>> > static const struct nir_shader_compiler_options vector_nir_options
>> > = {
>> > @@ -75,6 +85,7 @@ static const struct nir_shader_compiler_options
>> > vector_nir_options = {
>> > .lower_unpack_unorm_2x16 = true,
>> > .lower_extract_byte = true,
>> > .lower_extract_word = true,
>> > + .max_unroll_iterations = 0,
>> > };
>> >
>> > static const struct nir_shader_compiler_options
>> > vector_nir_options_gen6 = {
>> > @@ -92,6 +103,7 @@ static const struct nir_shader_compiler_options
>> > vector_nir_options_gen6 = {
>> > .lower_unpack_unorm_2x16 = true,
>> > .lower_extract_byte = true,
>> > .lower_extract_word = true,
>> > + .max_unroll_iterations = 0,
>> > };
>> >
>> > struct brw_compiler *
>> > @@ -119,7 +131,6 @@ brw_compiler_create(void *mem_ctx, const struct
>> > gen_device_info *devinfo)
>> >
>> > /* We want the GLSL compiler to emit code that uses condition
>> > codes */
>> > for (int i = 0; i < MESA_SHADER_STAGES; i++) {
>> > - compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
>> > compiler->glsl_compiler_options[i].MaxIfDepth =
>> > devinfo->gen < 6 ? 16 : UINT_MAX;
>> >
>> > @@ -140,8 +151,15 @@ brw_compiler_create(void *mem_ctx, const
>> > struct gen_device_info *devinfo)
>> > compiler->glsl_compiler_options[i].EmitNoIndirectSampler
>> > = true;
>> >
>> > if (is_scalar) {
>> > - compiler->glsl_compiler_options[i].NirOptions =
>> > &scalar_nir_options;
>> > + if (devinfo->gen > 6) {
>> > + compiler->glsl_compiler_options[i].MaxUnrollIterations
>> > = 0;
>> > + } else {
>> > + compiler->glsl_compiler_options[i].MaxUnrollIterations
>> > = 32;
>> > + }
>> > + compiler->glsl_compiler_options[i].NirOptions =
>> > + devinfo->gen < 7 ? &scalar_nir_options :
>> > &scalar_nir_options_gen7;
>> > } else {
>> > + compiler->glsl_compiler_options[i].MaxUnrollIterations =
>> > 32;
>> > compiler->glsl_compiler_options[i].NirOptions =
>> > devinfo->gen < 6 ? &vector_nir_options :
>> > &vector_nir_options_gen6;
>> > }
>> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
>> > b/src/mesa/drivers/dri/i965/brw_nir.c
>> > index 0c15b55..15abb77 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>> > @@ -367,8 +367,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
>> > #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
>> >
>> > static nir_shader *
>> > -nir_optimize(nir_shader *nir, bool is_scalar)
>> > +nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
>> > + bool is_scalar)
>> > {
>> > + nir_variable_mode indirect_mask = 0;
>> > + if (compiler->glsl_compiler_options[nir-
>> > >stage].EmitNoIndirectInput)
>> > + indirect_mask |= nir_var_shader_in;
>> > + if (compiler->glsl_compiler_options[nir-
>> > >stage].EmitNoIndirectOutput)
>> > + indirect_mask |= nir_var_shader_out;
>> > + if (compiler->glsl_compiler_options[nir-
>> > >stage].EmitNoIndirectTemp)
>> > + indirect_mask |= nir_var_local;
>> > +
>> > bool progress;
>> > do {
>> > progress = false;
>> > @@ -391,6 +400,10 @@ nir_optimize(nir_shader *nir, bool is_scalar)
>> > OPT(nir_opt_algebraic);
>> > OPT(nir_opt_constant_folding);
>> > OPT(nir_opt_dead_cf);
>> > + if (nir->options->max_unroll_iterations != 0) {
>> > + OPT_V(nir_to_lcssa);
>> > + OPT(nir_opt_loop_unroll, indirect_mask);
>> > + }
>> > OPT(nir_opt_remove_phis);
>> > OPT(nir_opt_undef);
>> > OPT_V(nir_lower_doubles, nir_lower_drcp |
>> > @@ -444,7 +457,7 @@ brw_preprocess_nir(const struct brw_compiler
>> > *compiler, nir_shader *nir)
>> >
>> > OPT(nir_split_var_copies);
>> >
>> > - nir = nir_optimize(nir, is_scalar);
>> > + nir = nir_optimize(nir, compiler, is_scalar);
>> >
>> > if (is_scalar) {
>> > OPT_V(nir_lower_load_const_to_scalar);
>> > @@ -466,7 +479,7 @@ brw_preprocess_nir(const struct brw_compiler
>> > *compiler, nir_shader *nir)
>> > }
>> >
>> > /* Get rid of split copies */
>> > - nir = nir_optimize(nir, is_scalar);
>> > + nir = nir_optimize(nir, compiler, is_scalar);
>> >
>> > OPT(nir_remove_dead_variables, nir_var_local);
>> >
>> > @@ -491,7 +504,7 @@ brw_postprocess_nir(nir_shader *nir, const
>> > struct brw_compiler *compiler,
>> > bool progress; /* Written by OPT and OPT_V */
>> > (void)progress;
>> >
>> > - nir = nir_optimize(nir, is_scalar);
>> > + nir = nir_optimize(nir, compiler, is_scalar);
>> >
>> > if (devinfo->gen >= 6) {
>> > /* Try and fuse multiply-adds */
>> > @@ -580,7 +593,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
>> >
>> > if (nir_lower_tex(nir, &tex_options)) {
>> > nir_validate_shader(nir);
>> > - nir = nir_optimize(nir, is_scalar);
>> > + nir = nir_optimize(nir, compiler, is_scalar);
>> > }
>> >
>> > return nir;
>> > --
>> > 2.7.4
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list