[Mesa-dev] [PATCH 10/10] i965: use nir unrolling for scalar backend Gen7+

Thu Sep 15 07:03:21 UTC 2016

---
 src/compiler/glsl/glsl_parser_extras.cpp | 12 +++++----
 src/mesa/drivers/dri/i965/brw_compiler.c | 42 +++++++++++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_nir.c      | 23 +++++++++++++----
 3 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 436ddd0..a5c926a 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2057,12 +2057,14 @@ do_common_optimization(exec_list *ir, bool linked,
    OPT(optimize_split_arrays, ir, linked);
    OPT(optimize_redundant_jumps, ir);
 
-   loop_state *ls = analyze_loop_variables(ir);
-   if (ls->loop_found) {
-      OPT(set_loop_controls, ir, ls);
-      OPT(unroll_loops, ir, ls, options);
+   if (options->MaxUnrollIterations != 0) {
+      loop_state *ls = analyze_loop_variables(ir);
+      if (ls->loop_found) {
+         OPT(set_loop_controls, ir, ls);
+         OPT(unroll_loops, ir, ls, options);
+      }
+      delete ls;
    }
-   delete ls;
 
 #undef OPT
 
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index 86b1eaa..523b554 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -43,18 +43,28 @@
    .use_interpolated_input_intrinsics = true,                                 \
    .vertex_id_zero_based = true
 
+#define COMMON_SCALAR_OPTIONS                                                 \
+   .lower_pack_half_2x16 = true,                                              \
+   .lower_pack_snorm_2x16 = true,                                             \
+   .lower_pack_snorm_4x8 = true,                                              \
+   .lower_pack_unorm_2x16 = true,                                             \
+   .lower_pack_unorm_4x8 = true,                                              \
+   .lower_unpack_half_2x16 = true,                                            \
+   .lower_unpack_snorm_2x16 = true,                                           \
+   .lower_unpack_snorm_4x8 = true,                                            \
+   .lower_unpack_unorm_2x16 = true,                                           \
+   .lower_unpack_unorm_4x8 = true                                             \
+
 static const struct nir_shader_compiler_options scalar_nir_options = {
    COMMON_OPTIONS,
-   .lower_pack_half_2x16 = true,
-   .lower_pack_snorm_2x16 = true,
-   .lower_pack_snorm_4x8 = true,
-   .lower_pack_unorm_2x16 = true,
-   .lower_pack_unorm_4x8 = true,
-   .lower_unpack_half_2x16 = true,
-   .lower_unpack_snorm_2x16 = true,
-   .lower_unpack_snorm_4x8 = true,
-   .lower_unpack_unorm_2x16 = true,
-   .lower_unpack_unorm_4x8 = true,
+   COMMON_SCALAR_OPTIONS,
+   .max_unroll_iterations = 0,
+};
+
+static const struct nir_shader_compiler_options scalar_nir_options_gen7 = {
+   COMMON_OPTIONS,
+   COMMON_SCALAR_OPTIONS,
+   .max_unroll_iterations = 32,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options = {
@@ -75,6 +85,7 @@ static const struct nir_shader_compiler_options vector_nir_options = {
    .lower_unpack_unorm_2x16 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .max_unroll_iterations = 0,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
@@ -92,6 +103,7 @@ static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
    .lower_unpack_unorm_2x16 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .max_unroll_iterations = 0,
 };
 
 struct brw_compiler *
@@ -119,7 +131,6 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
 
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-      compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
       compiler->glsl_compiler_options[i].MaxIfDepth =
          devinfo->gen < 6 ? 16 : UINT_MAX;
 
@@ -140,8 +151,15 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
          compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
 
       if (is_scalar) {
-         compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
+         if (devinfo->gen > 6) {
+            compiler->glsl_compiler_options[i].MaxUnrollIterations = 0;
+         } else {
+            compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+         }
+         compiler->glsl_compiler_options[i].NirOptions =
+            devinfo->gen < 7 ? &scalar_nir_options : &scalar_nir_options_gen7;
       } else {
+         compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
          compiler->glsl_compiler_options[i].NirOptions =
             devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 0c15b55..15abb77 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -367,8 +367,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
 #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
 
 static nir_shader *
-nir_optimize(nir_shader *nir, bool is_scalar)
+nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+             bool is_scalar)
 {
+   nir_variable_mode indirect_mask = 0;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+      indirect_mask |= nir_var_shader_in;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+      indirect_mask |= nir_var_shader_out;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+      indirect_mask |= nir_var_local;
+
    bool progress;
    do {
       progress = false;
@@ -391,6 +400,10 @@ nir_optimize(nir_shader *nir, bool is_scalar)
       OPT(nir_opt_algebraic);
       OPT(nir_opt_constant_folding);
       OPT(nir_opt_dead_cf);
+      if (nir->options->max_unroll_iterations != 0) {
+         OPT_V(nir_to_lcssa);
+         OPT(nir_opt_loop_unroll, indirect_mask);
+      }
       OPT(nir_opt_remove_phis);
       OPT(nir_opt_undef);
       OPT_V(nir_lower_doubles, nir_lower_drcp |
@@ -444,7 +457,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
 
    OPT(nir_split_var_copies);
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
    if (is_scalar) {
       OPT_V(nir_lower_load_const_to_scalar);
@@ -466,7 +479,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
    }
 
    /* Get rid of split copies */
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
    OPT(nir_remove_dead_variables, nir_var_local);
 
@@ -491,7 +504,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    bool progress; /* Written by OPT and OPT_V */
    (void)progress;
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
    if (devinfo->gen >= 6) {
       /* Try and fuse multiply-adds */
@@ -580,7 +593,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
 
    if (nir_lower_tex(nir, &tex_options)) {
       nir_validate_shader(nir);
-      nir = nir_optimize(nir, is_scalar);
+      nir = nir_optimize(nir, compiler, is_scalar);
    }
 
    return nir;
-- 
2.7.4