Mesa (main): broadcom/compiler: add a compiler strategy to disable loop unrolling

Thu May 6 10:43:23 UTC 2021

Module: Mesa
Branch: main
Commit: 296fe4daa64024530d7dcf66e55ef43c75cf53eb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=296fe4daa64024530d7dcf66e55ef43c75cf53eb

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Mon May  3 10:14:12 2021 +0200

broadcom/compiler: add a compiler strategy to disable loop unrolling

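Loop unrolling can increase register pressure significantly, leading to
lower thread counts and spilling. Add a strategy, tried right after the
default one, that compiles with loop unrolling disabled. v3d_optimize_nir
now takes the compile context and skips unrolling when it is NULL.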

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10647>

---

 src/broadcom/compiler/nir_to_vir.c    |  5 +++--
 src/broadcom/compiler/v3d_compiler.h  |  5 ++++-
 src/broadcom/compiler/vir.c           | 16 ++++++++++------
 src/gallium/drivers/v3d/v3d_program.c |  2 +-
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 3cec6ba9bcd..43ce7a0ffbc 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1774,7 +1774,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
 }
 
 void
-v3d_optimize_nir(struct nir_shader *s)
+v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
 {
         bool progress;
         unsigned lower_flrp =
@@ -1826,7 +1826,8 @@ v3d_optimize_nir(struct nir_shader *s)
                 NIR_PASS(progress, s, nir_opt_undef);
                 NIR_PASS(progress, s, nir_lower_undef_to_zero);
 
-                if (s->options->max_unroll_iterations > 0) {
+                if (c && !c->disable_loop_unrolling &&
+                    s->options->max_unroll_iterations > 0) {
                         NIR_PASS(progress, s, nir_opt_loop_unroll,
                                  nir_var_shader_in |
                                  nir_var_shader_out |
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index fe2f44d8134..9b87dd77dcf 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -660,6 +660,9 @@ struct v3d_compile {
          */
         bool disable_ldunif_opt;
 
+        /* Disables loop unrolling to reduce register pressure. */
+        bool disable_loop_unrolling;
+
         /* Minimum number of threads we are willing to use to register allocate
          * a shader with the current compilation strategy. This only prevents
          * us from lowering the thread count to register allocate successfully,
@@ -939,7 +942,7 @@ vir_has_uniform(struct qinst *inst)
 
 const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
 void v3d_compiler_free(const struct v3d_compiler *compiler);
-void v3d_optimize_nir(struct nir_shader *s);
+void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
 
 uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                       struct v3d_key *key,
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 48eba571727..3a35df247f1 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -526,6 +526,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
                  void *debug_output_data,
                  int program_id, int variant_id,
                  uint32_t min_threads_for_reg_alloc,
+                 bool disable_loop_unrolling,
                  bool disable_constant_ubo_load_sorting,
                  bool disable_tmu_pipelining,
                  bool fallback_scheduler)
@@ -545,6 +546,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
         c->fallback_scheduler = fallback_scheduler;
         c->disable_tmu_pipelining = disable_tmu_pipelining;
         c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting;
+        c->disable_loop_unrolling = disable_loop_unrolling;
 
         s = nir_shader_clone(c, s);
         c->s = s;
@@ -867,7 +869,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
         NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                    nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
         NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
-        v3d_optimize_nir(c->s);
+        v3d_optimize_nir(c, c->s);
         NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
 
         /* This must go before nir_lower_io */
@@ -901,7 +903,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
         NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                    nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
         NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
-        v3d_optimize_nir(c->s);
+        v3d_optimize_nir(c, c->s);
         NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
 
         /* This must go before nir_lower_io */
@@ -1417,7 +1419,7 @@ v3d_attempt_compile(struct v3d_compile *c)
 
         NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
 
-        v3d_optimize_nir(c->s);
+        v3d_optimize_nir(c, c->s);
 
         /* Do late algebraic optimization to turn add(a, neg(b)) back into
          * subs, then the mandatory cleanup after algebraic.  Note that it may
@@ -1537,6 +1539,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                 uint32_t min_threads_for_reg_alloc;
         } static const strategies[] = {
                 { "default",                  4 },
+                { "disable loop unrolling",   4 },
                 { "disable UBO load sorting", 1 },
                 { "disable TMU pipelining",   1 },
                 { "fallback scheduler",       1 }
@@ -1547,9 +1550,10 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                                      debug_output, debug_output_data,
                                      program_id, variant_id,
                                      strategies[i].min_threads_for_reg_alloc,
-                                     i > 0, /* Disable UBO load sorting */
-                                     i > 1, /* Disable TMU pipelining */
-                                     i > 2  /* Fallback_scheduler */);
+                                     i > 0, /* Disable loop unrolling */
+                                     i > 1, /* Disable UBO load sorting */
+                                     i > 2, /* Disable TMU pipelining */
+                                     i > 3  /* Fallback_scheduler */);
 
                 v3d_attempt_compile(c);
 
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
index 52ab2cf6d63..4050b933319 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -318,7 +318,7 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
 
         NIR_PASS_V(s, nir_lower_load_const_to_scalar);
 
-        v3d_optimize_nir(s);
+        v3d_optimize_nir(NULL, s);
 
         NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);