Mesa (main): broadcom/compiler: add a compiler strategy to disable loop unrolling
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu May 6 10:43:23 UTC 2021
Module: Mesa
Branch: main
Commit: 296fe4daa64024530d7dcf66e55ef43c75cf53eb
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=296fe4daa64024530d7dcf66e55ef43c75cf53eb
Author: Iago Toral Quiroga <itoral at igalia.com>
Date: Mon May 3 10:14:12 2021 +0200
broadcom/compiler: add a compiler strategy to disable loop unrolling
Loop unrolling can increase register pressure significantly, leading to
lower thread counts and spilling.
Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10647>
---
src/broadcom/compiler/nir_to_vir.c | 5 +++--
src/broadcom/compiler/v3d_compiler.h | 5 ++++-
src/broadcom/compiler/vir.c | 16 ++++++++++------
src/gallium/drivers/v3d/v3d_program.c | 2 +-
4 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 3cec6ba9bcd..43ce7a0ffbc 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1774,7 +1774,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
}
void
-v3d_optimize_nir(struct nir_shader *s)
+v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
{
bool progress;
unsigned lower_flrp =
@@ -1826,7 +1826,8 @@ v3d_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_lower_undef_to_zero);
- if (s->options->max_unroll_iterations > 0) {
+ if (c && !c->disable_loop_unrolling &&
+ s->options->max_unroll_iterations > 0) {
NIR_PASS(progress, s, nir_opt_loop_unroll,
nir_var_shader_in |
nir_var_shader_out |
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index fe2f44d8134..9b87dd77dcf 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -660,6 +660,9 @@ struct v3d_compile {
*/
bool disable_ldunif_opt;
+ /* Disables loop unrolling to reduce register pressure. */
+ bool disable_loop_unrolling;
+
/* Minimum number of threads we are willing to use to register allocate
* a shader with the current compilation strategy. This only prevents
* us from lowering the thread count to register allocate successfully,
@@ -939,7 +942,7 @@ vir_has_uniform(struct qinst *inst)
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
void v3d_compiler_free(const struct v3d_compiler *compiler);
-void v3d_optimize_nir(struct nir_shader *s);
+void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
struct v3d_key *key,
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 48eba571727..3a35df247f1 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -526,6 +526,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
void *debug_output_data,
int program_id, int variant_id,
uint32_t min_threads_for_reg_alloc,
+ bool disable_loop_unrolling,
bool disable_constant_ubo_load_sorting,
bool disable_tmu_pipelining,
bool fallback_scheduler)
@@ -545,6 +546,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
c->fallback_scheduler = fallback_scheduler;
c->disable_tmu_pipelining = disable_tmu_pipelining;
c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting;
+ c->disable_loop_unrolling = disable_loop_unrolling;
s = nir_shader_clone(c, s);
c->s = s;
@@ -867,7 +869,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
- v3d_optimize_nir(c->s);
+ v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */
@@ -901,7 +903,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
- v3d_optimize_nir(c->s);
+ v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */
@@ -1417,7 +1419,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
- v3d_optimize_nir(c->s);
+ v3d_optimize_nir(c, c->s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may
@@ -1537,6 +1539,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
uint32_t min_threads_for_reg_alloc;
} static const strategies[] = {
{ "default", 4 },
+ { "disable loop unrolling", 4 },
{ "disable UBO load sorting", 1 },
{ "disable TMU pipelining", 1 },
{ "fallback scheduler", 1 }
@@ -1547,9 +1550,10 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
debug_output, debug_output_data,
program_id, variant_id,
strategies[i].min_threads_for_reg_alloc,
- i > 0, /* Disable UBO load sorting */
- i > 1, /* Disable TMU pipelining */
- i > 2 /* Fallback_scheduler */);
+ i > 0, /* Disable loop unrolling */
+ i > 1, /* Disable UBO load sorting */
+ i > 2, /* Disable TMU pipelining */
+ i > 3 /* Fallback_scheduler */);
v3d_attempt_compile(c);
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
index 52ab2cf6d63..4050b933319 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -318,7 +318,7 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
- v3d_optimize_nir(s);
+ v3d_optimize_nir(NULL, s);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
More information about the mesa-commit mailing list