Mesa (main): broadcom/compiler: specify maximum thread count in compile strategies

Thu May 6 10:43:23 UTC 2021

Module: Mesa
Branch: main
Commit: c11e4798521e73de4f7f07105802c91f2c6c155d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c11e4798521e73de4f7f07105802c91f2c6c155d

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Wed May  5 11:26:13 2021 +0200

broadcom/compiler: specify maximum thread count in compile strategies

Once we have exhausted the compile strategies at 4 threads and start
enabling lower thread counts, there is no point in starting those
compiles with 4 threads: we know they will fail, so let's start at
2 threads in these cases.

This has another nice implication: previously, if the driver compiled
at 4 threads and failed register allocation, we allowed it to retry
with 2 threads, but this only retried the register allocation process
and did not really recompile the shader with 2 threads. That was not
optimal, because at 2 threads we have more TMU fifo space for each
thread and we can do more TMU pipelining, so we were missing that
opportunity.

This improves performance in Sponza by ~1.5% and also seems to help
UE4 slightly.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10647>

---

 src/broadcom/compiler/vir.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 7fc799ac705..a6f8d845923 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -525,6 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
                                       void *debug_output_data),
                  void *debug_output_data,
                  int program_id, int variant_id,
+                 uint32_t max_threads,
                  uint32_t min_threads_for_reg_alloc,
                  bool tmu_spilling_allowed,
                  bool disable_loop_unrolling,
@@ -539,7 +540,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
         c->key = key;
         c->program_id = program_id;
         c->variant_id = variant_id;
-        c->threads = 4;
+        c->threads = max_threads;
         c->debug_output = debug_output;
         c->debug_output_data = debug_output_data;
         c->compilation_result = V3D_COMPILATION_SUCCEEDED;
@@ -1525,21 +1526,22 @@ int v3d_shaderdb_dump(struct v3d_compile *c,
  */
 struct v3d_compiler_strategy {
         const char *name;
-        uint32_t min_threads_for_reg_alloc;
+        uint32_t max_threads;
+        uint32_t min_threads;
         bool disable_loop_unrolling;
         bool disable_ubo_load_sorting;
         bool disable_tmu_pipelining;
         bool tmu_spilling_allowed;
 } static const strategies[] = {
-  /*0*/ { "default",                        4, false, false, false, false },
-  /*1*/ { "disable loop unrolling",         4, true,  false, false, false },
-  /*2*/ { "disable UBO load sorting",       4, true,  true,  false, false },
-  /*3*/ { "disable TMU pipelining",         4, true,  true,  true,  false },
-  /*4*/ { "lower thread count",             1, false, false, false, false },
-  /*5*/ { "disable loop unrolling (ltc)",   1, true,  false, false, false },
-  /*6*/ { "disable UBO load sorting (ltc)", 1, true,  true,  false, false },
-  /*7*/ { "disable TMU pipelining (ltc)",   1, true,  true,  true,  true  },
-  /*8*/ { "fallback scheduler",             1, true,  true,  true,  true  }
+  /*0*/ { "default",                        4, 4, false, false, false, false },
+  /*1*/ { "disable loop unrolling",         4, 4, true,  false, false, false },
+  /*2*/ { "disable UBO load sorting",       4, 4, true,  true,  false, false },
+  /*3*/ { "disable TMU pipelining",         4, 4, true,  true,  true,  false },
+  /*4*/ { "lower thread count",             2, 1, false, false, false, false },
+  /*5*/ { "disable loop unrolling (ltc)",   2, 1, true,  false, false, false },
+  /*6*/ { "disable UBO load sorting (ltc)", 2, 1, true,  true,  false, false },
+  /*7*/ { "disable TMU pipelining (ltc)",   2, 1, true,  true,  true,  true  },
+  /*8*/ { "fallback scheduler",             2, 1, true,  true,  true,  true  }
 };
 
 /**
@@ -1623,7 +1625,8 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                 c = vir_compile_init(compiler, key, s,
                                      debug_output, debug_output_data,
                                      program_id, variant_id,
-                                     strategies[i].min_threads_for_reg_alloc,
+                                     strategies[i].max_threads,
+                                     strategies[i].min_threads,
                                      strategies[i].tmu_spilling_allowed,
                                      strategies[i].disable_loop_unrolling,
                                      strategies[i].disable_ubo_load_sorting,