Mesa (main): iris: Enable threaded shader compilation

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jul 28 18:00:26 UTC 2021


Module: Mesa
Branch: main
Commit: 42c34e1ac8da3a000087c02cfd9f6fcb83e84fbc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=42c34e1ac8da3a000087c02cfd9f6fcb83e84fbc

Author: Ian Romanick <ian.d.romanick at intel.com>
Date:   Fri Jun  4 14:17:42 2021 -0700

iris: Enable threaded shader compilation

There are a few minor things that can still be improved:

1. Eliminate (or reduce) the dynamic allocation of the
threaded_compile_job.

2. For apps like shader-db, improve the case where nr_threads=0.  Right
now this adds thread switching and mutex overhead.

3. Other performance improvements?  iris_uncompiled_shader::variants has
some special properties that make it ripe for replacement with a
lockless list.  Without gathering some data, it's hard to guess what
impact that could have.

v2: Fix whitespace and formatting issues.  Noticed by Ken.
s/threaded_compile_job/iris_threaded_compile_job/g.  Suggested by Ken.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>

---

 src/gallium/drivers/iris/iris_context.c |  3 ++
 src/gallium/drivers/iris/iris_context.h |  3 ++
 src/gallium/drivers/iris/iris_program.c | 73 ++++++++++++++++++++++++++++++---
 src/gallium/drivers/iris/iris_screen.c  | 60 +++++++++++++++++++++++++++
 src/gallium/drivers/iris/iris_screen.h  |  2 +
 5 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c
index 45d82b94519..5ce69c310c8 100644
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -44,6 +44,9 @@ iris_set_debug_callback(struct pipe_context *ctx,
                         const struct pipe_debug_callback *cb)
 {
    struct iris_context *ice = (struct iris_context *)ctx;
+   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
+
+   util_queue_finish(&screen->shader_compiler_queue);
 
    if (cb)
       ice->dbg = *cb;
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 626db8c2f17..c94217c5033 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -412,6 +412,9 @@ struct iris_uncompiled_shader {
 
    /** Lock for the variants list */
    simple_mtx_t lock;
+
+   /** For parallel shader compiles */
+   struct util_queue_fence ready;
 };
 
 enum iris_surface_group {
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 5fcd4ef3d5c..1bd83baa938 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -38,6 +38,7 @@
 #include "util/u_atomic.h"
 #include "util/u_upload_mgr.h"
 #include "util/debug.h"
+#include "util/u_async_debug.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_serialize.h"
@@ -54,6 +55,14 @@
    .base.tex.compressed_multisample_layout_mask = ~0,    \
    .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
 
+struct iris_threaded_compile_job { /* argument bundle unpacked by iris_compile_shader() */
+   struct iris_screen *screen;          /* passed on by iris_compile_shader(), e.g. to iris_compile_vs() */
+   struct u_upload_mgr *uploader;       /* passed on by iris_compile_shader(), e.g. to iris_compile_vs() */
+   struct pipe_debug_callback *dbg;     /* only assigned by iris_schedule_compile() when its dbg is non-NULL */
+   struct iris_uncompiled_shader *ish;  /* ish->nir->info.stage selects which compile function runs */
+   struct iris_compiled_shader *shader; /* variant handed to the compile function */
+};
+
 static unsigned
 get_new_program_id(struct iris_screen *screen)
 {
@@ -1174,6 +1183,42 @@ find_or_add_variant(const struct iris_screen *screen,
    return variant;
 }
 
+static void
+iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
+                                 UNUSED int thread_index)
+{ /* Handed to util_queue_add_job() next to the execute function; only _job is used. */
+   free(_job); /* the job is heap-allocated (calloc) by the code that schedules it */
+}
+
+static void
+iris_schedule_compile(struct iris_screen *screen,
+                      struct util_queue_fence *ready_fence,
+                      struct pipe_debug_callback *dbg,
+                      struct iris_threaded_compile_job *job,
+                      util_queue_execute_func execute)
+/* Initializes ready_fence and adds job to screen->shader_compiler_queue; waits on the fence if sync_compile or dbg is set. */
+{
+   util_queue_fence_init(ready_fence);
+
+   struct util_async_debug_callback async_debug;
+   /* The job gets a callback object local to this call rather than the caller's dbg pointer. */
+   if (dbg) {
+      u_async_debug_init(&async_debug);
+      job->dbg = &async_debug.base;
+   }
+
+   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
+                      iris_threaded_compile_job_delete, 0);
+   /* async_debug lives in this stack frame and job->dbg may point into it, so a non-NULL dbg always waits. */
+   if (screen->driconf.sync_compile || dbg)
+      util_queue_fence_wait(ready_fence);
+   /* presumably forwards what the job recorded in async_debug to dbg -- confirm against util/u_async_debug.h */
+   if (dbg) {
+      u_async_debug_drain(&async_debug, dbg);
+      u_async_debug_cleanup(&async_debug);
+   }
+}
+
 /**
  * Compile a vertex shader, and upload the assembly.
  */
@@ -2457,12 +2502,17 @@ iris_create_compute_state(struct pipe_context *ctx,
 }
 
 static void
-iris_compile_shader(struct iris_screen *screen,
-                    struct u_upload_mgr *uploader,
-                    struct pipe_debug_callback *dbg,
-                    struct iris_uncompiled_shader *ish,
-                    struct iris_compiled_shader *shader)
+iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
 {
+   const struct iris_threaded_compile_job *job =
+      (struct iris_threaded_compile_job *) _job;
+
+   struct iris_screen *screen = job->screen;
+   struct u_upload_mgr *uploader = job->uploader;
+   struct pipe_debug_callback *dbg = job->dbg;
+   struct iris_uncompiled_shader *ish = job->ish;
+   struct iris_compiled_shader *shader = job->shader;
+
    switch (ish->nir->info.stage) {
    case MESA_SHADER_VERTEX:
       iris_compile_vs(screen, uploader, dbg, ish, shader);
@@ -2615,7 +2665,17 @@ iris_create_shader_state(struct pipe_context *ctx,
 
       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                     &key, key_size)) {
-         iris_compile_shader(screen, uploader, &ice->dbg, ish, shader);
+         assert(!util_queue_fence_is_signalled(&shader->ready));
+
+         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
+
+         job->screen = screen;
+         job->uploader = uploader;
+         job->ish = ish;
+         job->shader = shader;
+
+         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
+                               iris_compile_shader);
       }
    }
 
@@ -2643,6 +2703,7 @@ iris_destroy_shader_state(struct pipe_context *ctx, void *state)
    }
 
    simple_mtx_destroy(&ish->lock);
+   util_queue_fence_destroy(&ish->ready);
 
    ralloc_free(ish->nir);
    free(ish);
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 4906514014f..ab868495ad6 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -38,6 +38,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "util/debug.h"
+#include "util/u_cpu_detect.h"
 #include "util/u_inlines.h"
 #include "util/format/u_format.h"
 #include "util/u_transfer_helper.h"
@@ -609,6 +610,7 @@ void
 iris_screen_destroy(struct iris_screen *screen)
 {
    iris_destroy_screen_measure(screen);
+   util_queue_destroy(&screen->shader_compiler_queue);
    glsl_type_singleton_decref();
    iris_bo_unreference(screen->workaround_bo);
    u_transfer_helper_destroy(screen->base.transfer_helper);
@@ -649,6 +651,38 @@ iris_get_disk_shader_cache(struct pipe_screen *pscreen)
    return screen->disk_cache;
 }
 
+static void
+iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
+                                     unsigned max_threads)
+{ /* Installed as pscreen->set_max_shader_compiler_threads in iris_screen_create(). */
+   struct iris_screen *screen = (struct iris_screen *) pscreen;
+   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads); /* max_threads is passed through unchanged */
+}
+
+static bool
+iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
+                                             void *v_shader,
+                                             enum pipe_shader_type p_stage)
+{ /* Installed as pscreen->is_parallel_shader_compilation_finished; p_stage is not used. */
+   struct iris_screen *screen = (struct iris_screen *) pscreen;
+
+   /* Threaded compilation is only used for the precompile.  If precompile is
+    * disabled, threaded compilation is "done."
+    */
+   if (!screen->precompile)
+      return true;
+
+   struct iris_uncompiled_shader *ish = v_shader; /* v_shader is treated as an iris_uncompiled_shader */
+
+   /* When precompile is enabled, the first entry is the precompile variant.
+    * Check the ready fence of the precompile variant.
+    */
+   struct iris_compiled_shader *first =
+      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
+   /* NOTE(review): iris_create_shader_state() schedules the job with &ish->ready as its fence, not first->ready -- confirm this is the fence the job signals. */
+   return util_queue_fence_is_signalled(&first->ready);
+}
+
 static int
 iris_getparam(int fd, int param, int *value)
 {
@@ -869,10 +903,36 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
    pscreen->query_memory_info = iris_query_memory_info;
    pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
    pscreen->get_driver_query_info = iris_get_monitor_info;
+   pscreen->is_parallel_shader_compilation_finished = iris_is_parallel_shader_compilation_finished;
+   pscreen->set_max_shader_compiler_threads = iris_set_max_shader_compiler_threads;
 
    genX_call(&screen->devinfo, init_screen_state, screen);
 
    glsl_type_singleton_init_or_ref();
 
+   /* FINISHME: Big core vs little core (for CPUs that have both kinds of
+    * cores) and, possibly, thread vs core should be considered here too.
+    */
+   unsigned compiler_threads = 1;
+   const struct util_cpu_caps_t *caps = util_get_cpu_caps();
+   unsigned hw_threads = caps->nr_cpus;
+
+   if (hw_threads >= 12) {
+      compiler_threads = hw_threads * 3 / 4;
+   } else if (hw_threads >= 6) {
+      compiler_threads = hw_threads - 2;
+   } else if (hw_threads >= 2) {
+      compiler_threads = hw_threads - 1;
+   }
+
+   if (!util_queue_init(&screen->shader_compiler_queue,
+                        "sh", 64, compiler_threads,
+                        UTIL_QUEUE_INIT_RESIZE_IF_FULL |
+                        UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY,
+                        NULL)) {
+      iris_screen_destroy(screen);
+      return NULL;
+   }
+
    return pscreen;
 }
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index 503effc1d27..a1c0588ecdf 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -220,6 +220,8 @@ struct iris_screen {
    struct iris_bo *workaround_bo;
    struct iris_address workaround_address;
 
+   struct util_queue shader_compiler_queue;
+
    struct disk_cache *disk_cache;
 
    struct intel_measure_device measure;



More information about the mesa-commit mailing list