[Mesa-dev] [PATCH] radeonsi: use the correct LLVMTargetMachineRef in si_build_shader_variant
Nicolai Hähnle
nhaehnle at gmail.com
Mon Jun 12 21:26:32 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
si_build_shader_variant can actually be called directly from one of the
normal-priority compiler threads. In that case, the thread_index is
only valid as an index into the normal (sscreen->tm) array, not the
low-priority one.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101384 (maybe)
Fixes: 86cc8097266c ("radeonsi: use a compiler queue with a low priority for optimized shaders")
--
I'm not sure whether this actually fixes the linked bug, waiting for
the reporter to provide an update. That said, it seems pretty clear
that there is a bug here as described, and this patch should fix it.
---
src/gallium/drivers/radeonsi/si_state_shaders.c | 62 ++++++++++++++++---------
1 file changed, 39 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 677a6de..7758545 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1438,58 +1438,79 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
break;
}
default:
assert(0);
}
if (unlikely(sctx->screen->b.debug_flags & DBG_NO_OPT_VARIANT))
memset(&key->opt, 0, sizeof(key->opt));
}
-static void si_build_shader_variant(void *job, int thread_index)
+static LLVMTargetMachineRef get_tm(struct si_shader_selector *sel,
+ int thread_index)
{
- struct si_shader *shader = (struct si_shader *)job;
- struct si_shader_selector *sel = shader->selector;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm;
- struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
- int r;
if (thread_index >= 0) {
- assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
- tm = sscreen->tm_low_priority[thread_index];
- if (!debug->async)
- debug = NULL;
- } else {
- tm = shader->compiler_ctx_state.tm;
+ assert(thread_index < ARRAY_SIZE(sscreen->tm));
+ return sscreen->tm[thread_index];
}
+ return sel->compiler_ctx_state.tm;
+}
+
+static void si_build_shader_variant(struct si_screen *sscreen,
+ struct si_shader *shader,
+ LLVMTargetMachineRef tm,
+ bool threaded)
+{
+ struct si_shader_selector *sel = shader->selector;
+ struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
+ int r;
+
+ if (threaded && !debug->async)
+ debug = NULL;
+
r = si_shader_create(sscreen, tm, shader, debug);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
shader->compilation_failed = true;
return;
}
if (shader->compiler_ctx_state.is_debug_context) {
FILE *f = open_memstream(&shader->shader_log,
&shader->shader_log_size);
if (f) {
si_shader_dump(sscreen, shader, NULL, sel->type, f, false);
fclose(f);
}
}
si_shader_init_pm4_state(sscreen, shader);
}
+static void si_build_shader_variant_low_priority(void *job, int thread_index)
+{
+ struct si_shader *shader = (struct si_shader *)job;
+ struct si_screen *sscreen = shader->selector->screen;
+ LLVMTargetMachineRef tm;
+
+ assert(thread_index >= 0);
+ assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
+
+ tm = sscreen->tm_low_priority[thread_index];
+
+ si_build_shader_variant(sscreen, shader, tm, true);
+}
+
static const struct si_shader_key zeroed;
static bool si_check_missing_main_part(struct si_screen *sscreen,
struct si_shader_selector *sel,
struct si_compiler_ctx_state *compiler_state,
struct si_shader_key *key)
{
struct si_shader **mainp = si_get_main_shader_part(sel, key);
if (!*mainp) {
@@ -1681,30 +1702,31 @@ again:
sel->last_variant = shader;
}
/* If it's an optimized shader, compile it asynchronously. */
if (shader->is_optimized &&
!is_pure_monolithic &&
thread_index < 0) {
/* Compile it asynchronously. */
util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
shader, &shader->optimized_ready,
- si_build_shader_variant, NULL);
+ si_build_shader_variant_low_priority, NULL);
/* Use the default (unoptimized) shader for now. */
memset(&key->opt, 0, sizeof(key->opt));
mtx_unlock(&sel->mutex);
goto again;
}
assert(!shader->is_optimized);
- si_build_shader_variant(shader, thread_index);
+ si_build_shader_variant(sscreen, shader, get_tm(sel, thread_index),
+ thread_index >= 0);
if (!shader->compilation_failed)
state->current = shader;
mtx_unlock(&sel->mutex);
return shader->compilation_failed ? -1 : 0;
}
static int si_shader_select(struct pipe_context *ctx,
struct si_shader_ctx_state *state,
@@ -1753,32 +1775,26 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
/**
* Compile the main shader part or the monolithic shader as part of
* si_shader_selector initialization. Since it can be done asynchronously,
* there is no way to report compile failures to applications.
*/
void si_init_shader_selector_async(void *job, int thread_index)
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm;
+ LLVMTargetMachineRef tm = get_tm(sel, thread_index);
struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
unsigned i;
- if (thread_index >= 0) {
- assert(thread_index < ARRAY_SIZE(sscreen->tm));
- tm = sscreen->tm[thread_index];
- if (!debug->async)
- debug = NULL;
- } else {
- tm = sel->compiler_ctx_state.tm;
- }
+ if (thread_index >= 0 && !debug->async)
+ debug = NULL;
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
*/
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *tgsi_binary;
if (!shader) {
--
2.9.3
More information about the mesa-dev
mailing list