[Mesa-dev] [PATCH 03/11] radeonsi: use the correct target machine when building shader variants

Tue Jan 17 22:47:53 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

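If a shader selector was created by one context and one of its shader
variants is later built from a different context, compile that variant
with the calling context's compiler state (target machine, debug
callback and debug-context flag) rather than the state stored in the
selector. Variants built during asynchronous selector initialization
keep using the selector's own state.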

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99419
---
 src/gallium/drivers/radeonsi/si_shader.h        |  2 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 41 ++++++++++++++++---------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 5a24318..cf4da85 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -491,20 +491,22 @@ struct si_shader_info {
 	ubyte			vs_output_param_offset[SI_MAX_VS_OUTPUTS];
 	ubyte			num_input_sgprs;
 	ubyte			num_input_vgprs;
 	char			face_vgpr_index;
 	bool			uses_instanceid;
 	ubyte			nr_pos_exports;
 	ubyte			nr_param_exports;
 };
 
 struct si_shader {
+	struct si_compiler_ctx_state	compiler_ctx_state;
+
 	struct si_shader_selector	*selector;
 	struct si_shader		*next_variant;
 
 	struct si_shader_part		*prolog;
 	struct si_shader_part		*epilog;
 
 	struct si_pm4_state		*pm4;
 	struct r600_resource		*bo;
 	struct r600_resource		*scratch_bo;
 	struct si_shader_key		key;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index eaba19a..010ce15 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1079,55 +1079,56 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		assert(0);
 	}
 }
 
 static void si_build_shader_variant(void *job, int thread_index)
 {
 	struct si_shader *shader = (struct si_shader *)job;
 	struct si_shader_selector *sel = shader->selector;
 	struct si_screen *sscreen = sel->screen;
 	LLVMTargetMachineRef tm;
-	struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
+	struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
 	int r;
 
 	if (thread_index >= 0) {
 		assert(thread_index < ARRAY_SIZE(sscreen->tm));
 		tm = sscreen->tm[thread_index];
 		if (!debug->async)
 			debug = NULL;
 	} else {
-		tm = sel->compiler_ctx_state.tm;
+		tm = shader->compiler_ctx_state.tm;
 	}
 
 	r = si_shader_create(sscreen, tm, shader, debug);
 	if (unlikely(r)) {
 		R600_ERR("Failed to build shader variant (type=%u) %d\n",
 			 sel->type, r);
 		shader->compilation_failed = true;
 		return;
 	}
 
-	if (sel->compiler_ctx_state.is_debug_context) {
+	if (shader->compiler_ctx_state.is_debug_context) {
 		FILE *f = open_memstream(&shader->shader_log,
 					 &shader->shader_log_size);
 		if (f) {
 			si_shader_dump(sscreen, shader, NULL, sel->type, f, false);
 			fclose(f);
 		}
 	}
 
 	si_shader_init_pm4_state(sscreen, shader);
 }
 
 /* Select the hw shader variant depending on the current state. */
 static int si_shader_select_with_key(struct si_screen *sscreen,
 				     struct si_shader_ctx_state *state,
+				     struct si_compiler_ctx_state *compiler_state,
 				     struct si_shader_key *key,
 				     int thread_index)
 {
 	static const struct si_shader_key zeroed;
 	struct si_shader_selector *sel = state->cso;
 	struct si_shader *current = state->current;
 	struct si_shader *iter, *shader = NULL;
 
 	if (unlikely(sscreen->b.debug_flags & DBG_NO_OPT_VARIANT)) {
 		memset(&key->opt, 0, sizeof(key->opt));
@@ -1184,20 +1185,21 @@ again:
 	}
 
 	/* Build a new shader. */
 	shader = CALLOC_STRUCT(si_shader);
 	if (!shader) {
 		pipe_mutex_unlock(sel->mutex);
 		return -ENOMEM;
 	}
 	shader->selector = sel;
 	shader->key = *key;
+	shader->compiler_ctx_state = *compiler_state;
 
 	/* Monolithic-only shaders don't make a distinction between optimized
 	 * and unoptimized. */
 	shader->is_monolithic =
 		!sel->main_shader_part ||
 		sel->main_shader_part->key.as_ls != key->as_ls ||
 		sel->main_shader_part->key.as_es != key->as_es ||
 		memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0 ||
 		memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
 
@@ -1233,27 +1235,29 @@ again:
 	si_build_shader_variant(shader, thread_index);
 
 	if (!shader->compilation_failed)
 		state->current = shader;
 
 	pipe_mutex_unlock(sel->mutex);
 	return shader->compilation_failed ? -1 : 0;
 }
 
 static int si_shader_select(struct pipe_context *ctx,
-			    struct si_shader_ctx_state *state)
+			    struct si_shader_ctx_state *state,
+			    struct si_compiler_ctx_state *compiler_state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_key key;
 
 	si_shader_selector_key(ctx, state->cso, &key);
-	return si_shader_select_with_key(sctx->screen, state, &key, -1);
+	return si_shader_select_with_key(sctx->screen, state, compiler_state,
+					 &key, -1);
 }
 
 static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
 					  struct si_shader_key *key)
 {
 	unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
 
 	switch (info->processor) {
 	case PIPE_SHADER_VERTEX:
 		switch (next_shader) {
@@ -1420,21 +1424,23 @@ void si_init_shader_selector_async(void *job, int thread_index)
 				sel->info.uses_linear_center &&
 				sel->info.uses_linear_centroid;
 			key.part.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS;
 			for (i = 0; i < 8; i++)
 				if (sel->info.colors_written & (1 << i))
 					key.part.ps.epilog.spi_shader_col_format |=
 						V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
 			break;
 		}
 
-		if (si_shader_select_with_key(sscreen, &state, &key, thread_index))
+		if (si_shader_select_with_key(sscreen, &state,
+					      &sel->compiler_ctx_state, &key,
+					      thread_index))
 			fprintf(stderr, "radeonsi: can't create a monolithic shader\n");
 	}
 
 	/* The GS copy shader is always pre-compiled. */
 	if (sel->type == PIPE_SHADER_GEOMETRY) {
 		sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, sel, debug);
 		if (!sel->gs_copy_shader) {
 			fprintf(stderr, "radeonsi: can't create GS copy shader\n");
 			return;
 		}
@@ -2363,88 +2369,95 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
 
 	for (i = 0; i < so->num_outputs; i++)
 		enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << (so->output[i].stream * 4);
 	sctx->b.streamout.enabled_stream_buffers_mask = enabled_stream_buffers_mask;
 	sctx->b.streamout.stride_in_dw = shader->so.stride;
 }
 
 bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
+	struct si_compiler_ctx_state compiler_state;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct si_shader *old_vs = si_get_vs_state(sctx);
 	bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
 	int r;
 
+	compiler_state.tm = sctx->tm;
+	compiler_state.debug = sctx->b.debug;
+	compiler_state.is_debug_context = sctx->is_debug;
+
 	/* Update stages before GS. */
 	if (sctx->tes_shader.cso) {
 		if (!sctx->tf_ring) {
 			si_init_tess_factor_ring(sctx);
 			if (!sctx->tf_ring)
 				return false;
 		}
 
 		/* VS as LS */
-		r = si_shader_select(ctx, &sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
 
 		if (sctx->tcs_shader.cso) {
-			r = si_shader_select(ctx, &sctx->tcs_shader);
+			r = si_shader_select(ctx, &sctx->tcs_shader,
+					     &compiler_state);
 			if (r)
 				return false;
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
 		} else {
 			if (!sctx->fixed_func_tcs_shader.cso) {
 				si_generate_fixed_func_tcs(sctx);
 				if (!sctx->fixed_func_tcs_shader.cso)
 					return false;
 			}
 
-			r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
+			r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader,
+					     &compiler_state);
 			if (r)
 				return false;
 			si_pm4_bind_state(sctx, hs,
 					  sctx->fixed_func_tcs_shader.current->pm4);
 		}
 
-		r = si_shader_select(ctx, &sctx->tes_shader);
+		r = si_shader_select(ctx, &sctx->tes_shader, &compiler_state);
 		if (r)
 			return false;
 
 		if (sctx->gs_shader.cso) {
 			/* TES as ES */
 			si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
 		} else {
 			/* TES as VS */
 			si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
 			si_update_so(sctx, sctx->tes_shader.cso);
 		}
 	} else if (sctx->gs_shader.cso) {
 		/* VS as ES */
-		r = si_shader_select(ctx, &sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
 	} else {
 		/* VS as VS */
-		r = si_shader_select(ctx, &sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
 		si_update_so(sctx, sctx->vs_shader.cso);
 	}
 
 	/* Update GS. */
 	if (sctx->gs_shader.cso) {
-		r = si_shader_select(ctx, &sctx->gs_shader);
+		r = si_shader_select(ctx, &sctx->gs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader.cso->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader.cso);
 
 		if (!si_update_gs_ring_buffers(sctx))
 			return false;
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
@@ -2452,21 +2465,21 @@ bool si_update_shaders(struct si_context *sctx)
 	}
 
 	si_update_vgt_shader_config(sctx);
 
 	if (old_clip_disable != si_get_vs_state(sctx)->key.opt.hw_vs.clip_disable)
 		si_mark_atom_dirty(sctx, &sctx->clip_regs);
 
 	if (sctx->ps_shader.cso) {
 		unsigned db_shader_control;
 
-		r = si_shader_select(ctx, &sctx->ps_shader);
+		r = si_shader_select(ctx, &sctx->ps_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
 
 		db_shader_control =
 			sctx->ps_shader.cso->db_shader_control |
 			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
 
 		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
 		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
-- 
2.7.4