Mesa (master): radeonsi: support thread-safe shaders shared by multiple contexts

Tue Oct 20 11:29:05 UTC 2015

Module: Mesa
Branch: master
Commit: 9b54ce3362f117b4d46497b578211bb26554dd78
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b54ce3362f117b4d46497b578211bb26554dd78

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Oct  7 01:48:18 2015 +0200

radeonsi: support thread-safe shaders shared by multiple contexts

The "current" shader pointer is moved from the CSO (constant state object)
to the context, so that the CSO is mostly immutable.

The only drawback is that the "current" pointer isn't saved when a shader
is unbound, so it must be looked up again when the shader is rebound.

This is also a prerequisite for multithreaded shader compilation.

Reviewed-by: Michel Dänzer <michel.daenzer at amd.com>

---

 src/gallium/drivers/radeonsi/si_blit.c          |   10 +-
 src/gallium/drivers/radeonsi/si_debug.c         |   18 +-
 src/gallium/drivers/radeonsi/si_descriptors.c   |   12 +-
 src/gallium/drivers/radeonsi/si_pipe.c          |    6 +-
 src/gallium/drivers/radeonsi/si_pipe.h          |   21 +-
 src/gallium/drivers/radeonsi/si_shader.h        |   31 +--
 src/gallium/drivers/radeonsi/si_state.c         |    2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c    |   44 ++--
 src/gallium/drivers/radeonsi/si_state_shaders.c |  279 ++++++++++++-----------
 9 files changed, 224 insertions(+), 199 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index d5c5db3..082ea85 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 	util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
 	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
 	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
-	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
-	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
-	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
-	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
-	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
+	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
+	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
+	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
+	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
+	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
 	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
 	util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
 	util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 7d41e8d..5306218 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -31,15 +31,15 @@
 #include "ddebug/dd_util.h"
 
 
-static void si_dump_shader(struct si_shader_selector *sel, const char *name,
+static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
 			   FILE *f)
 {
-	if (!sel || !sel->current)
+	if (!state->cso || !state->current)
 		return;
 
 	fprintf(f, "%s shader disassembly:\n", name);
-	si_dump_shader_key(sel->type, &sel->current->key, f);
-	fprintf(f, "%s\n\n", sel->current->binary.disasm_string);
+	si_dump_shader_key(state->cso->type, &state->current->key, f);
+	fprintf(f, "%s\n\n", state->current->binary.disasm_string);
 }
 
 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
@@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 	if (flags & PIPE_DEBUG_DEVICE_IS_HUNG)
 		si_dump_debug_registers(sctx, f);
 
-	si_dump_shader(sctx->vs_shader, "Vertex", f);
-	si_dump_shader(sctx->tcs_shader, "Tessellation control", f);
-	si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f);
-	si_dump_shader(sctx->gs_shader, "Geometry", f);
-	si_dump_shader(sctx->ps_shader, "Fragment", f);
+	si_dump_shader(&sctx->vs_shader, "Vertex", f);
+	si_dump_shader(&sctx->tcs_shader, "Tessellation control", f);
+	si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f);
+	si_dump_shader(&sctx->gs_shader, "Geometry", f);
+	si_dump_shader(&sctx->ps_shader, "Fragment", f);
 
 	si_dump_last_bo_list(sctx, f);
 	si_dump_last_ib(sctx, f);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 19dd14f..13738da 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx,
 void si_shader_change_notify(struct si_context *sctx)
 {
 	/* VS can be bound as VS, ES, or LS. */
-	if (sctx->tes_shader)
+	if (sctx->tes_shader.cso)
 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
 				      R_00B530_SPI_SHADER_USER_DATA_LS_0);
-	else if (sctx->gs_shader)
+	else if (sctx->gs_shader.cso)
 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
 				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
 	else
@@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx)
 				      R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
 	/* TES can be bound as ES, VS, or not bound. */
-	if (sctx->tes_shader) {
-		if (sctx->gs_shader)
+	if (sctx->tes_shader.cso) {
+		if (sctx->gs_shader.cso)
 			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
 					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
 		else
@@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
 	unsigned i;
 	uint32_t *sh_base = sctx->shader_userdata.sh_base;
 
-	if (sctx->gs_shader) {
+	if (sctx->gs_shader.cso) {
 		/* The VS copy shader needs these for clipping, streamout, and rings. */
 		unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
 		unsigned i = PIPE_SHADER_VERTEX;
@@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
 		/* The TESSEVAL shader needs this for the ESGS ring buffer. */
 		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
 				       R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
-	} else if (sctx->tes_shader) {
+	} else if (sctx->tes_shader.cso) {
 		/* The TESSEVAL shader needs this for streamout. */
 		si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
 				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 37e793a..c084f03 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context)
 		sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
 	if (sctx->dummy_pixel_shader)
 		sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
-	if (sctx->fixed_func_tcs_shader)
-		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
+	if (sctx->fixed_func_tcs_shader.cso)
+		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
 	if (sctx->custom_dsa_flush)
 		sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
 	if (sctx->custom_blend_resolve)
@@ -293,6 +293,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+	case PIPE_CAP_SHAREABLE_SHADERS:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -337,7 +338,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
-	case PIPE_CAP_SHAREABLE_SHADERS:
 		return 0;
 
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2abd5b5..d7a2282 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -152,6 +152,15 @@ struct si_viewports {
 	struct pipe_viewport_state	states[SI_MAX_VIEWPORTS];
 };
 
+/* A shader state consists of the shader selector, which is a constant state
+ * object shared by multiple contexts and shouldn't be modified, and
+ * the current shader variant selected for this context.
+ */
+struct si_shader_ctx_state {
+	struct si_shader_selector	*cso;
+	struct si_shader		*current;
+};
+
 struct si_context {
 	struct r600_common_context	b;
 	struct blitter_context		*blitter;
@@ -162,7 +171,7 @@ struct si_context {
 	void				*pstipple_sampler_state;
 	struct si_screen		*screen;
 	struct pipe_fence_handle	*last_gfx_fence;
-	struct si_shader_selector	*fixed_func_tcs_shader;
+	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	LLVMTargetMachineRef		tm;
 
 	/* Atoms (direct states). */
@@ -199,11 +208,11 @@ struct si_context {
 	void				*dummy_pixel_shader;
 
 	/* shaders */
-	struct si_shader_selector	*ps_shader;
-	struct si_shader_selector	*gs_shader;
-	struct si_shader_selector	*vs_shader;
-	struct si_shader_selector	*tcs_shader;
-	struct si_shader_selector	*tes_shader;
+	struct si_shader_ctx_state	ps_shader;
+	struct si_shader_ctx_state	gs_shader;
+	struct si_shader_ctx_state	vs_shader;
+	struct si_shader_ctx_state	tcs_shader;
+	struct si_shader_ctx_state	tes_shader;
 	struct si_cs_shader_state	cs_shader_state;
 
 	/* shader information */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 54dad72..b1076ed 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -179,15 +179,18 @@ struct radeon_shader_reloc;
 
 struct si_shader;
 
+/* A shader selector is a gallium CSO and contains shader variants and
+ * binaries for one TGSI program. This can be shared by multiple contexts.
+ */
 struct si_shader_selector {
-	struct si_shader *current;
+	pipe_mutex		mutex;
+	struct si_shader	*first_variant; /* immutable after the first variant */
+	struct si_shader	*last_variant; /* mutable */
 
 	struct tgsi_token       *tokens;
 	struct pipe_stream_output_info  so;
 	struct tgsi_shader_info		info;
 
-	unsigned	num_shaders;
-
 	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
 	unsigned	type;
 
@@ -293,24 +296,24 @@ struct si_shader {
 
 static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
 {
-	if (sctx->gs_shader)
-		return &sctx->gs_shader->info;
-	else if (sctx->tes_shader)
-		return &sctx->tes_shader->info;
-	else if (sctx->vs_shader)
-		return &sctx->vs_shader->info;
+	if (sctx->gs_shader.cso)
+		return &sctx->gs_shader.cso->info;
+	else if (sctx->tes_shader.cso)
+		return &sctx->tes_shader.cso->info;
+	else if (sctx->vs_shader.cso)
+		return &sctx->vs_shader.cso->info;
 	else
 		return NULL;
 }
 
 static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
 {
-	if (sctx->gs_shader)
-		return sctx->gs_shader->current->gs_copy_shader;
-	else if (sctx->tes_shader)
-		return sctx->tes_shader->current;
+	if (sctx->gs_shader.current)
+		return sctx->gs_shader.current->gs_copy_shader;
+	else if (sctx->tes_shader.current)
+		return sctx->tes_shader.current;
 	else
-		return sctx->vs_shader->current;
+		return sctx->vs_shader.current;
 }
 
 static inline bool si_vs_exports_prim_id(struct si_shader *shader)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2e77a36..243bdc6 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
 	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
 	 */
 	if (blend->dual_src_blend &&
-	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+	    (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
 		mask = 0;
 
 	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 5face42..ce6c98c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 				       unsigned *num_patches)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	struct si_shader_selector *ls = sctx->vs_shader;
+	struct si_shader_ctx_state *ls = &sctx->vs_shader;
 	/* The TES pointer will only be used for sctx->last_tcs.
 	 * It would be wrong to think that TCS = TES. */
 	struct si_shader_selector *tcs =
-		sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+		sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
 	unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
 	unsigned num_tcs_input_cp = info->vertices_per_patch;
 	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
 	/* This calculates how shader inputs and outputs among VS, TCS, and TES
 	 * are laid out in LDS. */
-	num_tcs_inputs = util_last_bit64(ls->outputs_written);
+	num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
 
-	if (sctx->tcs_shader) {
+	if (sctx->tcs_shader.cso) {
 		num_tcs_outputs = util_last_bit64(tcs->outputs_written);
 		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
 		num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
@@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
 	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
 
-	output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
+	output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0;
 	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
 
 	lds_size = output_patch0_offset + output_patch_size * *num_patches;
@@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 	bool partial_vs_wave = false;
 	bool partial_es_wave = false;
 
-	if (sctx->gs_shader)
+	if (sctx->gs_shader.cso)
 		primgroup_size = 64; /* recommended with a GS */
 
-	if (sctx->tes_shader) {
+	if (sctx->tes_shader.cso) {
 		unsigned num_cp_out =
-			sctx->tcs_shader ?
-			sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+			sctx->tcs_shader.cso ?
+			sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
 			info->vertices_per_patch;
 		unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
 
@@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 
 		/* SWITCH_ON_EOI must be set if PrimID is used.
 		 * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
-		if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
-		    sctx->tes_shader->info.uses_primid) {
+		if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
+		    sctx->tes_shader.cso->info.uses_primid) {
 			ia_switch_on_eoi = true;
 			partial_es_wave = true;
 		}
@@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 		if ((sctx->b.family == CHIP_TAHITI ||
 		     sctx->b.family == CHIP_PITCAIRN ||
 		     sctx->b.family == CHIP_BONAIRE) &&
-		    sctx->gs_shader)
+		    sctx->gs_shader.cso)
 			partial_vs_wave = true;
 	}
 
@@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
 {
 	unsigned num_output_cp;
 
-	if (!sctx->tes_shader)
+	if (!sctx->tes_shader.cso)
 		return 0;
 
-	num_output_cp = sctx->tcs_shader ?
-		sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+	num_output_cp = sctx->tcs_shader.cso ?
+		sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
 		info->vertices_per_patch;
 
 	return S_028B58_NUM_PATCHES(num_patches) |
@@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
 	unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
 	unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
 
-	if (sctx->tes_shader)
+	if (sctx->tes_shader.cso)
 		si_emit_derived_tess_state(sctx, info, &num_patches);
 
 	ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
@@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	    (info->indexed || !info->count_from_stream_output))
 		return;
 
-	if (!sctx->ps_shader || !sctx->vs_shader) {
+	if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
 		assert(0);
 		return;
 	}
-	if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
+	if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) {
 		assert(0);
 		return;
 	}
@@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 * This must be done after si_decompress_textures, which can call
 	 * draw_vbo recursively, and before si_update_shaders, which uses
 	 * current_rast_prim for this draw_vbo call. */
-	if (sctx->gs_shader)
-		sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
-	else if (sctx->tes_shader)
+	if (sctx->gs_shader.cso)
+		sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim;
+	else if (sctx->tes_shader.cso)
 		sctx->current_rast_prim =
-			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+			sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 	else
 		sctx->current_rast_prim = info->mode;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c98509b..8b26b94 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -523,26 +523,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 				key->vs.instance_divisors[i] =
 					sctx->vertex_elements->elements[i].instance_divisor;
 
-		if (sctx->tes_shader)
+		if (sctx->tes_shader.cso)
 			key->vs.as_ls = 1;
-		else if (sctx->gs_shader) {
+		else if (sctx->gs_shader.cso) {
 			key->vs.as_es = 1;
-			key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
+			key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
 		}
 
-		if (!sctx->gs_shader && sctx->ps_shader &&
-		    sctx->ps_shader->info.uses_primid)
+		if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
+		    sctx->ps_shader.cso->info.uses_primid)
 			key->vs.export_prim_id = 1;
 		break;
 	case PIPE_SHADER_TESS_CTRL:
 		key->tcs.prim_mode =
-			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+			sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 		break;
 	case PIPE_SHADER_TESS_EVAL:
-		if (sctx->gs_shader) {
+		if (sctx->gs_shader.cso) {
 			key->tes.as_es = 1;
-			key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
-		} else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+			key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
+		} else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
 			key->tes.export_prim_id = 1;
 		break;
 	case PIPE_SHADER_GEOMETRY:
@@ -589,11 +589,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
 /* Select the hw shader variant depending on the current state. */
 static int si_shader_select(struct pipe_context *ctx,
-			    struct si_shader_selector *sel)
+			    struct si_shader_ctx_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *sel = state->cso;
+	struct si_shader *current = state->current;
 	union si_shader_key key;
-	struct si_shader * shader = NULL;
+	struct si_shader *iter, *shader = NULL;
 	int r;
 
 	si_shader_selector_key(ctx, sel, &key);
@@ -602,49 +604,51 @@ static int si_shader_select(struct pipe_context *ctx,
 	 * This path is also used for most shaders that don't need multiple
 	 * variants, it will cost just a computation of the key and this
 	 * test. */
-	if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
+	if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
 		return 0;
-	}
 
-	/* lookup if we have other variants in the list */
-	if (sel->num_shaders > 1) {
-		struct si_shader *p = sel->current, *c = p->next_variant;
+	pipe_mutex_lock(sel->mutex);
 
-		while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
-			p = c;
-			c = c->next_variant;
+	/* Find the shader variant. */
+	for (iter = sel->first_variant; iter; iter = iter->next_variant) {
+		/* Don't check the "current" shader. We checked it above. */
+		if (current != iter &&
+		    memcmp(&iter->key, &key, sizeof(key)) == 0) {
+			state->current = iter;
+			pipe_mutex_unlock(sel->mutex);
+			return 0;
 		}
+	}
 
-		if (c) {
-			p->next_variant = c->next_variant;
-			shader = c;
-		}
+	/* Build a new shader. */
+	shader = CALLOC_STRUCT(si_shader);
+	if (!shader) {
+		pipe_mutex_unlock(sel->mutex);
+		return -ENOMEM;
 	}
+	shader->selector = sel;
+	shader->key = key;
+
+	r = si_shader_create(sctx->screen, sctx->tm, shader);
+	if (unlikely(r)) {
+		R600_ERR("Failed to build shader variant (type=%u) %d\n",
+			 sel->type, r);
+		FREE(shader);
+		pipe_mutex_unlock(sel->mutex);
+		return r;
+	}
+	si_shader_init_pm4_state(shader);
 
-	if (shader) {
-		shader->next_variant = sel->current;
-		sel->current = shader;
+	if (!sel->last_variant) {
+		sel->first_variant = shader;
+		sel->last_variant = shader;
 	} else {
-		shader = CALLOC(1, sizeof(struct si_shader));
-		shader->selector = sel;
-		shader->key = key;
-
-		shader->next_variant = sel->current;
-		sel->current = shader;
-		r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
-				     shader);
-		if (unlikely(r)) {
-			R600_ERR("Failed to build shader variant (type=%u) %d\n",
-				 sel->type, r);
-			sel->current = NULL;
-			FREE(shader);
-			return r;
-		}
-		si_shader_init_pm4_state(shader);
-		sel->num_shaders++;
-		p_atomic_inc(&sctx->screen->b.num_compilations);
+		sel->last_variant->next_variant = shader;
+		sel->last_variant = shader;
 	}
-
+	state->current = shader;
+	p_atomic_inc(&sctx->screen->b.num_compilations);
+	pipe_mutex_unlock(sel->mutex);
 	return 0;
 }
 
@@ -752,14 +756,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 		break;
 	}
 
-	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
-		if (si_shader_select(ctx, sel)) {
+	if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
+		struct si_shader_ctx_state state = {sel};
+
+		if (si_shader_select(ctx, &state)) {
 			fprintf(stderr, "radeonsi: can't create a shader\n");
 			tgsi_free_tokens(sel->tokens);
 			FREE(sel);
 			return NULL;
 		}
+	}
 
+	pipe_mutex_init(sel->mutex);
 	return sel;
 }
 
@@ -787,10 +795,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
 
-	if (sctx->vs_shader == sel || !sel)
+	if (sctx->vs_shader.cso == sel || !sel)
 		return;
 
-	sctx->vs_shader = sel;
+	sctx->vs_shader.cso = sel;
+	sctx->vs_shader.current = sel->first_variant;
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	si_update_viewports_and_scissors(sctx);
 }
@@ -799,12 +808,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
-	bool enable_changed = !!sctx->gs_shader != !!sel;
+	bool enable_changed = !!sctx->gs_shader.cso != !!sel;
 
-	if (sctx->gs_shader == sel)
+	if (sctx->gs_shader.cso == sel)
 		return;
 
-	sctx->gs_shader = sel;
+	sctx->gs_shader.cso = sel;
+	sctx->gs_shader.current = sel ? sel->first_variant : NULL;
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */
 
@@ -817,12 +827,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
-	bool enable_changed = !!sctx->tcs_shader != !!sel;
+	bool enable_changed = !!sctx->tcs_shader.cso != !!sel;
 
-	if (sctx->tcs_shader == sel)
+	if (sctx->tcs_shader.cso == sel)
 		return;
 
-	sctx->tcs_shader = sel;
+	sctx->tcs_shader.cso = sel;
+	sctx->tcs_shader.current = sel ? sel->first_variant : NULL;
 
 	if (enable_changed)
 		sctx->last_tcs = NULL; /* invalidate derived tess state */
@@ -832,12 +843,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
-	bool enable_changed = !!sctx->tes_shader != !!sel;
+	bool enable_changed = !!sctx->tes_shader.cso != !!sel;
 
-	if (sctx->tes_shader == sel)
+	if (sctx->tes_shader.cso == sel)
 		return;
 
-	sctx->tes_shader = sel;
+	sctx->tes_shader.cso = sel;
+	sctx->tes_shader.current = sel ? sel->first_variant : NULL;
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */
 
@@ -864,7 +876,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	struct si_shader_selector *sel = state;
 
 	/* skip if supplied shader is one already in use */
-	if (sctx->ps_shader == sel)
+	if (sctx->ps_shader.cso == sel)
 		return;
 
 	/* use a dummy shader if binding a NULL shader */
@@ -873,7 +885,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 		sel = sctx->dummy_pixel_shader;
 	}
 
-	sctx->ps_shader = sel;
+	sctx->ps_shader.cso = sel;
+	sctx->ps_shader.current = sel->first_variant;
 	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 }
 
@@ -881,8 +894,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = (struct si_shader_selector *)state;
-	struct si_shader *p = sel->current, *c;
-	struct si_shader_selector **current_shader[SI_NUM_SHADERS] = {
+	struct si_shader *p = sel->first_variant, *c;
+	struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
 		[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
 		[PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader,
 		[PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader,
@@ -890,8 +903,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 		[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
 	};
 
-	if (*current_shader[sel->type] == sel)
-		*current_shader[sel->type] = NULL;
+	if (current_shader[sel->type]->cso == sel) {
+		current_shader[sel->type]->cso = NULL;
+		current_shader[sel->type]->current = NULL;
+	}
 
 	while (p) {
 		c = p->next_variant;
@@ -927,6 +942,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 		p = c;
 	}
 
+	pipe_mutex_destroy(sel->mutex);
 	free(sel->tokens);
 	free(sel);
 }
@@ -934,7 +950,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	struct si_shader *ps = sctx->ps_shader->current;
+	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo = &ps->selector->info;
 	struct tgsi_shader_info *vsinfo = &vs->selector->info;
@@ -1004,7 +1020,7 @@ bcolor:
 static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	struct si_shader *ps = sctx->ps_shader->current;
+	struct si_shader *ps = sctx->ps_shader.current;
 	unsigned input_ena = ps->spi_ps_input_ena;
 
 	/* we need to enable at least one of them, otherwise we hang the GPU */
@@ -1133,7 +1149,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 
 static void si_update_gs_rings(struct si_context *sctx)
 {
-	unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
+	unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
 	uint64_t offset;
 
 	if (gsvs_itemsize == sctx->last_gsvs_itemsize)
@@ -1167,17 +1183,14 @@ static void si_update_gs_rings(struct si_context *sctx)
  *          < 0 if there was a failure
  */
 static int si_update_scratch_buffer(struct si_context *sctx,
-				    struct si_shader_selector *sel)
+				    struct si_shader *shader)
 {
-	struct si_shader *shader;
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
 	int r;
 
-	if (!sel)
+	if (!shader)
 		return 0;
 
-	shader = sel->current;
-
 	/* This shader doesn't need a scratch buffer */
 	if (shader->scratch_bytes_per_wave == 0)
 		return 0;
@@ -1209,20 +1222,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
 	return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
 }
 
-static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel)
+static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
 {
-	return sel ? sel->current->scratch_bytes_per_wave : 0;
+	return shader ? shader->scratch_bytes_per_wave : 0;
 }
 
 static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
 {
 	unsigned bytes = 0;
 
-	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader));
-	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader));
-	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader));
-	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader));
-	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
 	return bytes;
 }
 
@@ -1256,46 +1269,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 		 * last used, so we still need to try to update them, even if
 		 * they require scratch buffers smaller than the current size.
 		 */
-		r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+		r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
 		if (r < 0)
 			return false;
 		if (r == 1)
-			si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+			si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
 
-		r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+		r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
 		if (r < 0)
 			return false;
 		if (r == 1)
-			si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+			si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
 
-		r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+		r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
 		if (r < 0)
 			return false;
 		if (r == 1)
-			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+			si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
 
 		/* VS can be bound as LS, ES, or VS. */
-		r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+		r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
 		if (r < 0)
 			return false;
 		if (r == 1) {
-			if (sctx->tes_shader)
-				si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
-			else if (sctx->gs_shader)
-				si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+			if (sctx->tes_shader.current)
+				si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
+			else if (sctx->gs_shader.current)
+				si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
 			else
-				si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+				si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
 		}
 
 		/* TES can be bound as ES or VS. */
-		r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+		r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
 		if (r < 0)
 			return false;
 		if (r == 1) {
-			if (sctx->gs_shader)
-				si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+			if (sctx->gs_shader.current)
+				si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
 			else
-				si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+				si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
 		}
 	}
 
@@ -1361,7 +1374,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
 	if (!ureg)
 		return; /* if we get here, we're screwed */
 
-	assert(!sctx->fixed_func_tcs_shader);
+	assert(!sctx->fixed_func_tcs_shader.cso);
 
 	ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
 	const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
@@ -1376,7 +1389,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
 	ureg_MOV(ureg, tessinner, const1);
 	ureg_END(ureg);
 
-	sctx->fixed_func_tcs_shader =
+	sctx->fixed_func_tcs_shader.cso =
 		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
 }
 
@@ -1384,7 +1397,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
 {
 	/* Calculate the index of the config.
 	 * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
-	unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
+	unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
 	struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
 
 	if (!*pm4) {
@@ -1392,17 +1405,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
 
 		*pm4 = CALLOC_STRUCT(si_pm4_state);
 
-		if (sctx->tes_shader) {
+		if (sctx->tes_shader.cso) {
 			stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
 				  S_028B54_HS_EN(1);
 
-			if (sctx->gs_shader)
+			if (sctx->gs_shader.cso)
 				stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
 					  S_028B54_GS_EN(1) |
 				          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 			else
 				stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
-		} else if (sctx->gs_shader) {
+		} else if (sctx->gs_shader.cso) {
 			stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
 				  S_028B54_GS_EN(1) |
 			          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
@@ -1432,7 +1445,7 @@ bool si_update_shaders(struct si_context *sctx)
 	int r;
 
 	/* Update stages before GS. */
-	if (sctx->tes_shader) {
+	if (sctx->tes_shader.cso) {
 		if (!sctx->tf_ring) {
 			si_init_tess_factor_ring(sctx);
 			if (!sctx->tf_ring)
@@ -1440,65 +1453,65 @@ bool si_update_shaders(struct si_context *sctx)
 		}
 
 		/* VS as LS */
-		r = si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader);
 		if (r)
 			return false;
-		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+		si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
 
-		if (sctx->tcs_shader) {
-			r = si_shader_select(ctx, sctx->tcs_shader);
+		if (sctx->tcs_shader.cso) {
+			r = si_shader_select(ctx, &sctx->tcs_shader);
 			if (r)
 				return false;
-			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+			si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
 		} else {
-			if (!sctx->fixed_func_tcs_shader) {
+			if (!sctx->fixed_func_tcs_shader.cso) {
 				si_generate_fixed_func_tcs(sctx);
-				if (!sctx->fixed_func_tcs_shader)
+				if (!sctx->fixed_func_tcs_shader.cso)
 					return false;
 			}
 
-			r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
 			if (r)
 				return false;
 			si_pm4_bind_state(sctx, hs,
-					  sctx->fixed_func_tcs_shader->current->pm4);
+					  sctx->fixed_func_tcs_shader.current->pm4);
 		}
 
-		r = si_shader_select(ctx, sctx->tes_shader);
+		r = si_shader_select(ctx, &sctx->tes_shader);
 		if (r)
 			return false;
 
-		if (sctx->gs_shader) {
+		if (sctx->gs_shader.cso) {
 			/* TES as ES */
-			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+			si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
 		} else {
 			/* TES as VS */
-			si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
-			si_update_so(sctx, sctx->tes_shader);
+			si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
+			si_update_so(sctx, sctx->tes_shader.cso);
 		}
-	} else if (sctx->gs_shader) {
+	} else if (sctx->gs_shader.cso) {
 		/* VS as ES */
-		r = si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader);
 		if (r)
 			return false;
-		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+		si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
 	} else {
 		/* VS as VS */
-		r = si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, &sctx->vs_shader);
 		if (r)
 			return false;
-		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
-		si_update_so(sctx, sctx->vs_shader);
+		si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
+		si_update_so(sctx, sctx->vs_shader.cso);
 	}
 
 	/* Update GS. */
-	if (sctx->gs_shader) {
-		r = si_shader_select(ctx, sctx->gs_shader);
+	if (sctx->gs_shader.cso) {
+		r = si_shader_select(ctx, &sctx->gs_shader);
 		if (r)
 			return false;
-		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
-		si_update_so(sctx, sctx->gs_shader);
+		si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
+		si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
+		si_update_so(sctx, sctx->gs_shader.cso);
 
 		if (!sctx->gsvs_ring) {
 			si_init_gs_rings(sctx);
@@ -1514,10 +1527,10 @@ bool si_update_shaders(struct si_context *sctx)
 
 	si_update_vgt_shader_config(sctx);
 
-	r = si_shader_select(ctx, sctx->ps_shader);
+	r = si_shader_select(ctx, &sctx->ps_shader);
 	if (r)
 		return false;
-	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+	si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
 
 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
 	    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
@@ -1543,13 +1556,13 @@ bool si_update_shaders(struct si_context *sctx)
 			return false;
 	}
 
-	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
-		sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
+	if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
+		sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
 
-	if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
-		sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
+	if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
+		sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
 
 		if (sctx->b.chip_class == SI)