[Mesa-dev] [PATCH 10/11] radeonsi: fold *gallivm

Fri Sep 29 14:49:54 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c          | 71 +++++++++--------------
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 ++--
 2 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d80d10c..db9a0d7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1005,30 +1005,29 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
  *
  * \param type		output value type
  * \param swizzle	offset (typically 0..3); it can be ~0, which loads a vec4
  * \param dw_addr	address in dwords
  */
 static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
 			     enum tgsi_opcode_type type, unsigned swizzle,
 			     LLVMValueRef dw_addr)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef value;
 
 	if (swizzle == ~0) {
 		LLVMValueRef values[TGSI_NUM_CHANNELS];
 
 		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
 			values[chan] = lds_load(bld_base, type, chan, dw_addr);
 
-		return lp_build_gather_values(gallivm, values,
+		return lp_build_gather_values(&ctx->gallivm, values,
 					      TGSI_NUM_CHANNELS);
 	}
 
 	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
 			    LLVMConstInt(ctx->i32, swizzle, 0));
 
 	value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
 	if (tgsi_type_is_64bit(type)) {
 		LLVMValueRef value2;
 		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
@@ -1136,21 +1135,20 @@ static LLVMValueRef fetch_input_tes(
 	return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
 }
 
 static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 			     const struct tgsi_full_instruction *inst,
 			     const struct tgsi_opcode_info *info,
 			     unsigned index,
 			     LLVMValueRef dst[4])
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 	const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
 	unsigned chan_index;
 	LLVMValueRef dw_addr, stride;
 	LLVMValueRef buffer, base, buf_addr;
 	LLVMValueRef values[4];
 	bool skip_lds_store;
 	bool is_tess_factor = false, is_tess_inner = false;
 
 	/* Only handle per-patch and per-vertex outputs here.
@@ -1220,37 +1218,36 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 				LLVMBuildStore(ctx->ac.builder, value, /* outer */
 					       ctx->invoc0_tess_factors[chan_index]);
 			} else if (chan_index < 2) {
 				LLVMBuildStore(ctx->ac.builder, value, /* inner */
 					       ctx->invoc0_tess_factors[4 + chan_index]);
 			}
 		}
 	}
 
 	if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
-		LLVMValueRef value = lp_build_gather_values(gallivm,
+		LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
 		                                            values, 4);
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
 					    base, 0, 1, 0, true, false);
 	}
 }
 
 static LLVMValueRef fetch_input_gs(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type,
 	unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	struct lp_build_context *uint =	&ctx->bld_base.uint_bld;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef vtx_offset, soffset;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
 	unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
 	unsigned param;
 	LLVMValueRef value;
 
 	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
 		return get_primitive_id(ctx, swizzle);
 
@@ -1286,21 +1283,21 @@ static LLVMValueRef fetch_input_gs(
 		return lds_load(bld_base, type, swizzle, vtx_offset);
 	}
 
 	/* GFX6: input load from the ESGS ring in memory. */
 	if (swizzle == ~0) {
 		LLVMValueRef values[TGSI_NUM_CHANNELS];
 		unsigned chan;
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 			values[chan] = fetch_input_gs(bld_base, reg, type, chan);
 		}
-		return lp_build_gather_values(gallivm, values,
+		return lp_build_gather_values(&ctx->gallivm, values,
 					      TGSI_NUM_CHANNELS);
 	}
 
 	/* Get the vertex offset parameter on GFX6. */
 	unsigned vtx_offset_param = reg->Dimension.Index;
 	if (vtx_offset_param < 2) {
 		vtx_offset_param += ctx->param_gs_vtx0_offset;
 	} else {
 		assert(vtx_offset_param < 6);
 		vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
@@ -1539,45 +1536,43 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 				      LLVMValueRef resource,
 				      LLVMValueRef offset)
 {
 	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
 				    0, 0, 0, true, true);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
 {
 	struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
 	LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
 	LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
 
 	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
 	LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
 	LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
 
 	LLVMValueRef pos[4] = {
 		buffer_load_const(ctx, resource, offset0),
 		buffer_load_const(ctx, resource, offset1),
 		LLVMConstReal(ctx->f32, 0),
 		LLVMConstReal(ctx->f32, 0)
 	};
 
-	return lp_build_gather_values(gallivm, pos, 4);
+	return lp_build_gather_values(&ctx->gallivm, pos, 4);
 }
 
 void si_load_system_value(struct si_shader_context *ctx,
 			  unsigned index,
 			  const struct tgsi_full_declaration *decl)
 {
 	struct lp_build_context *bld = &ctx->bld_base.base;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef value = 0;
 
 	assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
 
 	switch (decl->Semantic.Name) {
 	case TGSI_SEMANTIC_INSTANCEID:
 		value = ctx->abi.instance_id;
 		break;
 
 	case TGSI_SEMANTIC_VERTEXID:
@@ -1630,21 +1625,21 @@ void si_load_system_value(struct si_shader_context *ctx,
 	case TGSI_SEMANTIC_POSITION:
 	{
 		LLVMValueRef pos[4] = {
 			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
 			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
 			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
 			lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP,
 						 LLVMGetParam(ctx->main_fn,
 							      SI_PARAM_POS_W_FLOAT)),
 		};
-		value = lp_build_gather_values(gallivm, pos, 4);
+		value = lp_build_gather_values(&ctx->gallivm, pos, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_FACE:
 		value = ctx->abi.front_face;
 		break;
 
 	case TGSI_SEMANTIC_SAMPLEID:
 		value = get_sample_id(ctx);
 		break;
@@ -1653,21 +1648,21 @@ void si_load_system_value(struct si_shader_context *ctx,
 		LLVMValueRef pos[4] = {
 			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
 			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
 			LLVMConstReal(ctx->f32, 0),
 			LLVMConstReal(ctx->f32, 0)
 		};
 		pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base,
 						  TGSI_OPCODE_FRC, pos[0]);
 		pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
 						  TGSI_OPCODE_FRC, pos[1]);
-		value = lp_build_gather_values(gallivm, pos, 4);
+		value = lp_build_gather_values(&ctx->gallivm, pos, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_SAMPLEMASK:
 		/* This can only occur with the OpenGL Core profile, which
 		 * doesn't support smoothing.
 		 */
 		value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
 		break;
 
@@ -1679,21 +1674,21 @@ void si_load_system_value(struct si_shader_context *ctx,
 			bld->zero,
 			bld->zero
 		};
 
 		/* For triangles, the vector should be (u, v, 1-u-v). */
 		if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
 		    PIPE_PRIM_TRIANGLES)
 			coord[2] = lp_build_sub(bld, bld->one,
 						lp_build_add(bld, coord[0], coord[1]));
 
-		value = lp_build_gather_values(gallivm, coord, 4);
+		value = lp_build_gather_values(&ctx->gallivm, coord, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_VERTICESIN:
 		if (ctx->type == PIPE_SHADER_TESS_CTRL)
 			value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
 		else if (ctx->type == PIPE_SHADER_TESS_EVAL)
 			value = get_num_tcs_out_vertices(ctx);
 		else
 			assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
@@ -1724,21 +1719,21 @@ void si_load_system_value(struct si_shader_context *ctx,
 		int i, offset;
 
 		slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
 		buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
 		buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
 		offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
 
 		for (i = 0; i < 4; i++)
 			val[i] = buffer_load_const(ctx, buf,
 						   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
-		value = lp_build_gather_values(gallivm, val, 4);
+		value = lp_build_gather_values(&ctx->gallivm, val, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_PRIMID:
 		value = get_primitive_id(ctx, 0);
 		break;
 
 	case TGSI_SEMANTIC_GRID_SIZE:
 		value = LLVMGetParam(ctx->main_fn, ctx->param_grid_size);
 		break;
@@ -1752,39 +1747,39 @@ void si_load_system_value(struct si_shader_context *ctx,
 		if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
 			unsigned sizes[3] = {
 				properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
 				properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
 				properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
 			};
 
 			for (i = 0; i < 3; ++i)
 				values[i] = LLVMConstInt(ctx->i32, sizes[i], 0);
 
-			value = lp_build_gather_values(gallivm, values, 3);
+			value = lp_build_gather_values(&ctx->gallivm, values, 3);
 		} else {
 			value = LLVMGetParam(ctx->main_fn, ctx->param_block_size);
 		}
 		break;
 	}
 
 	case TGSI_SEMANTIC_BLOCK_ID:
 	{
 		LLVMValueRef values[3];
 
 		for (int i = 0; i < 3; i++) {
 			values[i] = ctx->i32_0;
 			if (ctx->param_block_id[i] >= 0) {
 				values[i] = LLVMGetParam(ctx->main_fn,
 							 ctx->param_block_id[i]);
 			}
 		}
-		value = lp_build_gather_values(gallivm, values, 3);
+		value = lp_build_gather_values(&ctx->gallivm, values, 3);
 		break;
 	}
 
 	case TGSI_SEMANTIC_THREAD_ID:
 		value = LLVMGetParam(ctx->main_fn, ctx->param_thread_id);
 		break;
 
 	case TGSI_SEMANTIC_HELPER_INVOCATION:
 		value = lp_build_intrinsic(ctx->ac.builder,
 					   "llvm.amdgcn.ps.live",
@@ -1839,30 +1834,29 @@ void si_load_system_value(struct si_shader_context *ctx,
 		return;
 	}
 
 	ctx->system_values[index] = value;
 }
 
 void si_declare_compute_memory(struct si_shader_context *ctx,
 			       const struct tgsi_full_declaration *decl)
 {
 	struct si_shader_selector *sel = ctx->shader->selector;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 
 	LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
 	LLVMValueRef var;
 
 	assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
 	assert(decl->Range.First == decl->Range.Last);
 	assert(!ctx->shared_memory);
 
-	var = LLVMAddGlobalInAddressSpace(gallivm->module,
+	var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
 	                                  LLVMArrayType(ctx->i8, sel->local_size),
 	                                  "compute_lds",
 	                                  LOCAL_ADDR_SPACE);
 	LLVMSetAlignment(var, 4);
 
 	ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
@@ -2327,39 +2321,38 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 /**
  * Write streamout data to buffers for vertex stream @p stream (different
  * vertex streams can occur for GS copy shaders).
  */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 				   struct si_shader_output_values *outputs,
 				   unsigned noutput, unsigned stream)
 {
 	struct si_shader_selector *sel = ctx->shader->selector;
 	struct pipe_stream_output_info *so = &sel->so;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	int i;
 	struct lp_build_if_state if_ctx;
 
 	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
 	LLVMValueRef so_vtx_count =
 		unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
 	LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
 
 	/* can_emit = tid < so_vtx_count; */
 	LLVMValueRef can_emit =
 		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
 	/* Emit the streamout code conditionally. This actually avoids
 	 * out-of-bounds buffer access. The hw tells us via the SGPR
 	 * (so_vtx_count) which threads are allowed to emit streamout data. */
-	lp_build_if(&if_ctx, gallivm, can_emit);
+	lp_build_if(&if_ctx, &ctx->gallivm, can_emit);
 	{
 		/* The buffer offset is computed as follows:
 		 *   ByteOffset = streamout_offset[buffer_id]*4 +
 		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
 		 *                attrib_offset
                  */
 
 		LLVMValueRef so_write_index =
 			LLVMGetParam(ctx->main_fn,
 				     ctx->param_streamout_write_index);
@@ -2658,39 +2651,38 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 }
 
 static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 				  LLVMValueRef rel_patch_id,
 				  LLVMValueRef invocation_id,
 				  LLVMValueRef tcs_out_current_patch_data_offset,
 				  LLVMValueRef invoc0_tf_outer[4],
 				  LLVMValueRef invoc0_tf_inner[2])
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct si_shader *shader = ctx->shader;
 	unsigned tess_inner_index, tess_outer_index;
 	LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
 	LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
 	unsigned stride, outer_comps, inner_comps, i, offset;
 	struct lp_build_if_state if_ctx, inner_if_ctx;
 
 	/* Add a barrier before loading tess factors from LDS. */
 	if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
 		si_llvm_emit_barrier(NULL, bld_base, NULL);
 
 	/* Do this only for invocation 0, because the tess levels are per-patch,
 	 * not per-vertex.
 	 *
 	 * This can't jump, because invocation 0 executes this. It should
 	 * at least mask out the loads and stores for other invocations.
 	 */
-	lp_build_if(&if_ctx, gallivm,
+	lp_build_if(&if_ctx, &ctx->gallivm,
 		    LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 				  invocation_id, ctx->i32_0, ""));
 
 	/* Determine the layout of one tess factor element in the buffer. */
 	switch (shader->key.part.tcs.epilog.prim_mode) {
 	case PIPE_PRIM_LINES:
 		stride = 2; /* 2 dwords, 1 vec2 store */
 		outer_comps = 2;
 		inner_comps = 0;
 		break;
@@ -2748,36 +2740,36 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 	if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
 		/* For isolines, the hardware expects tess factors in the
 		 * reverse order from what GLSL / TGSI specify.
 		 */
 		LLVMValueRef tmp = out[0];
 		out[0] = out[1];
 		out[1] = tmp;
 	}
 
 	/* Convert the outputs to vectors for stores. */
-	vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+	vec0 = lp_build_gather_values(&ctx->gallivm, out, MIN2(stride, 4));
 	vec1 = NULL;
 
 	if (stride > 4)
-		vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+		vec1 = lp_build_gather_values(&ctx->gallivm, out+4, stride - 4);
 
 	/* Get the buffer. */
 	buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k);
 
 	/* Get the offset. */
 	tf_base = LLVMGetParam(ctx->main_fn,
 			       ctx->param_tcs_factor_offset);
 	byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
 				  LLVMConstInt(ctx->i32, 4 * stride, 0), "");
 
-	lp_build_if(&inner_if_ctx, gallivm,
+	lp_build_if(&inner_if_ctx, &ctx->gallivm,
 		    LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 				  rel_patch_id, ctx->i32_0, ""));
 
 	/* Store the dynamic HS control word. */
 	offset = 0;
 	if (ctx->screen->b.chip_class <= VI) {
 		ac_build_buffer_store_dword(&ctx->ac, buffer,
 					    LLVMConstInt(ctx->i32, 0x80000000, 0),
 					    1, ctx->i32_0, tf_base,
 					    offset, 1, 0, true, false);
@@ -2803,34 +2795,34 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 		unsigned param_outer, param_inner;
 
 		buf = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
 		base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
 
 		param_outer = si_shader_io_get_unique_index_patch(
 				      TGSI_SEMANTIC_TESSOUTER, 0);
 		tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
 					LLVMConstInt(ctx->i32, param_outer, 0));
 
-		outer_vec = lp_build_gather_values(gallivm, outer,
+		outer_vec = lp_build_gather_values(&ctx->gallivm, outer,
 						   util_next_power_of_two(outer_comps));
 
 		ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
 					    outer_comps, tf_outer_offset,
 					    base, 0, 1, 0, true, false);
 		if (inner_comps) {
 			param_inner = si_shader_io_get_unique_index_patch(
 					      TGSI_SEMANTIC_TESSINNER, 0);
 			tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
 					LLVMConstInt(ctx->i32, param_inner, 0));
 
 			inner_vec = inner_comps == 1 ? inner[0] :
-				    lp_build_gather_values(gallivm, inner, inner_comps);
+				    lp_build_gather_values(&ctx->gallivm, inner, inner_comps);
 			ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
 						    inner_comps, tf_inner_offset,
 						    base, 0, 1, 0, true, false);
 		}
 	}
 
 	lp_build_endif(&if_ctx);
 }
 
 static LLVMValueRef
@@ -3156,21 +3148,20 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
 
 	if (ctx->screen->b.chip_class >= GFX9)
 		lp_build_endif(&ctx->merged_wrap_if_state);
 }
 
 static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
 				     unsigned max_outputs,
 				     LLVMValueRef *addrs)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct tgsi_shader_info *info = &ctx->shader->selector->info;
 	struct si_shader_output_values *outputs = NULL;
 	int i,j;
 
 	assert(!ctx->shader->is_gs_copy_shader);
 	assert(info->num_outputs <= max_outputs);
 
 	outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
 
 	/* Vertex color clamping.
@@ -3189,21 +3180,21 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
 			    info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
 				continue;
 
 			/* We've found a color. */
 			if (!cond) {
 				/* The state is in the first bit of the user SGPR. */
 				cond = LLVMGetParam(ctx->main_fn,
 						    ctx->param_vs_state_bits);
 				cond = LLVMBuildTrunc(ctx->ac.builder, cond,
 						      ctx->i1, "");
-				lp_build_if(&if_ctx, gallivm, cond);
+				lp_build_if(&if_ctx, &ctx->gallivm, cond);
 			}
 
 			for (j = 0; j < 4; j++) {
 				addr = addrs[4 * i + j];
 				val = LLVMBuildLoad(ctx->ac.builder, addr, "");
 				val = ac_build_clamp(&ctx->ac, val);
 				LLVMBuildStore(ctx->ac.builder, val, addr);
 			}
 		}
 
@@ -3626,40 +3617,38 @@ static void si_llvm_emit_ddxy(
 /*
  * this takes an I,J coordinate pair,
  * and works out the X and Y derivatives.
  * it returns DDX(I), DDX(J), DDY(I), DDY(J).
  */
 static LLVMValueRef si_llvm_emit_ddxy_interp(
 	struct lp_build_tgsi_context *bld_base,
 	LLVMValueRef interp_ij)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef result[4], a;
 	unsigned i;
 
 	for (i = 0; i < 2; i++) {
 		a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
 					    LLVMConstInt(ctx->i32, i, 0), "");
 		result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
 		result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
 	}
 
-	return lp_build_gather_values(gallivm, result, 4);
+	return lp_build_gather_values(&ctx->gallivm, result, 4);
 }
 
 static void interp_fetch_args(
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 
 	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
 		/* offset is in second src, first two channels */
 		emit_data->args[0] = lp_build_emit_fetch(bld_base,
 							 emit_data->inst, 1,
 							 TGSI_CHAN_X);
 		emit_data->args[1] = lp_build_emit_fetch(bld_base,
 							 emit_data->inst, 1,
 							 TGSI_CHAN_Y);
@@ -3691,21 +3680,21 @@ static void interp_fetch_args(
 		 * sample position doesn't work.
 		 */
 		if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
 			LLVMValueRef center[4] = {
 				LLVMConstReal(ctx->f32, 0.5),
 				LLVMConstReal(ctx->f32, 0.5),
 				ctx->ac.f32_0,
 				ctx->ac.f32_0,
 			};
 
-			sample_position = lp_build_gather_values(gallivm, center, 4);
+			sample_position = lp_build_gather_values(&ctx->gallivm, center, 4);
 		} else {
 			sample_position = load_sample_position(ctx, sample_id);
 		}
 
 		emit_data->args[0] = LLVMBuildExtractElement(ctx->ac.builder,
 							     sample_position,
 							     ctx->i32_0, "");
 
 		emit_data->args[0] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[0], halfval, "");
 		emit_data->args[1] = LLVMBuildExtractElement(ctx->ac.builder,
@@ -3715,21 +3704,20 @@ static void interp_fetch_args(
 		emit_data->arg_count = 2;
 	}
 }
 
 static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 				struct lp_build_tgsi_context *bld_base,
 				struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	const struct tgsi_shader_info *info = &shader->selector->info;
 	LLVMValueRef interp_param;
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	const struct tgsi_full_src_register *input = &inst->Src[0];
 	int input_base, input_array_size;
 	int chan;
 	int i;
 	LLVMValueRef prim_mask = LLVMGetParam(ctx->main_fn, SI_PARAM_PRIM_MASK);
 	LLVMValueRef array_idx;
 	int interp_param_idx;
@@ -3800,21 +3788,21 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 			interp_el = ac_to_float(&ctx->ac, interp_el);
 
 			temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, emit_data->args[0], "");
 
 			temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
 
 			temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, emit_data->args[1], "");
 
 			ij_out[i] = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
 		}
-		interp_param = lp_build_gather_values(gallivm, ij_out, 2);
+		interp_param = lp_build_gather_values(&ctx->gallivm, ij_out, 2);
 	}
 
 	if (interp_param)
 		interp_param = ac_to_float(&ctx->ac, interp_param);
 
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
 		unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
 
 		for (unsigned idx = 0; idx < input_array_size; ++idx) {
@@ -3944,21 +3932,20 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
 /* Emit one vertex from the geometry shader */
 static void si_llvm_emit_vertex(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct lp_build_if_state if_state;
 	LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
 					    ctx->param_gs2vs_offset);
 	LLVMValueRef gs_next_vertex;
 	LLVMValueRef can_emit, kill;
 	unsigned chan, offset;
 	int i;
 	unsigned stream;
 
 	stream = si_llvm_get_stream(bld_base, emit_data);
@@ -3981,21 +3968,21 @@ static void si_llvm_emit_vertex(
 					      shader->selector->gs_max_out_vertices, 0), "");
 
 	bool use_kill = !info->writes_memory;
 	if (use_kill) {
 		kill = lp_build_select(&bld_base->base, can_emit,
 				       LLVMConstReal(ctx->f32, 1.0f),
 				       LLVMConstReal(ctx->f32, -1.0f));
 
 		ac_build_kill(&ctx->ac, kill);
 	} else {
-		lp_build_if(&if_state, gallivm, can_emit);
+		lp_build_if(&if_state, &ctx->gallivm, can_emit);
 	}
 
 	offset = 0;
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)) ||
 			    ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
 				continue;
@@ -6058,21 +6045,20 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
 /**
  * Given a list of shader part functions, build a wrapper function that
  * runs them in sequence to form a monolithic shader.
  */
 static void si_build_wrapper_function(struct si_shader_context *ctx,
 				      LLVMValueRef *parts,
 				      unsigned num_parts,
 				      unsigned main_part,
 				      unsigned next_shader_first_part)
 {
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	/* PS epilog has one arg per color component; gfx9 merged shader
 	 * prologs need to forward 32 user SGPRs.
 	 */
 	struct si_function_info fninfo;
 	LLVMValueRef initial[64], out[64];
 	LLVMTypeRef function_type;
 	unsigned num_first_params;
 	unsigned num_out, initial_num_out;
 	MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
@@ -6215,21 +6201,21 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 #endif
 				lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
 			}
 
 			assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
 			assert(is_sgpr || out_idx >= num_out_sgpr);
 
 			if (param_size == 1)
 				arg = out[out_idx];
 			else
-				arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
+				arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
 
 			if (LLVMTypeOf(arg) != param_type) {
 				if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
 					arg = LLVMBuildBitCast(builder, arg, ctx->i64, "");
 					arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
 				} else {
 					arg = LLVMBuildBitCast(builder, arg, param_type, "");
 				}
 			}
 
@@ -6636,21 +6622,20 @@ si_get_shader_part(struct si_screen *sscreen,
 			return result;
 		}
 	}
 
 	/* Compile a new one. */
 	result = CALLOC_STRUCT(si_shader_part);
 	result->key = *key;
 
 	struct si_shader shader = {};
 	struct si_shader_context ctx;
-	struct gallivm_state *gallivm = &ctx.gallivm;
 
 	si_init_shader_ctx(&ctx, sscreen, tm);
 	ctx.shader = &shader;
 	ctx.type = type;
 
 	switch (type) {
 	case PIPE_SHADER_VERTEX:
 		break;
 	case PIPE_SHADER_TESS_CTRL:
 		assert(!prolog);
@@ -6668,44 +6653,43 @@ si_get_shader_part(struct si_screen *sscreen,
 	default:
 		unreachable("bad shader part");
 	}
 
 	build(&ctx, key);
 
 	/* Compile. */
 	si_llvm_optimize_module(&ctx);
 
 	if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
-			    gallivm->module, debug, ctx.type, name)) {
+			    ctx.ac.module, debug, ctx.type, name)) {
 		FREE(result);
 		result = NULL;
 		goto out;
 	}
 
 	result->next = *list;
 	*list = result;
 
 out:
 	si_llvm_dispose(&ctx);
 	mtx_unlock(&sscreen->shader_parts_mutex);
 	return result;
 }
 
 static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
 {
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef ptr[2], list;
 
 	/* Get the pointer to rw buffers. */
 	ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
 	ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
-	list = lp_build_gather_values(gallivm, ptr, 2);
+	list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
 	list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
 	list = LLVMBuildIntToPtr(ctx->ac.builder, list,
 				 si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
 	return list;
 }
 
 /**
  * Build the vertex shader prolog function.
  *
  * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
@@ -7042,21 +7026,20 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
  * - overriding interpolation parameters for the API PS
  * - polygon stippling
  *
  * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
  * overriden by other states. (e.g. per-sample interpolation)
  * Interpolated colors are stored after the preloaded VGPRs.
  */
 static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key)
 {
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct si_function_info fninfo;
 	LLVMValueRef ret, func;
 	int num_returns, i, num_color_channels;
 
 	assert(si_need_ps_prolog(key));
 
 	si_init_function_info(&fninfo);
 
 	/* Declare inputs. */
 	for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
@@ -7227,21 +7210,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 		/* If the interpolation qualifier is not CONSTANT (-1). */
 		if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
 			unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
 					       key->ps_prolog.color_interp_vgpr_index[i];
 
 			/* Get the (i,j) updated by bc_optimize handling. */
 			interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret,
 							  interp_vgpr, "");
 			interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret,
 							  interp_vgpr + 1, "");
-			interp_ij = lp_build_gather_values(gallivm, interp, 2);
+			interp_ij = lp_build_gather_values(&ctx->gallivm, interp, 2);
 		}
 
 		/* Use the absolute location of the input. */
 		prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
 
 		if (key->ps_prolog.states.color_two_side) {
 			face = LLVMGetParam(func, face_vgpr);
 			face = ac_to_integer(&ctx->ac, face);
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index d7ba5c4..7c2afe3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -226,21 +226,20 @@ image_fetch_rsrc(
 				   target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
 				   dcc_off);
 }
 
 static LLVMValueRef image_fetch_coords(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_instruction *inst,
 		unsigned src, LLVMValueRef desc)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	unsigned target = inst->Memory.Texture;
 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
 	LLVMValueRef coords[4];
 	LLVMValueRef tmp;
 	int chan;
 
 	for (chan = 0; chan < num_coords; ++chan) {
 		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
 		tmp = ac_to_integer(&ctx->ac, tmp);
@@ -276,21 +275,21 @@ static LLVMValueRef image_fetch_coords(
 
 	if (num_coords == 1)
 		return coords[0];
 
 	if (num_coords == 3) {
 		/* LLVM has difficulties lowering 3-element vectors. */
 		coords[3] = bld_base->uint_bld.undef;
 		num_coords = 4;
 	}
 
-	return lp_build_gather_values(gallivm, coords, num_coords);
+	return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
 }
 
 /**
  * Append the extra mode bits that are used by image load and store.
  */
 static void image_append_args(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data * emit_data,
 		unsigned target,
 		bool atomic,
@@ -467,38 +466,37 @@ static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
 	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
 
 	return ptr;
 }
 
 static void load_emit_memory(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	unsigned writemask = inst->Dst[0].Register.WriteMask;
 	LLVMValueRef channels[4], ptr, derived_ptr, index;
 	int chan;
 
 	ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
 
 	for (chan = 0; chan < 4; ++chan) {
 		if (!(writemask & (1 << chan))) {
 			channels[chan] = LLVMGetUndef(ctx->f32);
 			continue;
 		}
 
 		index = LLVMConstInt(ctx->i32, chan, 0);
 		derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
 		channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
 	}
-	emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, channels, 4);
+	emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
 }
 
 /**
  * Return true if the memory accessed by a LOAD or STORE instruction is
  * read-only or write-only, respectively.
  *
  * \param shader_buffers_reverse_access_mask
  *	For LOAD, set this to (store | atomic) slot usage in the shader.
  *	For STORE, set this to (load | atomic) slot usage in the shader.
  * \param images_reverse_access_mask  Same as above, but for images.
@@ -613,34 +611,33 @@ static void load_emit(
 				emit_data->args, emit_data->arg_count,
 				get_load_intr_attribs(can_speculate));
 	}
 }
 
 static void store_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	struct tgsi_full_src_register memory;
 	LLVMValueRef chans[4];
 	LLVMValueRef data;
 	LLVMValueRef rsrc;
 	unsigned chan;
 
 	emit_data->dst_type = ctx->voidt;
 
 	for (chan = 0; chan < 4; ++chan) {
 		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
 	}
-	data = lp_build_gather_values(gallivm, chans, 4);
+	data = lp_build_gather_values(&ctx->gallivm, chans, 4);
 
 	emit_data->args[emit_data->arg_count++] = data;
 
 	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 		LLVMValueRef offset;
 		LLVMValueRef tmp;
 
 		rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
@@ -992,29 +989,28 @@ static void atomic_emit(
 	emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
 }
 
 static void set_tex_fetch_args(struct si_shader_context *ctx,
 			       struct lp_build_emit_data *emit_data,
 			       unsigned target,
 			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
 			       LLVMValueRef *param, unsigned count,
 			       unsigned dmask)
 {
-	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct ac_image_args args = {};
 
 	/* Pad to power of two vector */
 	while (count < util_next_power_of_two(count))
 		param[count++] = LLVMGetUndef(ctx->i32);
 
 	if (count > 1)
-		args.addr = lp_build_gather_values(gallivm, param, count);
+		args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
 	else
 		args.addr = param[0];
 
 	args.resource = res_ptr;
 	args.sampler = samp_ptr;
 	args.dmask = dmask;
 	args.unorm = target == TGSI_TEXTURE_RECT ||
 		     target == TGSI_TEXTURE_SHADOWRECT;
 	args.da = tgsi_is_array_sampler(target);
 
-- 
2.7.4