[Mesa-dev] [PATCH 15/16] ac: clean up ac_build_indexed_load function interfaces

Nicolai Hähnle nhaehnle at gmail.com
Tue Oct 17 12:24:12 UTC 2017


This patch and patches 1-13:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
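For anyone skimming the series: the cleanup replaces the two old entry points (a bool flag plus a "_const" variant) with three wrappers around one static helper, so the call site spells out the intent. A rough before/after sketch, pieced together from call sites in the patch below (variable declarations omitted, so treat it as pseudocode rather than a compilable unit):

  /* Before: the bool argument and the _const suffix carried the semantics. */
  value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
  rsrc  = ac_build_indexed_load_const(&ctx->ac, list, index);

  /* After: the function name states what may be assumed about the load. */
  value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);           /* plain load */
  rsrc  = ac_build_load_to_sgpr(&ctx->ac, list, index);         /* uniform + invariant */
  result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);   /* invariant, not uniform */

As far as I can see, the only behavioral change is in ac_nir_to_llvm.c's load_sample_position, which now gets the invariant-load metadata that the old ac_build_indexed_load(..., false) call did not set.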


On 13.10.2017 14:04, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/amd/common/ac_llvm_build.c                    | 42 ++++++++++++++---------
>   src/amd/common/ac_llvm_build.h                    | 14 ++++----
>   src/amd/common/ac_nir_to_llvm.c                   | 22 ++++++------
>   src/gallium/drivers/radeonsi/si_shader.c          | 34 +++++++++---------
>   src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 +--
>   5 files changed, 61 insertions(+), 55 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 1d97b09..949f181 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -710,46 +710,54 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
>   		       ac_build_gep0(ctx, base_ptr, index));
>   }
>   
>   /**
>    * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
>    * It's equivalent to doing a load from &base_ptr[index].
>    *
>    * \param base_ptr  Where the array starts.
>    * \param index     The element index into the array.
>    * \param uniform   Whether the base_ptr and index can be assumed to be
> - *                  dynamically uniform
> + *                  dynamically uniform (i.e. load to an SGPR)
> + * \param invariant Whether the load is invariant (no other opcodes affect it)
>    */
> -LLVMValueRef
> -ac_build_indexed_load(struct ac_llvm_context *ctx,
> -		      LLVMValueRef base_ptr, LLVMValueRef index,
> -		      bool uniform)
> +static LLVMValueRef
> +ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
> +		     LLVMValueRef index, bool uniform, bool invariant)
>   {
> -	LLVMValueRef pointer;
> +	LLVMValueRef pointer, result;
>   
>   	pointer = ac_build_gep0(ctx, base_ptr, index);
>   	if (uniform)
>   		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
> -	return LLVMBuildLoad(ctx->builder, pointer, "");
> +	result = LLVMBuildLoad(ctx->builder, pointer, "");
> +	if (invariant)
> +		LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
> +	return result;
>   }
>   
> -/**
> - * Do a load from &base_ptr[index], but also add a flag that it's loading
> - * a constant from a dynamically uniform index.
> - */
> -LLVMValueRef
> -ac_build_indexed_load_const(struct ac_llvm_context *ctx,
> -			    LLVMValueRef base_ptr, LLVMValueRef index)
> +LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
> +			   LLVMValueRef index)
>   {
> -	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
> -	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
> -	return result;
> +	return ac_build_load_custom(ctx, base_ptr, index, false, false);
> +}
> +
> +LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
> +				     LLVMValueRef base_ptr, LLVMValueRef index)
> +{
> +	return ac_build_load_custom(ctx, base_ptr, index, false, true);
> +}
> +
> +LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
> +				   LLVMValueRef base_ptr, LLVMValueRef index)
> +{
> +	return ac_build_load_custom(ctx, base_ptr, index, true, true);
>   }
>   
>   /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
>    * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
>    * or v4i32 (num_channels=3,4).
>    */
>   void
>   ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
>   			    LLVMValueRef rsrc,
>   			    LLVMValueRef vdata,
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index ac8ea9c..f0b5875 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -143,28 +143,26 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
>   LLVMValueRef
>   ac_build_gep0(struct ac_llvm_context *ctx,
>   	      LLVMValueRef base_ptr,
>   	      LLVMValueRef index);
>   
>   void
>   ac_build_indexed_store(struct ac_llvm_context *ctx,
>   		       LLVMValueRef base_ptr, LLVMValueRef index,
>   		       LLVMValueRef value);
>   
> -LLVMValueRef
> -ac_build_indexed_load(struct ac_llvm_context *ctx,
> -		      LLVMValueRef base_ptr, LLVMValueRef index,
> -		      bool uniform);
> -
> -LLVMValueRef
> -ac_build_indexed_load_const(struct ac_llvm_context *ctx,
> -			    LLVMValueRef base_ptr, LLVMValueRef index);
> +LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
> +			   LLVMValueRef index);
> +LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
> +				     LLVMValueRef base_ptr, LLVMValueRef index);
> +LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
> +				   LLVMValueRef base_ptr, LLVMValueRef index);
>   
>   void
>   ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
>   			    LLVMValueRef rsrc,
>   			    LLVMValueRef vdata,
>   			    unsigned num_channels,
>   			    LLVMValueRef voffset,
>   			    LLVMValueRef soffset,
>   			    unsigned inst_offset,
>   		            bool glc,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 4492d8e..8278486 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -792,21 +792,21 @@ static void create_function(struct nir_to_llvm_context *ctx)
>   			} else
>   				ctx->descriptor_sets[i] = NULL;
>   		}
>   	} else {
>   		uint32_t desc_sgpr_idx = user_sgpr_idx;
>   		set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
>   
>   		for (unsigned i = 0; i < num_sets; ++i) {
>   			if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
>   				set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
> -				ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
> +				ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
>   
>   			} else
>   				ctx->descriptor_sets[i] = NULL;
>   		}
>   		ctx->shader_info->need_indirect_descriptor_sets = true;
>   	}
>   
>   	if (ctx->shader_info->info.needs_push_constants) {
>   		set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
>   	}
> @@ -2518,21 +2518,21 @@ out:
>   
>   	*const_out = const_offset;
>   	*indir_out = offset;
>   }
>   
>   static LLVMValueRef
>   lds_load(struct nir_to_llvm_context *ctx,
>   	 LLVMValueRef dw_addr)
>   {
>   	LLVMValueRef value;
> -	value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
> +	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
>   	return value;
>   }
>   
>   static void
>   lds_store(struct nir_to_llvm_context *ctx,
>   	  LLVMValueRef dw_addr, LLVMValueRef value)
>   {
>   	value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
>   	ac_build_indexed_store(&ctx->ac, ctx->lds,
>   			       dw_addr, value);
> @@ -3733,21 +3733,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
>   static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
>   					 LLVMValueRef sample_id)
>   {
>   	LLVMValueRef result;
>   	LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
>   
>   	ptr = LLVMBuildBitCast(ctx->builder, ptr,
>   			       const_array(ctx->v2f32, 64), "");
>   
>   	sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
> -	result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
> +	result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
>   
>   	return result;
>   }
>   
>   static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
>   {
>   	LLVMValueRef values[2];
>   
>   	values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
>   	values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
> @@ -4224,21 +4224,21 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
>   	assert(stride % type_size == 0);
>   
>   	if (!index)
>   		index = ctx->i32zero;
>   
>   	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
>   
>   	list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
>   	list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
>   
> -	return ac_build_indexed_load_const(&ctx->ac, list, index);
> +	return ac_build_load_to_sgpr(&ctx->ac, list, index);
>   }
>   
>   static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
>   				     const nir_deref_var *deref,
>   				     enum ac_descriptor_type desc_type,
>   				     bool image, bool write)
>   {
>   	LLVMValueRef index = NULL;
>   	unsigned constant_index = 0;
>   	const nir_deref *tail = &deref->deref;
> @@ -4921,21 +4921,21 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
>   					    ctx->abi.start_instance, "");
>   		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
>   		                            ctx->shader_info->vs.vgpr_comp_cnt);
>   	} else
>   		buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
>   					    ctx->abi.base_vertex, "");
>   
>   	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
>   		t_offset = LLVMConstInt(ctx->i32, index + i, false);
>   
> -		t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
> +		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
>   
>   		input = ac_build_buffer_load_format(&ctx->ac, t_list,
>   						    buffer_index,
>   						    LLVMConstInt(ctx->i32, 0, false),
>   						    true);
>   
>   		for (unsigned chan = 0; chan < 4; chan++) {
>   			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
>   			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
>   				ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
> @@ -6211,43 +6211,43 @@ ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
>   			       outinfo->vs_output_param_offset,
>   			       VARYING_SLOT_MAX,
>   			       &outinfo->param_exports);
>   }
>   
>   static void
>   ac_setup_rings(struct nir_to_llvm_context *ctx)
>   {
>   	if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
>   	    (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
> -		ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
> +		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
>   	}
>   
>   	if (ctx->is_gs_copy_shader) {
> -		ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
> +		ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
>   	}
>   	if (ctx->stage == MESA_SHADER_GEOMETRY) {
>   		LLVMValueRef tmp;
> -		ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
> -		ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
> +		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
> +		ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
>   
>   		ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
>   
>   		ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->i32, 2, false), "");
>   		tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
>   		tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
>   		ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
>   	}
>   
>   	if (ctx->stage == MESA_SHADER_TESS_CTRL ||
>   	    ctx->stage == MESA_SHADER_TESS_EVAL) {
> -		ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
> -		ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
> +		ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
> +		ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
>   	}
>   }
>   
>   static unsigned
>   ac_nir_get_max_workgroup_size(enum chip_class chip_class,
>   			      const struct nir_shader *nir)
>   {
>   	switch (nir->stage) {
>   	case MESA_SHADER_TESS_CTRL:
>   		return chip_class >= CIK ? 128 : 64;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 506da6f..f83f45c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -554,21 +554,21 @@ void si_llvm_load_input_vs(
>   	LLVMValueRef t_offset;
>   	LLVMValueRef t_list;
>   	LLVMValueRef vertex_index;
>   	LLVMValueRef input[3];
>   
>   	/* Load the T list */
>   	t_list_ptr = LLVMGetParam(ctx->main_fn, ctx->param_vertex_buffers);
>   
>   	t_offset = LLVMConstInt(ctx->i32, input_index, 0);
>   
> -	t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
> +	t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
>   
>   	vertex_index = LLVMGetParam(ctx->main_fn,
>   				    ctx->param_vertex_index0 +
>   				    input_index);
>   
>   	fix_fetch = ctx->shader->key.mono.vs_fix_fetch[input_index];
>   
>   	/* Do multiple loads for special formats. */
>   	switch (fix_fetch) {
>   	case SI_FIX_FETCH_RGB_64_FLOAT:
> @@ -1092,26 +1092,26 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
>   		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
>   			values[chan] = lds_load(bld_base, type, chan, dw_addr);
>   
>   		return lp_build_gather_values(&ctx->gallivm, values,
>   					      TGSI_NUM_CHANNELS);
>   	}
>   
>   	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
>   			    LLVMConstInt(ctx->i32, swizzle, 0));
>   
> -	value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
> +	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
>   	if (tgsi_type_is_64bit(type)) {
>   		LLVMValueRef value2;
>   		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
>   				       ctx->i32_1);
> -		value2 = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
> +		value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
>   		return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
>   	}
>   
>   	return bitcast(bld_base, type, value);
>   }
>   
>   /**
>    * Store to LDS.
>    *
>    * \param swizzle	offset (typically 0..3)
> @@ -1610,21 +1610,21 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
>   {
>   	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
>   				    0, 0, 0, true, true);
>   }
>   
>   static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
>   {
>   	struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
>   	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
>   	LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
> -	LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
> +	LLVMValueRef resource = ac_build_load_to_sgpr(&ctx->ac, desc, buf_index);
>   
>   	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
>   	LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
>   	LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
>   
>   	LLVMValueRef pos[4] = {
>   		buffer_load_const(ctx, resource, offset0),
>   		buffer_load_const(ctx, resource, offset1),
>   		LLVMConstReal(ctx->f32, 0),
>   		LLVMConstReal(ctx->f32, 0)
> @@ -1785,21 +1785,21 @@ void si_load_system_value(struct si_shader_context *ctx,
>   	}
>   
>   	case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
>   	case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
>   	{
>   		LLVMValueRef buf, slot, val[4];
>   		int i, offset;
>   
>   		slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
>   		buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
> -		buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
> +		buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
>   		offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
>   
>   		for (i = 0; i < 4; i++)
>   			val[i] = buffer_load_const(ctx, buf,
>   						   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
>   		value = lp_build_gather_values(&ctx->gallivm, val, 4);
>   		break;
>   	}
>   
>   	case TGSI_SEMANTIC_PRIMID:
> @@ -1928,49 +1928,49 @@ void si_declare_compute_memory(struct si_shader_context *ctx,
>   	LLVMSetAlignment(var, 4);
>   
>   	ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
>   }
>   
>   static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
>   {
>   	LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
>   					     ctx->param_const_and_shader_buffers);
>   
> -	return ac_build_indexed_load_const(&ctx->ac, list_ptr,
> -			LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
> +	return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
> +				     LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
>   }
>   
>   static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
>   {
>   	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>   	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
>   
>   	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
>   	index = LLVMBuildAdd(ctx->ac.builder, index,
>   			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
>   
> -	return ac_build_indexed_load_const(&ctx->ac, ptr, index);
> +	return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
>   }
>   
>   static LLVMValueRef
>   load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
>   {
>   	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>   	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
>   					     ctx->param_const_and_shader_buffers);
>   
>   	index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
>   	index = LLVMBuildSub(ctx->ac.builder,
>   			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
>   			     index, "");
>   
> -	return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
> +	return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
>   }
>   
>   static LLVMValueRef fetch_constant(
>   	struct lp_build_tgsi_context *bld_base,
>   	const struct tgsi_full_src_register *reg,
>   	enum tgsi_opcode_type type,
>   	unsigned swizzle)
>   {
>   	struct si_shader_context *ctx = si_shader_context(bld_base);
>   	const struct tgsi_ind_register *ireg = &reg->Indirect;
> @@ -2001,21 +2001,21 @@ static LLVMValueRef fetch_constant(
>   	idx = reg->Register.Index * 4 + swizzle;
>   
>   	if (reg->Dimension.Indirect) {
>   		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
>   		LLVMValueRef index;
>   		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
>   						      reg->Dimension.Index,
>   						      ctx->num_const_buffers);
>   		index = LLVMBuildAdd(ctx->ac.builder, index,
>   				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
> -		bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
> +		bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
>   	} else
>   		bufp = load_const_buffer_desc(ctx, buf);
>   
>   	if (reg->Register.Indirect) {
>   		addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
>   	} else {
>   		addr = LLVMConstInt(ctx->i32, idx * 4, 0);
>   	}
>   
>   	return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
> @@ -2276,21 +2276,21 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
>   {
>   	struct si_shader_context *ctx = si_shader_context(bld_base);
>   	struct lp_build_context *base = &bld_base->base;
>   	unsigned reg_index;
>   	unsigned chan;
>   	unsigned const_chan;
>   	LLVMValueRef base_elt;
>   	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
>   	LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
>   						   SI_VS_CONST_CLIP_PLANES, 0);
> -	LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index);
> +	LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
>   
>   	for (reg_index = 0; reg_index < 2; reg_index ++) {
>   		struct ac_export_args *args = &pos[2 + reg_index];
>   
>   		args->out[0] =
>   		args->out[1] =
>   		args->out[2] =
>   		args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
>   
>   		/* Compute dot products of position and user clip plane vectors */
> @@ -2433,21 +2433,21 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
>   		LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
>   						    ctx->param_rw_buffers);
>   
>   		for (i = 0; i < 4; i++) {
>   			if (!so->stride[i])
>   				continue;
>   
>   			LLVMValueRef offset = LLVMConstInt(ctx->i32,
>   							   SI_VS_STREAMOUT_BUF0 + i, 0);
>   
> -			so_buffers[i] = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
> +			so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
>   
>   			LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn,
>   							      ctx->param_streamout_offset[i]);
>   			so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");
>   
>   			so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
>   							  LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
>   			so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
>   		}
>   
> @@ -4717,34 +4717,34 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
>   					    ctx->param_rw_buffers);
>   
>   	if (ctx->screen->b.chip_class <= VI &&
>   	    (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
>   		unsigned ring =
>   			ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
>   							     : SI_ES_RING_ESGS;
>   		LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
>   
>   		ctx->esgs_ring =
> -			ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
> +			ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
>   	}
>   
>   	if (ctx->shader->is_gs_copy_shader) {
>   		LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
>   
>   		ctx->gsvs_ring[0] =
> -			ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
> +			ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
>   	} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
>   		const struct si_shader_selector *sel = ctx->shader->selector;
>   		LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
>   		LLVMValueRef base_ring;
>   
> -		base_ring = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
> +		base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
>   
>   		/* The conceptual layout of the GSVS ring is
>   		 *   v0c0 .. vLv0 v0c1 .. vLc1 ..
>   		 * but the real memory layout is swizzled across
>   		 * threads:
>   		 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
>   		 *   t16v0c0 ..
>   		 * Override the buffer descriptor accordingly.
>   		 */
>   		LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
> @@ -4813,21 +4813,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
>   
>   	/* Use the fixed-point gl_FragCoord input.
>   	 * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
>   	 * per coordinate to get the repeating effect.
>   	 */
>   	address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
>   	address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
>   
>   	/* Load the buffer descriptor. */
>   	slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
> -	desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot);
> +	desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
>   
>   	/* The stipple pattern is 32x32, each row has 32 bits. */
>   	offset = LLVMBuildMul(builder, address[1],
>   			      LLVMConstInt(ctx->i32, 4, 0), "");
>   	row = buffer_load_const(ctx, desc, offset);
>   	row = ac_to_integer(&ctx->ac, row);
>   	bit = LLVMBuildLShr(builder, row, address[0], "");
>   	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>   
>   	/* The intrinsic kills the thread if arg < 0. */
> @@ -6882,21 +6882,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
>   	}
>   
>   	/* Compute vertex load indices from instance divisors. */
>   	LLVMValueRef instance_divisor_constbuf = NULL;
>   
>   	if (key->vs_prolog.states.instance_divisor_is_fetched) {
>   		LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
>   		LLVMValueRef buf_index =
>   			LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
>   		instance_divisor_constbuf =
> -			ac_build_indexed_load_const(&ctx->ac, list, buf_index);
> +			ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
>   	}
>   
>   	for (i = 0; i <= key->vs_prolog.last_input; i++) {
>   		bool divisor_is_one =
>   			key->vs_prolog.states.instance_divisor_is_one & (1u << i);
>   		bool divisor_is_fetched =
>   			key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
>   		LLVMValueRef index;
>   
>   		if (divisor_is_one || divisor_is_fetched) {
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index a2b2b87..ec11c75 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -138,21 +138,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
>   		index = LLVMBuildMul(builder, index,
>   				     LLVMConstInt(ctx->i32, 2, 0), "");
>   		index = LLVMBuildAdd(builder, index,
>   				     ctx->i32_1, "");
>   		list = LLVMBuildPointerCast(builder, list,
>   					    si_const_array(ctx->v4i32, 0), "");
>   	} else {
>   		assert(desc_type == AC_DESC_IMAGE);
>   	}
>   
> -	rsrc = ac_build_indexed_load_const(&ctx->ac, list, index);
> +	rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
>   	if (dcc_off)
>   		rsrc = force_dcc_off(ctx, rsrc);
>   	return rsrc;
>   }
>   
>   /**
>    * Load the resource descriptor for \p image.
>    */
>   static void
>   image_fetch_rsrc(
> @@ -1127,21 +1127,21 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
>   		break;
>   	case AC_DESC_SAMPLER:
>   		/* The sampler state is at [12:15]. */
>   		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
>   		index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
>   		list = LLVMBuildPointerCast(builder, list,
>   					    si_const_array(ctx->v4i32, 0), "");
>   		break;
>   	}
>   
> -	return ac_build_indexed_load_const(&ctx->ac, list, index);
> +	return ac_build_load_to_sgpr(&ctx->ac, list, index);
>   }
>   
>   /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
>    *
>    * SI-CI:
>    *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
>    *   filtering manually. The driver sets img7 to a mask clearing
>    *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
>    *     s_and_b32 samp0, samp0, img7
>    *
> 


-- 
Learn how the world really is,
But never forget how it ought to be.
