[Mesa-dev] [PATCH] ac/nir: move lds declaration/load/store into shared code.

Nicolai Hähnle nhaehnle at gmail.com
Thu Nov 2 15:43:02 UTC 2017


We really should clean up our act at some point, teach LLVM how to make 
reasonable decisions for where to put variables (e.g. arrays: VGPR vs. 
spill memory vs. LDS), and use a proper structure on LDS. But this is 
alright for now...

Cheers,
Nicolai

On 26.10.2017 07:05, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This was duplicated between both drivers, share here.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>   src/amd/common/ac_llvm_build.c                    | 24 ++++++++++
>   src/amd/common/ac_llvm_build.h                    | 12 +++++
>   src/amd/common/ac_nir_to_llvm.c                   | 56 ++++++-----------------
>   src/gallium/drivers/radeonsi/si_shader.c          | 20 ++------
>   src/gallium/drivers/radeonsi/si_shader_internal.h |  1 -
>   5 files changed, 56 insertions(+), 57 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 80b027e..946f97f 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1748,3 +1748,27 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
>   			   "llvm.amdgcn.init.exec", ctx->voidt,
>   			   &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
>   }
> +
> +void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
> +{
> +	unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
> +	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
> +				     LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
> +				     "lds");
> +}
> +
> +LLVMValueRef
> +ac_lds_load(struct ac_llvm_context *ctx,
> +	    LLVMValueRef dw_addr)
> +{
> +	return ac_build_load(ctx, ctx->lds, dw_addr);
> +}
> +
> +void
> +ac_lds_store(struct ac_llvm_context *ctx,
> +	     LLVMValueRef dw_addr, LLVMValueRef value)
> +{
> +	value = ac_to_integer(ctx, value);
> +	ac_build_indexed_store(ctx, ctx->lds,
> +			       dw_addr, value);
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 996f558..7d57b8b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -34,6 +34,10 @@
>   extern "C" {
>   #endif
>   
> +enum {
> +	AC_LOCAL_ADDR_SPACE = 3,
> +};
> +
>   struct ac_llvm_context {
>   	LLVMContextRef context;
>   	LLVMModuleRef module;
> @@ -65,6 +69,8 @@ struct ac_llvm_context {
>   	LLVMValueRef empty_md;
>   
>   	enum chip_class chip_class;
> +
> +	LLVMValueRef lds;
>   };
>   
>   void
> @@ -283,6 +289,12 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
>   			    uint32_t num_outputs,
>   			    uint8_t *num_param_exports);
>   void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
> +
> +void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
> +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
> +			 LLVMValueRef dw_addr);
> +void ac_lds_store(struct ac_llvm_context *ctx,
> +		  LLVMValueRef dw_addr, LLVMValueRef value);
>   #ifdef __cplusplus
>   }
>   #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 06937d6..cbd646e 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -162,7 +162,6 @@ struct nir_to_llvm_context {
>   	LLVMValueRef empty_md;
>   	gl_shader_stage stage;
>   
> -	LLVMValueRef lds;
>   	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
>   
>   	uint64_t input_mask;
> @@ -548,14 +547,6 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin
>   	ud_info->indirect_offset = indirect_offset;
>   }
>   
> -static void declare_tess_lds(struct nir_to_llvm_context *ctx)
> -{
> -	unsigned lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
> -	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero,
> -				     LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
> -		"tess_lds");
> -}
> -
>   struct user_sgpr_info {
>   	bool need_ring_offsets;
>   	uint8_t sgpr_count;
> @@ -971,7 +962,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
>   			set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
>   		}
>   		if (ctx->options->key.vs.as_ls)
> -			declare_tess_lds(ctx);
> +			ac_declare_lds_as_pointer(&ctx->ac);
>   		break;
>   	case MESA_SHADER_TESS_CTRL:
>   		radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
> @@ -980,7 +971,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
>   		set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
>   		if (ctx->view_index)
>   			set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
> -		declare_tess_lds(ctx);
> +		ac_declare_lds_as_pointer(&ctx->ac);
>   		break;
>   	case MESA_SHADER_TESS_EVAL:
>   		set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
> @@ -998,7 +989,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
>   		if (ctx->view_index)
>   			set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
>   		if (has_previous_stage)
> -			declare_tess_lds(ctx);
> +			ac_declare_lds_as_pointer(&ctx->ac);
>   		break;
>   	case MESA_SHADER_FRAGMENT:
>   		if (ctx->shader_info->info.ps.needs_sample_positions) {
> @@ -2670,23 +2661,6 @@ out:
>   	*indir_out = offset;
>   }
>   
> -static LLVMValueRef
> -lds_load(struct nir_to_llvm_context *ctx,
> -	 LLVMValueRef dw_addr)
> -{
> -	LLVMValueRef value;
> -	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> -	return value;
> -}
> -
> -static void
> -lds_store(struct nir_to_llvm_context *ctx,
> -	  LLVMValueRef dw_addr, LLVMValueRef value)
> -{
> -	value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
> -	ac_build_indexed_store(&ctx->ac, ctx->lds,
> -			       dw_addr, value);
> -}
>   
>   /* The offchip buffer layout for TCS->TES is
>    *
> @@ -2862,7 +2836,7 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
>   
>   	unsigned comp = instr->variables[0]->var->data.location_frac;
>   	for (unsigned i = 0; i < instr->num_components + comp; i++) {
> -		value[i] = lds_load(ctx, dw_addr);
> +		value[i] = ac_lds_load(&ctx->ac, dw_addr);
>   		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>   				       ctx->i32one, "");
>   	}
> @@ -2901,7 +2875,7 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
>   
>   	unsigned comp = instr->variables[0]->var->data.location_frac;
>   	for (unsigned i = comp; i < instr->num_components + comp; i++) {
> -		value[i] = lds_load(ctx, dw_addr);
> +		value[i] = ac_lds_load(&ctx->ac, dw_addr);
>   		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
>   				       ctx->i32one, "");
>   	}
> @@ -2963,7 +2937,7 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
>   			continue;
>   		LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp);
>   
> -		lds_store(ctx, dw_addr, value);
> +		ac_lds_store(&ctx->ac, dw_addr, value);
>   
>   		if (!is_tess_factor && writemask != 0xF)
>   			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
> @@ -3044,7 +3018,7 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>   			LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
>   			dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
>   			                       LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
> -			value[i] = lds_load(ctx, dw_addr);
> +			value[i] = ac_lds_load(&ctx->ac, dw_addr);
>   		} else {
>   			args[0] = ctx->esgs_ring;
>   			args[1] = vtx_offset;
> @@ -5949,8 +5923,8 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
>   			out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
>   
>   			if (ctx->ac.chip_class  >= GFX9) {
> -				lds_store(ctx, dw_addr,
> -				          LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> +				ac_lds_store(&ctx->ac, dw_addr,
> +					     LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
>   				dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
>   			} else {
>   				ac_build_buffer_store_dword(&ctx->ac,
> @@ -5989,8 +5963,8 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
>   						    LLVMConstInt(ctx->i32, param * 4, false),
>   						    "");
>   		for (unsigned j = 0; j < length; j++) {
> -			lds_store(ctx, dw_addr,
> -				  LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> +			ac_lds_store(&ctx->ac, dw_addr,
> +				     LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
>   			dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
>   		}
>   	}
> @@ -6142,20 +6116,20 @@ write_tess_factors(struct nir_to_llvm_context *ctx)
>   
>   	// LINES reverseal
>   	if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
> -		outer[0] = out[1] = lds_load(ctx, lds_outer);
> +		outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
>   		lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
>   					 LLVMConstInt(ctx->i32, 1, false), "");
> -		outer[1] = out[0] = lds_load(ctx, lds_outer);
> +		outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
>   	} else {
>   		for (i = 0; i < outer_comps; i++) {
>   			outer[i] = out[i] =
> -				lds_load(ctx, lds_outer);
> +				ac_lds_load(&ctx->ac, lds_outer);
>   			lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
>   						 LLVMConstInt(ctx->i32, 1, false), "");
>   		}
>   		for (i = 0; i < inner_comps; i++) {
>   			inner[i] = out[outer_comps+i] =
> -				lds_load(ctx, lds_inner);
> +				ac_lds_load(&ctx->ac, lds_inner);
>   			lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
>   						 LLVMConstInt(ctx->i32, 1, false), "");
>   		}
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c343048..4bf2a45 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1099,12 +1099,12 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
>   	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
>   			    LLVMConstInt(ctx->i32, swizzle, 0));
>   
> -	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> +	value = ac_lds_load(&ctx->ac, dw_addr);
>   	if (tgsi_type_is_64bit(type)) {
>   		LLVMValueRef value2;
>   		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
>   				       ctx->i32_1);
> -		value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> +		value2 = ac_lds_load(&ctx->ac, dw_addr);
>   		return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
>   	}
>   
> @@ -1127,9 +1127,7 @@ static void lds_store(struct lp_build_tgsi_context *bld_base,
>   	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
>   			    LLVMConstInt(ctx->i32, dw_offset_imm, 0));
>   
> -	value = ac_to_integer(&ctx->ac, value);
> -	ac_build_indexed_store(&ctx->ac, ctx->lds,
> -			       dw_addr, value);
> +	ac_lds_store(&ctx->ac, dw_addr, value);
>   }
>   
>   static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
> @@ -4254,14 +4252,6 @@ static void declare_streamout_params(struct si_shader_context *ctx,
>   	}
>   }
>   
> -static void declare_lds_as_pointer(struct si_shader_context *ctx)
> -{
> -	unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
> -	ctx->lds = LLVMBuildIntToPtr(ctx->ac.builder, ctx->i32_0,
> -		LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
> -		"lds");
> -}
> -
>   static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
>   {
>   	switch (shader->selector->type) {
> @@ -4752,7 +4742,7 @@ static void create_function(struct si_shader_context *ctx)
>   	    (ctx->screen->b.chip_class >= GFX9 &&
>   	     (shader->key.as_es ||
>   	      ctx->type == PIPE_SHADER_GEOMETRY)))
> -		declare_lds_as_pointer(ctx);
> +		ac_declare_lds_as_pointer(&ctx->ac);
>   }
>   
>   /**
> @@ -7076,7 +7066,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
>   	/* Create the function. */
>   	si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
>   			   ctx->screen->b.chip_class >= CIK ? 128 : 64);
> -	declare_lds_as_pointer(ctx);
> +	ac_declare_lds_as_pointer(&ctx->ac);
>   	func = ctx->main_fn;
>   
>   	LLVMValueRef invoc0_tess_factors[6];
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 5c736f6..b249bf9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -209,7 +209,6 @@ struct si_shader_context {
>   	LLVMValueRef esgs_ring;
>   	LLVMValueRef gsvs_ring[4];
>   
> -	LLVMValueRef lds;
>   	LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
>   	LLVMValueRef gs_next_vertex[4];
>   	LLVMValueRef postponed_kill;
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list