[Mesa-dev] [PATCH] ac/nir: move lds declaration/load/store into shared code.
Nicolai Hähnle
nhaehnle at gmail.com
Thu Nov 2 15:43:02 UTC 2017
We really should clean up our act at some point, teach LLVM how to make
reasonable decisions for where to put variables (e.g. arrays: VGPR vs.
spill memory vs. LDS), and use a proper structure on LDS. But this is
alright for now...
Cheers,
Nicolai
On 26.10.2017 07:05, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This was duplicated between both drivers, share here.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/amd/common/ac_llvm_build.c | 24 ++++++++++
> src/amd/common/ac_llvm_build.h | 12 +++++
> src/amd/common/ac_nir_to_llvm.c | 56 ++++++-----------------
> src/gallium/drivers/radeonsi/si_shader.c | 20 ++------
> src/gallium/drivers/radeonsi/si_shader_internal.h | 1 -
> 5 files changed, 56 insertions(+), 57 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 80b027e..946f97f 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1748,3 +1748,27 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
> "llvm.amdgcn.init.exec", ctx->voidt,
> &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
> }
> +
> +void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
> +{
> + unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
> + ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
> + LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
> + "lds");
> +}
> +
> +LLVMValueRef
> +ac_lds_load(struct ac_llvm_context *ctx,
> + LLVMValueRef dw_addr)
> +{
> + return ac_build_load(ctx, ctx->lds, dw_addr);
> +}
> +
> +void
> +ac_lds_store(struct ac_llvm_context *ctx,
> + LLVMValueRef dw_addr, LLVMValueRef value)
> +{
> + value = ac_to_integer(ctx, value);
> + ac_build_indexed_store(ctx, ctx->lds,
> + dw_addr, value);
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 996f558..7d57b8b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -34,6 +34,10 @@
> extern "C" {
> #endif
>
> +enum {
> + AC_LOCAL_ADDR_SPACE = 3,
> +};
> +
> struct ac_llvm_context {
> LLVMContextRef context;
> LLVMModuleRef module;
> @@ -65,6 +69,8 @@ struct ac_llvm_context {
> LLVMValueRef empty_md;
>
> enum chip_class chip_class;
> +
> + LLVMValueRef lds;
> };
>
> void
> @@ -283,6 +289,12 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
> uint32_t num_outputs,
> uint8_t *num_param_exports);
> void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
> +
> +void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
> +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
> + LLVMValueRef dw_addr);
> +void ac_lds_store(struct ac_llvm_context *ctx,
> + LLVMValueRef dw_addr, LLVMValueRef value);
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 06937d6..cbd646e 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -162,7 +162,6 @@ struct nir_to_llvm_context {
> LLVMValueRef empty_md;
> gl_shader_stage stage;
>
> - LLVMValueRef lds;
> LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
>
> uint64_t input_mask;
> @@ -548,14 +547,6 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin
> ud_info->indirect_offset = indirect_offset;
> }
>
> -static void declare_tess_lds(struct nir_to_llvm_context *ctx)
> -{
> - unsigned lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
> - ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero,
> - LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
> - "tess_lds");
> -}
> -
> struct user_sgpr_info {
> bool need_ring_offsets;
> uint8_t sgpr_count;
> @@ -971,7 +962,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
> set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
> }
> if (ctx->options->key.vs.as_ls)
> - declare_tess_lds(ctx);
> + ac_declare_lds_as_pointer(&ctx->ac);
> break;
> case MESA_SHADER_TESS_CTRL:
> radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
> @@ -980,7 +971,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
> set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
> if (ctx->view_index)
> set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
> - declare_tess_lds(ctx);
> + ac_declare_lds_as_pointer(&ctx->ac);
> break;
> case MESA_SHADER_TESS_EVAL:
> set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
> @@ -998,7 +989,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
> if (ctx->view_index)
> set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
> if (has_previous_stage)
> - declare_tess_lds(ctx);
> + ac_declare_lds_as_pointer(&ctx->ac);
> break;
> case MESA_SHADER_FRAGMENT:
> if (ctx->shader_info->info.ps.needs_sample_positions) {
> @@ -2670,23 +2661,6 @@ out:
> *indir_out = offset;
> }
>
> -static LLVMValueRef
> -lds_load(struct nir_to_llvm_context *ctx,
> - LLVMValueRef dw_addr)
> -{
> - LLVMValueRef value;
> - value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> - return value;
> -}
> -
> -static void
> -lds_store(struct nir_to_llvm_context *ctx,
> - LLVMValueRef dw_addr, LLVMValueRef value)
> -{
> - value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
> - ac_build_indexed_store(&ctx->ac, ctx->lds,
> - dw_addr, value);
> -}
>
> /* The offchip buffer layout for TCS->TES is
> *
> @@ -2862,7 +2836,7 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
>
> unsigned comp = instr->variables[0]->var->data.location_frac;
> for (unsigned i = 0; i < instr->num_components + comp; i++) {
> - value[i] = lds_load(ctx, dw_addr);
> + value[i] = ac_lds_load(&ctx->ac, dw_addr);
> dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
> ctx->i32one, "");
> }
> @@ -2901,7 +2875,7 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
>
> unsigned comp = instr->variables[0]->var->data.location_frac;
> for (unsigned i = comp; i < instr->num_components + comp; i++) {
> - value[i] = lds_load(ctx, dw_addr);
> + value[i] = ac_lds_load(&ctx->ac, dw_addr);
> dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
> ctx->i32one, "");
> }
> @@ -2963,7 +2937,7 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
> continue;
> LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp);
>
> - lds_store(ctx, dw_addr, value);
> + ac_lds_store(&ctx->ac, dw_addr, value);
>
> if (!is_tess_factor && writemask != 0xF)
> ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
> @@ -3044,7 +3018,7 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
> dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
> LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
> - value[i] = lds_load(ctx, dw_addr);
> + value[i] = ac_lds_load(&ctx->ac, dw_addr);
> } else {
> args[0] = ctx->esgs_ring;
> args[1] = vtx_offset;
> @@ -5949,8 +5923,8 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
> out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
>
> if (ctx->ac.chip_class >= GFX9) {
> - lds_store(ctx, dw_addr,
> - LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> + ac_lds_store(&ctx->ac, dw_addr,
> + LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
> } else {
> ac_build_buffer_store_dword(&ctx->ac,
> @@ -5989,8 +5963,8 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
> LLVMConstInt(ctx->i32, param * 4, false),
> "");
> for (unsigned j = 0; j < length; j++) {
> - lds_store(ctx, dw_addr,
> - LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> + ac_lds_store(&ctx->ac, dw_addr,
> + LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
> dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
> }
> }
> @@ -6142,20 +6116,20 @@ write_tess_factors(struct nir_to_llvm_context *ctx)
>
> // LINES reverseal
> if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
> - outer[0] = out[1] = lds_load(ctx, lds_outer);
> + outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
> lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
> LLVMConstInt(ctx->i32, 1, false), "");
> - outer[1] = out[0] = lds_load(ctx, lds_outer);
> + outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
> } else {
> for (i = 0; i < outer_comps; i++) {
> outer[i] = out[i] =
> - lds_load(ctx, lds_outer);
> + ac_lds_load(&ctx->ac, lds_outer);
> lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
> LLVMConstInt(ctx->i32, 1, false), "");
> }
> for (i = 0; i < inner_comps; i++) {
> inner[i] = out[outer_comps+i] =
> - lds_load(ctx, lds_inner);
> + ac_lds_load(&ctx->ac, lds_inner);
> lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
> LLVMConstInt(ctx->i32, 1, false), "");
> }
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c343048..4bf2a45 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1099,12 +1099,12 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
> dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
> LLVMConstInt(ctx->i32, swizzle, 0));
>
> - value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> + value = ac_lds_load(&ctx->ac, dw_addr);
> if (tgsi_type_is_64bit(type)) {
> LLVMValueRef value2;
> dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
> ctx->i32_1);
> - value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
> + value2 = ac_lds_load(&ctx->ac, dw_addr);
> return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
> }
>
> @@ -1127,9 +1127,7 @@ static void lds_store(struct lp_build_tgsi_context *bld_base,
> dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
> LLVMConstInt(ctx->i32, dw_offset_imm, 0));
>
> - value = ac_to_integer(&ctx->ac, value);
> - ac_build_indexed_store(&ctx->ac, ctx->lds,
> - dw_addr, value);
> + ac_lds_store(&ctx->ac, dw_addr, value);
> }
>
> static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
> @@ -4254,14 +4252,6 @@ static void declare_streamout_params(struct si_shader_context *ctx,
> }
> }
>
> -static void declare_lds_as_pointer(struct si_shader_context *ctx)
> -{
> - unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
> - ctx->lds = LLVMBuildIntToPtr(ctx->ac.builder, ctx->i32_0,
> - LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
> - "lds");
> -}
> -
> static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
> {
> switch (shader->selector->type) {
> @@ -4752,7 +4742,7 @@ static void create_function(struct si_shader_context *ctx)
> (ctx->screen->b.chip_class >= GFX9 &&
> (shader->key.as_es ||
> ctx->type == PIPE_SHADER_GEOMETRY)))
> - declare_lds_as_pointer(ctx);
> + ac_declare_lds_as_pointer(&ctx->ac);
> }
>
> /**
> @@ -7076,7 +7066,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
> /* Create the function. */
> si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
> ctx->screen->b.chip_class >= CIK ? 128 : 64);
> - declare_lds_as_pointer(ctx);
> + ac_declare_lds_as_pointer(&ctx->ac);
> func = ctx->main_fn;
>
> LLVMValueRef invoc0_tess_factors[6];
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 5c736f6..b249bf9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -209,7 +209,6 @@ struct si_shader_context {
> LLVMValueRef esgs_ring;
> LLVMValueRef gsvs_ring[4];
>
> - LLVMValueRef lds;
> LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
> LLVMValueRef gs_next_vertex[4];
> LLVMValueRef postponed_kill;
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list