[Mesa-dev] [PATCH 19/19] radeonsi: don't declare LDS in PS when ds_bpermute is used
Nicolai Hähnle
nhaehnle at gmail.com
Tue Oct 4 09:46:13 UTC 2016
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 02.10.2016 23:09, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> I guess this is not needed because dead code elimination removes
> the declaration.
> ---
> src/gallium/drivers/radeonsi/si_pipe.c | 3 +++
> src/gallium/drivers/radeonsi/si_pipe.h | 1 +
> src/gallium/drivers/radeonsi/si_shader.c | 7 +++----
> 3 files changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 43d6377..2aa679c 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -810,20 +810,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
> (sscreen->b.chip_class == VI &&
> sscreen->b.info.pfp_fw_version >= 121 &&
> sscreen->b.info.me_fw_version >= 87) ||
> (sscreen->b.chip_class == CIK &&
> sscreen->b.info.pfp_fw_version >= 211 &&
> sscreen->b.info.me_fw_version >= 173) ||
> (sscreen->b.chip_class == SI &&
> sscreen->b.info.pfp_fw_version >= 121 &&
> sscreen->b.info.me_fw_version >= 87);
>
> + sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
> + sscreen->b.chip_class >= VI;
> +
> sscreen->b.has_cp_dma = true;
> sscreen->b.has_streamout = true;
> pipe_mutex_init(sscreen->shader_parts_mutex);
> sscreen->use_monolithic_shaders =
> HAVE_LLVM < 0x0308 ||
> (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
>
> sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
> SI_CONTEXT_INV_VMEM_L1 |
> SI_CONTEXT_INV_GLOBAL_L2;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 558e185..3cefee7 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -76,20 +76,21 @@
> struct si_compute;
> struct hash_table;
> struct u_suballocator;
>
> struct si_screen {
> struct r600_common_screen b;
> unsigned gs_table_depth;
> unsigned tess_offchip_block_dw_size;
> bool has_distributed_tess;
> bool has_draw_indirect_multi;
> + bool has_ds_bpermute;
>
> /* Whether shaders are monolithic (1-part) or separate (3-part). */
> bool use_monolithic_shaders;
> bool record_llvm_ir;
>
> pipe_mutex shader_parts_mutex;
> struct si_shader_part *vs_prologs;
> struct si_shader_part *vs_epilogs;
> struct si_shader_part *tcs_epilogs;
> struct si_shader_part *ps_prologs;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 7844ebd..30bf093 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5002,43 +5002,41 @@ static void si_llvm_emit_ddxy(
> const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> unsigned opcode = emit_data->info->opcode;
> LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
> int idx;
> unsigned mask;
> - bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
> - ctx->screen->b.chip_class >= VI;
>
> thread_id = get_thread_id(ctx);
>
> if (opcode == TGSI_OPCODE_DDX_FINE)
> mask = TID_MASK_LEFT;
> else if (opcode == TGSI_OPCODE_DDY_FINE)
> mask = TID_MASK_TOP;
> else
> mask = TID_MASK_TOP_LEFT;
>
> tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
> lp_build_const_int32(gallivm, mask), "");
>
> /* for DDX we want to next X pixel, DDY next Y pixel. */
> idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
> trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
> lp_build_const_int32(gallivm, idx), "");
>
> val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
>
> - if (has_ds_bpermute) {
> + if (ctx->screen->has_ds_bpermute) {
> args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
> lp_build_const_int32(gallivm, 4), "");
> args[1] = val;
> tl = lp_build_intrinsic(gallivm->builder,
> "llvm.amdgcn.ds.bpermute", ctx->i32,
> args, 2, LLVMReadNoneAttribute);
>
> args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
> lp_build_const_int32(gallivm, 4), "");
> trbl = lp_build_intrinsic(gallivm->builder,
> @@ -5731,21 +5729,22 @@ static void create_function(struct si_shader_context *ctx)
> for (i = 0; i <= last_sgpr; ++i)
> shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
>
> /* Unused fragment shader inputs are eliminated by the compiler,
> * so we don't know yet how many there will be.
> */
> if (ctx->type != PIPE_SHADER_FRAGMENT)
> for (; i < num_params; ++i)
> shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
>
> - if (bld_base->info &&
> + if (!ctx->screen->has_ds_bpermute &&
> + bld_base->info &&
> (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
> bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
> bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
> bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
> bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
> bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
> ctx->lds =
> LLVMAddGlobalInAddressSpace(gallivm->module,
> LLVMArrayType(ctx->i32, 64),
> "ddxy_lds",
>
More information about the mesa-dev mailing list