[Mesa-dev] [PATCH] radeonsi: get rid of img/buf/sampler descriptor preloading (v2)
Nicolai Hähnle
nhaehnle at gmail.com
Wed Sep 14 07:19:21 UTC 2016
On 13.09.2016 22:20, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> 26011 shaders in 14651 tests
> Totals:
> SGPRS: 1251920 -> 1152636 (-7.93 %)
> VGPRS: 728421 -> 728198 (-0.03 %)
> Spilled SGPRs: 16644 -> 3776 (-77.31 %)
> Spilled VGPRs: 369 -> 369 (0.00 %)
> Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread
> Code Size: 36001064 -> 35835152 (-0.46 %) bytes
> LDS: 767 -> 767 (0.00 %) blocks
> Max Waves: 222221 -> 222372 (0.07 %)
> Wait states: 0 -> 0 (0.00 %)
>
> v2: merge codepaths where possible
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 173 ++++++++-----------------------
> 1 file changed, 41 insertions(+), 132 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 84cbfd7..6f9c45f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -100,25 +100,20 @@ struct si_shader_context
>
> LLVMTargetMachineRef tm;
>
> unsigned invariant_load_md_kind;
> unsigned range_md_kind;
> unsigned uniform_md_kind;
> LLVMValueRef empty_md;
>
> /* Preloaded descriptors. */
> LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
> - LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
> - LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
> - LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
> - LLVMValueRef fmasks[SI_NUM_SAMPLERS];
> - LLVMValueRef images[SI_NUM_IMAGES];
> LLVMValueRef esgs_ring;
> LLVMValueRef gsvs_ring[4];
>
> LLVMValueRef lds;
> LLVMValueRef gs_next_vertex[4];
> LLVMValueRef return_value;
>
> LLVMTypeRef voidt;
> LLVMTypeRef i1;
> LLVMTypeRef i8;
> @@ -3399,32 +3394,32 @@ static void membar_emit(
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
>
> emit_waitcnt(ctx);
> }
>
> static LLVMValueRef
> shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
> const struct tgsi_full_src_register *reg)
> {
> - LLVMValueRef ind_index;
> - LLVMValueRef rsrc_ptr;
> + LLVMValueRef index;
> + LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> + SI_PARAM_SHADER_BUFFERS);
>
> if (!reg->Register.Indirect)
> - return ctx->shader_buffers[reg->Register.Index];
> -
> - ind_index = get_bounded_indirect_index(ctx, &reg->Indirect,
> - reg->Register.Index,
> - SI_NUM_SHADER_BUFFERS);
> + index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
> + else
> + index = get_bounded_indirect_index(ctx, &reg->Indirect,
> + reg->Register.Index,
> + SI_NUM_SHADER_BUFFERS);
>
> - rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
> - return build_indexed_load_const(ctx, rsrc_ptr, ind_index);
> + return build_indexed_load_const(ctx, rsrc_ptr, index);
> }
>
> static bool tgsi_is_array_sampler(unsigned target)
> {
> return target == TGSI_TEXTURE_1D_ARRAY ||
> target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
> target == TGSI_TEXTURE_2D_ARRAY ||
> target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
> target == TGSI_TEXTURE_CUBE_ARRAY ||
> target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
> @@ -3473,51 +3468,47 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
> * Load the resource descriptor for \p image.
> */
> static void
> image_fetch_rsrc(
> struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_src_register *image,
> bool dcc_off,
> LLVMValueRef *rsrc)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> + LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> + SI_PARAM_IMAGES);
> + LLVMValueRef index, tmp;
>
> assert(image->Register.File == TGSI_FILE_IMAGE);
>
> if (!image->Register.Indirect) {
> - /* Fast path: use preloaded resources */
> - *rsrc = ctx->images[image->Register.Index];
> + index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
I think it would be beneficial to put
if (info->images_writemask & (1 << image->Register.Index) &&
!(info->images_buffers & (1 << image->Register.Index)))
dcc_off = true;
here, so that common subexpression elimination (CSE) can work better when
an image is both read from and written to.
Apart from that, the patch is
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> } else {
> - /* Indexing and manual load */
> - LLVMValueRef ind_index;
> - LLVMValueRef rsrc_ptr;
> - LLVMValueRef tmp;
> -
> /* From the GL_ARB_shader_image_load_store extension spec:
> *
> * If a shader performs an image load, store, or atomic
> * operation using an image variable declared as an array,
> * and if the index used to select an individual element is
> * negative or greater than or equal to the size of the
> * array, the results of the operation are undefined but may
> * not lead to termination.
> */
> - ind_index = get_bounded_indirect_index(ctx, &image->Indirect,
> - image->Register.Index,
> - SI_NUM_IMAGES);
> -
> - rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
> - tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index);
> - if (dcc_off)
> - tmp = force_dcc_off(ctx, tmp);
> - *rsrc = tmp;
> + index = get_bounded_indirect_index(ctx, &image->Indirect,
> + image->Register.Index,
> + SI_NUM_IMAGES);
> }
> +
> + tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
> + if (dcc_off)
> + tmp = force_dcc_off(ctx, tmp);
> + *rsrc = tmp;
> }
>
> static LLVMValueRef image_fetch_coords(
> struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_instruction *inst,
> unsigned src)
> {
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> unsigned target = inst->Memory.Texture;
> @@ -4355,55 +4346,51 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
> static void tex_fetch_ptrs(
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data,
> LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> const struct tgsi_full_instruction *inst = emit_data->inst;
> unsigned target = inst->Texture.Texture;
> unsigned sampler_src;
> unsigned sampler_index;
> + LLVMValueRef index;
>
> sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
> sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
>
> if (emit_data->inst->Src[sampler_src].Register.Indirect) {
> const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
> - LLVMValueRef ind_index;
>
> - ind_index = get_bounded_indirect_index(ctx,
> - &reg->Indirect,
> - reg->Register.Index,
> - SI_NUM_SAMPLERS);
> + index = get_bounded_indirect_index(ctx,
> + &reg->Indirect,
> + reg->Register.Index,
> + SI_NUM_SAMPLERS);
> + } else {
> + index = LLVMConstInt(ctx->i32, sampler_index, 0);
> + }
>
> - *res_ptr = load_sampler_desc(ctx, ind_index, DESC_IMAGE);
> + *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
>
> - if (target == TGSI_TEXTURE_2D_MSAA ||
> - target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
> - if (samp_ptr)
> - *samp_ptr = NULL;
> - if (fmask_ptr)
> - *fmask_ptr = load_sampler_desc(ctx, ind_index, DESC_FMASK);
> - } else {
> - if (samp_ptr) {
> - *samp_ptr = load_sampler_desc(ctx, ind_index, DESC_SAMPLER);
> - *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
> - }
> - if (fmask_ptr)
> - *fmask_ptr = NULL;
> - }
> - } else {
> - *res_ptr = ctx->sampler_views[sampler_index];
> + if (target == TGSI_TEXTURE_2D_MSAA ||
> + target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
> if (samp_ptr)
> - *samp_ptr = ctx->sampler_states[sampler_index];
> + *samp_ptr = NULL;
> if (fmask_ptr)
> - *fmask_ptr = ctx->fmasks[sampler_index];
> + *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
> + } else {
> + if (samp_ptr) {
> + *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
> + *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
> + }
> + if (fmask_ptr)
> + *fmask_ptr = NULL;
> }
> }
>
> static void txq_fetch_args(
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> @@ -5856,95 +5843,20 @@ static void preload_constant_buffers(struct si_shader_context *ctx)
> for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
> if (info->const_file_max[buf] == -1)
> continue;
>
> /* Load the resource descriptor */
> ctx->const_buffers[buf] =
> build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
> }
> }
>
> -static void preload_shader_buffers(struct si_shader_context *ctx)
> -{
> - struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
> - LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
> - int buf, maxbuf;
> -
> - maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
> - SI_NUM_SHADER_BUFFERS - 1);
> - for (buf = 0; buf <= maxbuf; ++buf) {
> - ctx->shader_buffers[buf] =
> - build_indexed_load_const(
> - ctx, ptr, lp_build_const_int32(gallivm, buf));
> - }
> -}
> -
> -static void preload_samplers(struct si_shader_context *ctx)
> -{
> - struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
> - struct gallivm_state *gallivm = bld_base->base.gallivm;
> - const struct tgsi_shader_info *info = bld_base->info;
> - unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
> - LLVMValueRef offset;
> -
> - if (num_samplers == 0)
> - return;
> -
> - /* Load the resources and samplers, we rely on the code sinking to do the rest */
> - for (i = 0; i < num_samplers; ++i) {
> - /* Resource */
> - offset = lp_build_const_int32(gallivm, i);
> - ctx->sampler_views[i] =
> - load_sampler_desc(ctx, offset, DESC_IMAGE);
> -
> - /* FMASK resource */
> - if (info->is_msaa_sampler[i])
> - ctx->fmasks[i] =
> - load_sampler_desc(ctx, offset, DESC_FMASK);
> - else {
> - ctx->sampler_states[i] =
> - load_sampler_desc(ctx, offset, DESC_SAMPLER);
> - ctx->sampler_states[i] =
> - sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
> - ctx->sampler_states[i]);
> - }
> - }
> -}
> -
> -static void preload_images(struct si_shader_context *ctx)
> -{
> - struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
> - struct tgsi_shader_info *info = &ctx->shader->selector->info;
> - struct gallivm_state *gallivm = bld_base->base.gallivm;
> - unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
> - LLVMValueRef res_ptr;
> - unsigned i;
> -
> - if (num_images == 0)
> - return;
> -
> - res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
> -
> - for (i = 0; i < num_images; ++i) {
> - /* Rely on LLVM to shrink the load for buffer resources. */
> - LLVMValueRef rsrc =
> - build_indexed_load_const(ctx, res_ptr,
> - lp_build_const_int32(gallivm, i));
> -
> - if (info->images_writemask & (1 << i) &&
> - !(info->images_buffers & (1 << i)))
> - rsrc = force_dcc_off(ctx, rsrc);
> -
> - ctx->images[i] = rsrc;
> - }
> -}
> -
> /**
> * Load ESGS and GSVS ring buffer resource descriptors and save the variables
> * for later use.
> */
> static void preload_ring_buffers(struct si_shader_context *ctx)
> {
> struct gallivm_state *gallivm =
> ctx->radeon_bld.soa.bld_base.base.gallivm;
>
> LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> @@ -6773,23 +6685,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
> ctx.radeon_bld.declare_memory_region = declare_compute_memory;
> break;
> default:
> assert(!"Unsupported shader type");
> return -1;
> }
>
> create_meta_data(&ctx);
> create_function(&ctx);
> preload_constant_buffers(&ctx);
> - preload_shader_buffers(&ctx);
> - preload_samplers(&ctx);
> - preload_images(&ctx);
> preload_ring_buffers(&ctx);
>
> if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
> shader->key.ps.prolog.poly_stipple) {
> LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
> SI_PARAM_RW_BUFFERS);
> si_llvm_emit_polygon_stipple(&ctx, list,
> SI_PARAM_POS_FIXED_PT);
> }
>
>
More information about the mesa-dev
mailing list