[Mesa-dev] [PATCH 4/6] radeonsi: get rid of img/buf/sampler descriptor preloading

Nicolai Hähnle nhaehnle at gmail.com
Tue Sep 13 18:07:04 UTC 2016


On 13.09.2016 19:13, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> 26011 shaders in 14651 tests
> Totals:
> SGPRS: 1251920 -> 1152636 (-7.93 %)
> VGPRS: 728421 -> 728198 (-0.03 %)
> Spilled SGPRs: 16644 -> 3776 (-77.31 %)
> Spilled VGPRs: 369 -> 369 (0.00 %)
> Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread
> Code Size: 36001064 -> 35835152 (-0.46 %) bytes
> LDS: 767 -> 767 (0.00 %) blocks
> Max Waves: 222221 -> 222372 (0.07 %)
> Wait states: 0 -> 0 (0.00 %)
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 123 +++++++------------------------
>  1 file changed, 28 insertions(+), 95 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 3f77714..c96c52e 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -100,25 +100,20 @@ struct si_shader_context
>
>  	LLVMTargetMachineRef tm;
>
>  	unsigned invariant_load_md_kind;
>  	unsigned range_md_kind;
>  	unsigned uniform_md_kind;
>  	LLVMValueRef empty_md;
>
>  	/* Preloaded descriptors. */
>  	LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
> -	LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
> -	LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
> -	LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
> -	LLVMValueRef fmasks[SI_NUM_SAMPLERS];
> -	LLVMValueRef images[SI_NUM_IMAGES];
>  	LLVMValueRef esgs_ring;
>  	LLVMValueRef gsvs_ring[4];
>
>  	LLVMValueRef lds;
>  	LLVMValueRef gs_next_vertex[4];
>  	LLVMValueRef return_value;
>
>  	LLVMTypeRef voidt;
>  	LLVMTypeRef i1;
>  	LLVMTypeRef i8;
> @@ -3420,30 +3415,32 @@ static void membar_emit(
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>
>  	emit_waitcnt(ctx);
>  }
>
>  static LLVMValueRef
>  shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
>  			 const struct tgsi_full_src_register *reg)
>  {
>  	LLVMValueRef ind_index;
> -	LLVMValueRef rsrc_ptr;
> +	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> +					     SI_PARAM_SHADER_BUFFERS);
>
> -	if (!reg->Register.Indirect)
> -		return ctx->shader_buffers[reg->Register.Index];
> +	if (!reg->Register.Indirect) {
> +		ind_index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
> +		return build_indexed_load_const(ctx, rsrc_ptr, ind_index);
> +	}
>
>  	ind_index = get_bounded_indirect_index(ctx, &reg->Indirect,
>  					       reg->Register.Index,
>  					       SI_NUM_SHADER_BUFFERS);
>
> -	rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
>  	return build_indexed_load_const(ctx, rsrc_ptr, ind_index);

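The two calls to build_indexed_load_const can be unified further: both
branches pass the same rsrc_ptr, so only the computation of ind_index
needs to stay conditional.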

>  }
>
>  static bool tgsi_is_array_sampler(unsigned target)
>  {
>  	return target == TGSI_TEXTURE_1D_ARRAY ||
>  	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
>  	       target == TGSI_TEXTURE_2D_ARRAY ||
>  	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
>  	       target == TGSI_TEXTURE_CUBE_ARRAY ||
> @@ -3493,46 +3490,54 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
>   * Load the resource descriptor for \p image.
>   */
>  static void
>  image_fetch_rsrc(
>  	struct lp_build_tgsi_context *bld_base,
>  	const struct tgsi_full_src_register *image,
>  	bool dcc_off,
>  	LLVMValueRef *rsrc)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
> +	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> +					     SI_PARAM_IMAGES);
>
>  	assert(image->Register.File == TGSI_FILE_IMAGE);
>
>  	if (!image->Register.Indirect) {
> -		/* Fast path: use preloaded resources */
> -		*rsrc = ctx->images[image->Register.Index];
> +		struct tgsi_shader_info *info = &ctx->shader->selector->info;
> +		int i = image->Register.Index;
> +		LLVMValueRef index = LLVMConstInt(ctx->i32, i, 0);
> +
> +		/* Rely on LLVM to shrink the load for buffer resources. */
> +		*rsrc = build_indexed_load_const(ctx, rsrc_ptr, index);
> +
> +		if (info->images_writemask & (1 << i) &&
> +		    !(info->images_buffers & (1 << i)))
> +			*rsrc = force_dcc_off(ctx, *rsrc);
>  	} else {
>  		/* Indexing and manual load */
>  		LLVMValueRef ind_index;
> -		LLVMValueRef rsrc_ptr;
>  		LLVMValueRef tmp;
>
>  		/* From the GL_ARB_shader_image_load_store extension spec:
>  		 *
>  		 *    If a shader performs an image load, store, or atomic
>  		 *    operation using an image variable declared as an array,
>  		 *    and if the index used to select an individual element is
>  		 *    negative or greater than or equal to the size of the
>  		 *    array, the results of the operation are undefined but may
>  		 *    not lead to termination.
>  		 */
>  		ind_index = get_bounded_indirect_index(ctx, &image->Indirect,
>  						       image->Register.Index,
>  						       SI_NUM_IMAGES);
>
> -		rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
>  		tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index);
>  		if (dcc_off)
>  			tmp = force_dcc_off(ctx, tmp);
>  		*rsrc = tmp;
>  	}
>  }

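It might be worth merging the indirect and non-indirect code paths. In
the merged code, the per-image condition used in the non-indirect case
(images_writemask set and images_buffers clear for that index) should
still take precedence over the dcc_off parameter, to help LLVM's CSE.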

>
>  static LLVMValueRef image_fetch_coords(
>  		struct lp_build_tgsi_context *bld_base,
>  		const struct tgsi_full_instruction *inst,
> @@ -4405,25 +4410,31 @@ static void tex_fetch_ptrs(
>  				*fmask_ptr = load_sampler_desc(ctx, ind_index, DESC_FMASK);
>  		} else {
>  			if (samp_ptr) {
>  				*samp_ptr = load_sampler_desc(ctx, ind_index, DESC_SAMPLER);
>  				*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
>  			}
>  			if (fmask_ptr)
>  				*fmask_ptr = NULL;
>  		}
>  	} else {
> -		*res_ptr = ctx->sampler_views[sampler_index];
> -		if (samp_ptr)
> -			*samp_ptr = ctx->sampler_states[sampler_index];
> +		LLVMValueRef index = LLVMConstInt(ctx->i32, sampler_index, 0);
> +
> +		*res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
> +
> +		if (samp_ptr) {
> +			*samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
> +			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr,
> +							   *samp_ptr);
> +		}
>  		if (fmask_ptr)
> -			*fmask_ptr = ctx->fmasks[sampler_index];
> +			*fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);

The indirect and non-indirect paths can also be merged here, since both
now fetch their descriptors through load_sampler_desc.

Nicolai

>  	}
>  }
>
>  static void txq_fetch_args(
>  	struct lp_build_tgsi_context *bld_base,
>  	struct lp_build_emit_data *emit_data)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
>  	LLVMBuilderRef builder = gallivm->builder;
> @@ -5876,95 +5887,20 @@ static void preload_constant_buffers(struct si_shader_context *ctx)
>  	for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
>  		if (info->const_file_max[buf] == -1)
>  			continue;
>
>  		/* Load the resource descriptor */
>  		ctx->const_buffers[buf] =
>  			build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
>  	}
>  }
>
> -static void preload_shader_buffers(struct si_shader_context *ctx)
> -{
> -	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
> -	LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
> -	int buf, maxbuf;
> -
> -	maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
> -		      SI_NUM_SHADER_BUFFERS - 1);
> -	for (buf = 0; buf <= maxbuf; ++buf) {
> -		ctx->shader_buffers[buf] =
> -			build_indexed_load_const(
> -				ctx, ptr, lp_build_const_int32(gallivm, buf));
> -	}
> -}
> -
> -static void preload_samplers(struct si_shader_context *ctx)
> -{
> -	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
> -	struct gallivm_state *gallivm = bld_base->base.gallivm;
> -	const struct tgsi_shader_info *info = bld_base->info;
> -	unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
> -	LLVMValueRef offset;
> -
> -	if (num_samplers == 0)
> -		return;
> -
> -	/* Load the resources and samplers, we rely on the code sinking to do the rest */
> -	for (i = 0; i < num_samplers; ++i) {
> -		/* Resource */
> -		offset = lp_build_const_int32(gallivm, i);
> -		ctx->sampler_views[i] =
> -			load_sampler_desc(ctx, offset, DESC_IMAGE);
> -
> -		/* FMASK resource */
> -		if (info->is_msaa_sampler[i])
> -			ctx->fmasks[i] =
> -				load_sampler_desc(ctx, offset, DESC_FMASK);
> -		else {
> -			ctx->sampler_states[i] =
> -				load_sampler_desc(ctx, offset, DESC_SAMPLER);
> -			ctx->sampler_states[i] =
> -				sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
> -						       ctx->sampler_states[i]);
> -		}
> -	}
> -}
> -
> -static void preload_images(struct si_shader_context *ctx)
> -{
> -	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
> -	struct tgsi_shader_info *info = &ctx->shader->selector->info;
> -	struct gallivm_state *gallivm = bld_base->base.gallivm;
> -	unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
> -	LLVMValueRef res_ptr;
> -	unsigned i;
> -
> -	if (num_images == 0)
> -		return;
> -
> -	res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
> -
> -	for (i = 0; i < num_images; ++i) {
> -		/* Rely on LLVM to shrink the load for buffer resources. */
> -		LLVMValueRef rsrc =
> -			build_indexed_load_const(ctx, res_ptr,
> -						 lp_build_const_int32(gallivm, i));
> -
> -		if (info->images_writemask & (1 << i) &&
> -		    !(info->images_buffers & (1 << i)))
> -			rsrc = force_dcc_off(ctx, rsrc);
> -
> -		ctx->images[i] = rsrc;
> -	}
> -}
> -
>  /**
>   * Load ESGS and GSVS ring buffer resource descriptors and save the variables
>   * for later use.
>   */
>  static void preload_ring_buffers(struct si_shader_context *ctx)
>  {
>  	struct gallivm_state *gallivm =
>  		ctx->radeon_bld.soa.bld_base.base.gallivm;
>
>  	LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
> @@ -6793,23 +6729,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
>  		ctx.radeon_bld.declare_memory_region = declare_compute_memory;
>  		break;
>  	default:
>  		assert(!"Unsupported shader type");
>  		return -1;
>  	}
>
>  	create_meta_data(&ctx);
>  	create_function(&ctx);
>  	preload_constant_buffers(&ctx);
> -	preload_shader_buffers(&ctx);
> -	preload_samplers(&ctx);
> -	preload_images(&ctx);
>  	preload_ring_buffers(&ctx);
>
>  	if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
>  	    shader->key.ps.prolog.poly_stipple) {
>  		LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
>  						 SI_PARAM_RW_BUFFERS);
>  		si_llvm_emit_polygon_stipple(&ctx, list,
>  					     SI_PARAM_POS_FIXED_PT);
>  	}
>
>


More information about the mesa-dev mailing list