[Mesa-dev] [PATCH] radeonsi: put image, fmask, and sampler descriptors into one array

Nicolai Hähnle nhaehnle at gmail.com
Wed Feb 10 14:51:38 UTC 2016


On 10.02.2016 09:16, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The texture slot is expanded to 16 dwords containing 2 descriptors.
> Those can be:
> - Image and fmask, or
> - Image and sampler state
>
> By carefully choosing the locations, we can put all three into one slot,
> with the fmask and sampler state being mutually exclusive.
>
> This improves shaders in 2 ways:
> - 2 user SGPRs are unused, shaders can use them as temporary registers now
> - each pair of descriptors is always on the same cache line

Very nice! The way fmasks are set up has been bugging me as well. Some
comments below.

> ---
>   src/gallium/drivers/radeonsi/si_blit.c        |   2 +-
>   src/gallium/drivers/radeonsi/si_descriptors.c | 123 ++++++++++++++------------
>   src/gallium/drivers/radeonsi/si_pipe.h        |   1 -
>   src/gallium/drivers/radeonsi/si_shader.c      |  95 ++++++++++++--------
>   src/gallium/drivers/radeonsi/si_shader.h      |   8 +-
>   src/gallium/drivers/radeonsi/si_state.h       |  19 ++--
>   6 files changed, 133 insertions(+), 115 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index a93887e..1158770 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -80,7 +80,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
>   	if (op & SI_SAVE_TEXTURES) {
>   		util_blitter_save_fragment_sampler_states(
>   			sctx->blitter, 2,
> -			sctx->samplers[PIPE_SHADER_FRAGMENT].states.saved_states);
> +			sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states);
>
>   		util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
>   			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 6c79673..a15e2dd 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -41,6 +41,18 @@
>    *
>    * Also, uploading descriptors to newly allocated memory doesn't require
>    * a KCACHE flush.
> + *
> + *
> + * Possible scenarios for one 16 dword image+sampler slot:
> + *
> + *       | Image        | w/ FMASK   | Buffer       | NULL
> + * [ 0: 3] Image[0:3]   | Image[0:3] | Null[0:3]    | Null[0:3]
> + * [ 4: 7] Image[4:7]   | Image[4:7] | Buffer[0:3]  | 0
> + * [ 8:11] Null[0:3]    | Fmask[0:3] | Null[0:3]    | Null[0:3]
> + * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
> + *
> + * FMASK implies MSAA, therefore no sampler state.
> + * Sampler states are never unbound except when FMASK is bound.
>    */
>
>   #include "radeon/r600_cs.h"
> @@ -88,9 +100,9 @@ static void si_init_descriptors(struct si_descriptors *desc,
>   	desc->shader_userdata_offset = shader_userdata_index * 4;
>
>   	/* Initialize the array to NULL descriptors if the element size is 8. */
> -	if (element_dw_size == 8)
> -		for (i = 0; i < num_elements; i++)
> -			memcpy(desc->list + i*element_dw_size, null_descriptor,
> +	if (element_dw_size % 8 == 0)
> +		for (i = 0; i < num_elements * element_dw_size / 8; i++)
> +			memcpy(desc->list + i*8, null_descriptor,
>   			       sizeof(null_descriptor));
>   }
>
> @@ -138,8 +150,8 @@ static void si_release_sampler_views(struct si_sampler_views *views)
>   	si_release_descriptors(&views->desc);
>   }
>
> -static void si_sampler_view_add_buffers(struct si_context *sctx,
> -					struct si_sampler_view *rview)
> +static void si_sample_view_add_buffers(struct si_context *sctx,
> +				       struct si_sampler_view *rview)
>   {
>   	if (rview->resource) {
>   		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> @@ -165,7 +177,7 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
>   		struct si_sampler_view *rview =
>   			(struct si_sampler_view*)views->views[i];
>
> -		si_sampler_view_add_buffers(sctx, rview);
> +		si_sample_view_add_buffers(sctx, rview);
>   	}
>
>   	if (!views->desc.buffer)
> @@ -174,27 +186,42 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
>   			      RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
>   }
>
> -static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
> -				unsigned slot, struct pipe_sampler_view *view,
> -				unsigned *view_desc)
> +static void si_set_sampler_view(struct si_context *sctx,
> +				struct si_sampler_views *views,
> +				unsigned slot, struct pipe_sampler_view *view)
>   {
> -	struct si_sampler_views *views = &sctx->samplers[shader].views;
> -
>   	if (views->views[slot] == view)
>   		return;
>
>   	if (view) {
>   		struct si_sampler_view *rview =
>   			(struct si_sampler_view*)view;
> +		struct r600_texture *rtex = (struct r600_texture*)view->texture;
>
> -		si_sampler_view_add_buffers(sctx, rview);
> +		si_sample_view_add_buffers(sctx, rview);
>
>   		pipe_sampler_view_reference(&views->views[slot], view);
> -		memcpy(views->desc.list + slot*8, view_desc, 8*4);
> +		memcpy(views->desc.list + slot * 16, rview->state, 8*4);
> +
> +		if (rtex && rtex->fmask.size) {
> +			memcpy(views->desc.list + slot*16 + 8,
> +			       rview->fmask_state, 8*4);
> +		} else {
> +			/* Disable FMASK and bind sampler state in [12:15]. */
> +			memcpy(views->desc.list + slot*16 + 8,
> +			       null_descriptor, 4*4);
> +
> +			if (views->sampler_states[slot])
> +				memcpy(views->desc.list + slot*16 + 12,
> +				       views->sampler_states[slot], 4*4);
> +		}
> +
>   		views->desc.enabled_mask |= 1llu << slot;
>   	} else {
>   		pipe_sampler_view_reference(&views->views[slot], NULL);
> -		memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
> +		memcpy(views->desc.list + slot*16, null_descriptor, 8*4);
> +		/* Only clear the lower dwords of FMASK. */
> +		memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4);
>   		views->desc.enabled_mask &= ~(1llu << slot);
>   	}
>
> @@ -208,7 +235,6 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	struct si_textures_info *samplers = &sctx->samplers[shader];
> -	struct si_sampler_view **rviews = (struct si_sampler_view **)views;
>   	int i;
>
>   	if (!count || shader >= SI_NUM_SHADERS)
> @@ -220,13 +246,11 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>   		if (!views || !views[i]) {
>   			samplers->depth_texture_mask &= ~(1 << slot);
>   			samplers->compressed_colortex_mask &= ~(1 << slot);
> -			si_set_sampler_view(sctx, shader, slot, NULL, NULL);
> -			si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
> -					    NULL, NULL);
> +			si_set_sampler_view(sctx, &samplers->views, slot, NULL);
>   			continue;
>   		}
>
> -		si_set_sampler_view(sctx, shader, slot, views[i], rviews[i]->state);
> +		si_set_sampler_view(sctx, &samplers->views, slot, views[i]);
>
>   		if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
>   			struct r600_texture *rtex =
> @@ -243,60 +267,46 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>   			} else {
>   				samplers->compressed_colortex_mask &= ~(1 << slot);
>   			}
> -
> -			if (rtex->fmask.size) {
> -				si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
> -						    views[i], rviews[i]->fmask_state);
> -			} else {
> -				si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
> -						    NULL, NULL);
> -			}
>   		} else {
>   			samplers->depth_texture_mask &= ~(1 << slot);
>   			samplers->compressed_colortex_mask &= ~(1 << slot);
> -			si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
> -					    NULL, NULL);
>   		}
>   	}
>   }
>
>   /* SAMPLER STATES */
>
> -static void si_sampler_states_begin_new_cs(struct si_context *sctx,
> -					   struct si_sampler_states *states)
> -{
> -	if (!states->desc.buffer)
> -		return;
> -	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer,
> -			      RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
> -}
> -
>   static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
>                                      unsigned start, unsigned count, void **states)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> -	struct si_sampler_states *samplers = &sctx->samplers[shader].states;
> +	struct si_textures_info *samplers = &sctx->samplers[shader];
> +	struct si_descriptors *desc = &samplers->views.desc;
>   	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
>   	int i;
>
>   	if (!count || shader >= SI_NUM_SHADERS)
>   		return;
>
> -	if (start == 0)
> -		samplers->saved_states[0] = states[0];
> -	if (start == 1)
> -		samplers->saved_states[1] = states[0];
> -	else if (start == 0 && count >= 2)
> -		samplers->saved_states[1] = states[1];
> -
>   	for (i = 0; i < count; i++) {
>   		unsigned slot = start + i;
>
> -		if (!sstates[i])
> +		if (!sstates[i] ||
> +		    sstates[i] == samplers->views.sampler_states[slot])
> +			continue;
> +
> +		samplers->views.sampler_states[slot] = sstates[i];
> +
> +		/* If FMASK is bound, don't overwrite it.
> +		 * The sampler state will be set after FMASK is unbound.
> +		 */
> +		if (samplers->views.views[i] &&
> +		    samplers->views.views[i]->texture &&
> +		    ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
>   			continue;
>
> -		memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
> -		samplers->desc.list_dirty = true;
> +		memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
> +		desc->list_dirty = true;
>   	}
>   }
>
> @@ -862,7 +872,9 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
>   		while (mask) {
>   			unsigned i = u_bit_scan64(&mask);
>   			if (views->views[i]->texture == buf) {
> -				si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
> +				si_desc_reset_buffer_offset(ctx,
> +							    views->desc.list +
> +							    i * 16 + 4,
>   							    old_va, buf);
>   				views->desc.list_dirty = true;
>
> @@ -882,7 +894,6 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
>   	sctx->const_buffers[shader].desc.pointer_dirty = true;
>   	sctx->rw_buffers[shader].desc.pointer_dirty = true;
>   	sctx->samplers[shader].views.desc.pointer_dirty = true;
> -	sctx->samplers[shader].states.desc.pointer_dirty = true;
>
>   	if (shader == PIPE_SHADER_VERTEX)
>   		sctx->vertex_buffers.pointer_dirty = true;
> @@ -1003,7 +1014,6 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
>
>   		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
>   		si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
> -		si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
>   	}
>   	si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
>   }
> @@ -1023,9 +1033,7 @@ void si_init_all_descriptors(struct si_context *sctx)
>   					 RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
>
>   		si_init_descriptors(&sctx->samplers[i].views.desc,
> -				    SI_SGPR_SAMPLER_VIEWS, 8, SI_NUM_SAMPLER_VIEWS);
> -		si_init_descriptors(&sctx->samplers[i].states.desc,
> -				    SI_SGPR_SAMPLER_STATES, 4, SI_NUM_SAMPLER_STATES);
> +				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS);
>   	}
>
>   	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
> @@ -1056,8 +1064,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
>   	for (i = 0; i < SI_NUM_SHADERS; i++) {
>   		if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
>   		    !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
> -		    !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
> -		    !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
> +		    !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc))
>   			return false;
>   	}
>   	return si_upload_vertex_buffer_descriptors(sctx);
> @@ -1071,7 +1078,6 @@ void si_release_all_descriptors(struct si_context *sctx)
>   		si_release_buffer_resources(&sctx->const_buffers[i]);
>   		si_release_buffer_resources(&sctx->rw_buffers[i]);
>   		si_release_sampler_views(&sctx->samplers[i].views);
> -		si_release_descriptors(&sctx->samplers[i].states.desc);
>   	}
>   	si_release_descriptors(&sctx->vertex_buffers);
>   }
> @@ -1084,7 +1090,6 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
>   		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
>   		si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
>   		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
> -		si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
>   	}
>   	si_vertex_buffers_begin_new_cs(sctx);
>   	si_shader_userdata_begin_new_cs(sctx);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 3c963db..b5790d6 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -113,7 +113,6 @@ struct si_cs_shader_state {
>
>   struct si_textures_info {
>   	struct si_sampler_views		views;
> -	struct si_sampler_states	states;
>   	uint32_t			depth_texture_mask; /* which textures are depth */
>   	uint32_t			compressed_colortex_mask;
>   };
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index c1d3edc..42e031d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -86,8 +86,9 @@ struct si_shader_context
>   	LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
>   	LLVMValueRef lds;
>   	LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
> -	LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS];
> -	LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES];
> +	LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
> +	LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
> +	LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS];
>   	LLVMValueRef so_buffers[4];
>   	LLVMValueRef esgs_ring;
>   	LLVMValueRef gsvs_ring[4];
> @@ -2480,13 +2481,52 @@ static void set_tex_fetch_args(struct gallivm_state *gallivm,
>
>   static const struct lp_build_tgsi_action tex_action;
>
> +enum desc_type {
> +	DESC_IMAGE,
> +	DESC_FMASK,
> +	DESC_SAMPLER
> +};
> +
> +/**
> + * Load an image view, fmask view, or sampler state descriptor.
> + */
> +static LLVMValueRef get_sampler_desc(struct si_shader_context *si_shader_ctx,
> +				     LLVMValueRef index, enum desc_type type)
> +{
> +	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
> +	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
> +	LLVMBuilderRef builder = gallivm->builder;
> +	LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> +					SI_PARAM_SAMPLERS);
> +
> +	switch (type) {
> +	case DESC_IMAGE:
> +		/* The image is at [0:7]. */
> +		index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), "");
> +		break;
> +	case DESC_FMASK:
> +		/* The FMASK is at [8:15]. */
> +		index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), "");
> +		index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 1, 0), "");
> +		break;
> +	case DESC_SAMPLER:
> +		/* The sampler state is at [12:15]. */
> +		index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 4, 0), "");
> +		index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 3, 0), "");
> +		ptr = LLVMBuildPointerCast(builder, ptr,
> +					   const_array(LLVMVectorType(i32, 4), 0), "");
> +		break;
> +	}
> +
> +	return build_indexed_load_const(si_shader_ctx, ptr, index);
> +}
> +
>   static void tex_fetch_ptrs(
>   	struct lp_build_tgsi_context * bld_base,
>   	struct lp_build_emit_data * emit_data,
>   	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
>   {
>   	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
> -	struct gallivm_state *gallivm = bld_base->base.gallivm;
>   	const struct tgsi_full_instruction * inst = emit_data->inst;
>   	unsigned target = inst->Texture.Texture;
>   	unsigned sampler_src;
> @@ -2501,24 +2541,16 @@ static void tex_fetch_ptrs(
>
>   		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
>
> -		*res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
> -		*res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
> -
> -		*samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
> -		*samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
> +		*res_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_IMAGE);
> +		*samp_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_SAMPLER);
>
>   		if (target == TGSI_TEXTURE_2D_MSAA ||
> -		    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
> -			ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
> -						 lp_build_const_int32(gallivm,
> -								      SI_FMASK_TEX_OFFSET), "");
> -			*fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
> -			*fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
> -		}
> +		    target == TGSI_TEXTURE_2D_ARRAY_MSAA)
> +			*fmask_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_FMASK);

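Since the fmask and the sampler state are mutually exclusive, you could
save the LLVM dead-code elimination some work by using an if-else here:
load the fmask for MSAA targets and the sampler state otherwise.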

>   	} else {
>   		*res_ptr = si_shader_ctx->sampler_views[sampler_index];
>   		*samp_ptr = si_shader_ctx->sampler_states[sampler_index];
> -		*fmask_ptr = si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index];
> +		*fmask_ptr = si_shader_ctx->fmasks[sampler_index];
>   	}
>   }
>
> @@ -3530,7 +3562,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
>   	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
>   	struct gallivm_state *gallivm = bld_base->base.gallivm;
>   	struct si_shader *shader = si_shader_ctx->shader;
> -	LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32;
> +	LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8;
>   	unsigned i, last_array_pointer, last_sgpr, num_params;
>
>   	i8 = LLVMInt8TypeInContext(gallivm->context);
> @@ -3538,15 +3570,13 @@ static void create_function(struct si_shader_context *si_shader_ctx)
>   	f32 = LLVMFloatTypeInContext(gallivm->context);
>   	v2i32 = LLVMVectorType(i32, 2);
>   	v3i32 = LLVMVectorType(i32, 3);
> -	v4i32 = LLVMVectorType(i32, 4);
> -	v8i32 = LLVMVectorType(i32, 8);
>   	v16i8 = LLVMVectorType(i8, 16);
>
>   	params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
>   	params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
> -	params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
> -	params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
> -	last_array_pointer = SI_PARAM_SAMPLER_VIEWS;
> +	params[SI_PARAM_SAMPLERS] = const_array(LLVMVectorType(i32, 8), SI_NUM_SAMPLERS);

I'm going to use v8i32 again for images, so I'd prefer that you didn't 
remove it. It's easy enough to add it again when I rebase though, so I 
don't feel strongly about it.

> +	params[SI_PARAM_UNUSED] = LLVMPointerType(i32, CONST_ADDR_SPACE);
> +	last_array_pointer = SI_PARAM_UNUSED;
>
>   	switch (si_shader_ctx->type) {
>   	case TGSI_PROCESSOR_VERTEX:
> @@ -3747,34 +3777,27 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
>   	struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
>   	struct gallivm_state * gallivm = bld_base->base.gallivm;
>   	const struct tgsi_shader_info * info = bld_base->info;
> -
>   	unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
> -
> -	LLVMValueRef res_ptr, samp_ptr;
>   	LLVMValueRef offset;
>
>   	if (num_samplers == 0)
>   		return;
>
> -	res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
> -	samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
> -
>   	/* Load the resources and samplers, we rely on the code sinking to do the rest */
>   	for (i = 0; i < num_samplers; ++i) {
>   		/* Resource */
>   		offset = lp_build_const_int32(gallivm, i);
> -		si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
> +		si_shader_ctx->sampler_views[i] =
> +			get_sampler_desc(si_shader_ctx, offset, DESC_IMAGE);
>
>   		/* Sampler */
> -		offset = lp_build_const_int32(gallivm, i);
> -		si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
> +		si_shader_ctx->sampler_states[i] =
> +			get_sampler_desc(si_shader_ctx, offset, DESC_SAMPLER);
>
>   		/* FMASK resource */
> -		if (info->is_msaa_sampler[i]) {
> -			offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
> -			si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] =
> -				build_indexed_load_const(si_shader_ctx, res_ptr, offset);
> -		}
> +		if (info->is_msaa_sampler[i])
> +			si_shader_ctx->fmasks[i] =
> +				get_sampler_desc(si_shader_ctx, offset, DESC_FMASK);

Same as above: use an if-else for fmask vs. sampler state here as well.

>   	}
>   }
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index c42c51e..dc75e03 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -77,8 +77,8 @@ struct radeon_shader_reloc;
>
>   #define SI_SGPR_RW_BUFFERS	0  /* rings (& stream-out, VS only) */
>   #define SI_SGPR_CONST_BUFFERS	2
> -#define SI_SGPR_SAMPLER_STATES	4
> -#define SI_SGPR_SAMPLER_VIEWS	6
> +#define SI_SGPR_SAMPLERS	4  /* images & sampler states interleaved */
> +/* TODO: gap */

I'll use that gap for images if you don't mind... saves me from having 
to re-do the shifting when I rebase.

With the if-else comments addressed, this patch is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

>   #define SI_SGPR_VERTEX_BUFFERS	8  /* VS only */
>   #define SI_SGPR_BASE_VERTEX	10 /* VS only */
>   #define SI_SGPR_START_INSTANCE	11 /* VS only */
> @@ -101,8 +101,8 @@ struct radeon_shader_reloc;
>   /* LLVM function parameter indices */
>   #define SI_PARAM_RW_BUFFERS	0
>   #define SI_PARAM_CONST_BUFFERS	1
> -#define SI_PARAM_SAMPLER_STATES	2
> -#define SI_PARAM_SAMPLER_VIEWS	3
> +#define SI_PARAM_SAMPLERS	2
> +#define SI_PARAM_UNUSED		3 /* TODO: use */
>
>   /* VS only parameters */
>   #define SI_PARAM_VERTEX_BUFFERS	4
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index e9a0175..f64c4d4 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -144,17 +144,12 @@ struct si_shader_data {
>   	uint32_t		sh_base[SI_NUM_SHADERS];
>   };
>
> -#define SI_NUM_USER_SAMPLERS            16 /* AKA OpenGL textures units per shader */
> -#define SI_POLY_STIPPLE_SAMPLER         SI_NUM_USER_SAMPLERS
> -#define SI_NUM_SAMPLERS                 (SI_POLY_STIPPLE_SAMPLER + 1)
> -
>   /* User sampler views:   0..15
>    * Polygon stipple tex:  16
> - * FMASK sampler views:  17..33 (no sampler states)
>    */
> -#define SI_FMASK_TEX_OFFSET		SI_NUM_SAMPLERS
> -#define SI_NUM_SAMPLER_VIEWS		(SI_FMASK_TEX_OFFSET + SI_NUM_SAMPLERS)
> -#define SI_NUM_SAMPLER_STATES		SI_NUM_SAMPLERS
> +#define SI_NUM_USER_SAMPLERS            16 /* AKA OpenGL textures units per shader */
> +#define SI_POLY_STIPPLE_SAMPLER         SI_NUM_USER_SAMPLERS
> +#define SI_NUM_SAMPLERS                 (SI_POLY_STIPPLE_SAMPLER + 1)
>
>   /* User constant buffers:   0..15
>    * Driver state constants:  16
> @@ -210,12 +205,8 @@ struct si_descriptors {
>
>   struct si_sampler_views {
>   	struct si_descriptors		desc;
> -	struct pipe_sampler_view	*views[SI_NUM_SAMPLER_VIEWS];
> -};
> -
> -struct si_sampler_states {
> -	struct si_descriptors		desc;
> -	void				*saved_states[2]; /* saved for u_blitter */
> +	struct pipe_sampler_view	*views[SI_NUM_SAMPLERS];
> +	void				*sampler_states[SI_NUM_SAMPLERS];
>   };
>
>   struct si_buffer_resources {
>


More information about the mesa-dev mailing list