[Mesa-dev] [PATCH 02/10] radeonsi: merge sampler and image descriptor lists into one

Marek Olšák maraeo at gmail.com
Wed May 17 19:38:44 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Sampler slots: slot[8], .. slot[39] (ascending)
Image slots: slot[7], .. slot[0] (descending)

Each image occupies 1/2 of each slot, so there are 16 images in total,
therefore the layout is: slot[15], .. slot[0]. (in 1/2 slot increments)

Updating image slot 2n+i (i <= 1) also dirties and re-uploads slot 2n+!i.
---
 src/gallium/drivers/radeonsi/si_descriptors.c     | 134 ++++++++++------------
 src/gallium/drivers/radeonsi/si_shader.c          |  22 ++--
 src/gallium/drivers/radeonsi/si_shader.h          |  20 ++--
 src/gallium/drivers/radeonsi/si_shader_internal.h |   3 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  15 ++-
 src/gallium/drivers/radeonsi/si_state.h           |  17 ++-
 6 files changed, 99 insertions(+), 112 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 5dc7068..177d2f3 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -92,48 +92,37 @@ static uint32_t null_image_descriptor[8] = {
 	0,
 	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
 	/* the rest must contain zeros, which is also used by the buffer
 	 * descriptor */
 };
 
 static void si_init_descriptors(struct si_descriptors *desc,
 				unsigned shader_userdata_index,
 				unsigned element_dw_size,
 				unsigned num_elements,
-				const uint32_t *null_descriptor,
 				unsigned *ce_offset)
 {
-	int i;
-
 	assert(num_elements <= sizeof(desc->dirty_mask)*8);
 
 	desc->list = CALLOC(num_elements, element_dw_size * 4);
 	desc->element_dw_size = element_dw_size;
 	desc->num_elements = num_elements;
-	desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
+	desc->dirty_mask = u_bit_consecutive64(0, num_elements);
 	desc->shader_userdata_offset = shader_userdata_index * 4;
 
 	if (ce_offset) {
 		desc->uses_ce = true;
 		desc->ce_offset = *ce_offset;
 
 		/* make sure that ce_offset stays 32 byte aligned */
 		*ce_offset += align(element_dw_size * num_elements * 4, 32);
 	}
-
-	/* Initialize the array to NULL descriptors if the element size is 8. */
-	if (null_descriptor) {
-		assert(element_dw_size % 8 == 0);
-		for (i = 0; i < num_elements * element_dw_size / 8; i++)
-			memcpy(desc->list + i * 8, null_descriptor,
-			       8 * 4);
-	}
 }
 
 static void si_release_descriptors(struct si_descriptors *desc)
 {
 	r600_resource_reference(&desc->buffer, NULL);
 	FREE(desc->list);
 }
 
 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
 			 unsigned *out_offset, struct r600_resource **out_buf) {
@@ -212,22 +201,22 @@ static bool si_upload_descriptors(struct si_context *sctx,
 		return true;
 
 	if (sctx->ce_ib && desc->uses_ce) {
 		uint32_t const* list = (uint32_t const*)desc->list;
 
 		if (desc->ce_ram_dirty)
 			si_ce_reinitialize_descriptors(sctx, desc);
 
 		while(desc->dirty_mask) {
 			int begin, count;
-			u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
-						     &count);
+			u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin,
+						       &count);
 
 			begin *= desc->element_dw_size;
 			count *= desc->element_dw_size;
 
 			radeon_emit(sctx->ce_ib,
 			            PKT3(PKT3_WRITE_CONST_RAM, count, 0));
 			radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
 			radeon_emit_array(sctx->ce_ib, list + begin, count);
 		}
 
@@ -266,30 +255,30 @@ si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc
 	if (!desc->buffer)
 		return;
 
 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
 				  RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 }
 
 /* SAMPLER VIEWS */
 
 static unsigned
-si_sampler_descriptors_idx(unsigned shader)
+si_sampler_and_image_descriptors_idx(unsigned shader)
 {
 	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-	       SI_SHADER_DESCS_SAMPLERS;
+	       SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
 }
 
 static struct si_descriptors *
-si_sampler_descriptors(struct si_context *sctx, unsigned shader)
+si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
 {
-	return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
+	return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
 }
 
 static void si_release_sampler_views(struct si_sampler_views *views)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(views->views); i++) {
 		pipe_sampler_view_reference(&views->views[i], NULL);
 	}
 }
@@ -467,22 +456,23 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
 	}
 }
 
 static void si_set_sampler_view(struct si_context *sctx,
 				unsigned shader,
 				unsigned slot, struct pipe_sampler_view *view,
 				bool disallow_early_out)
 {
 	struct si_sampler_views *views = &sctx->samplers[shader].views;
 	struct si_sampler_view *rview = (struct si_sampler_view*)view;
-	struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
-	uint32_t *desc = descs->list + slot * 16;
+	struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
+	unsigned desc_slot = si_get_sampler_slot(slot);
+	uint32_t *desc = descs->list + desc_slot * 16;
 
 	if (views->views[slot] == view && !disallow_early_out)
 		return;
 
 	if (view) {
 		struct r600_texture *rtex = (struct r600_texture *)view->texture;
 		bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
 
 		if (unlikely(!is_buffer && rview->dcc_incompatible)) {
 			if (vi_dcc_enabled(rtex, view->u.tex.first_level))
@@ -542,22 +532,22 @@ static void si_set_sampler_view(struct si_context *sctx,
 		/* Only clear the lower dwords of FMASK. */
 		memcpy(desc + 8, null_texture_descriptor, 4*4);
 		/* Re-set the sampler state if we are transitioning from FMASK. */
 		if (views->sampler_states[slot])
 			memcpy(desc + 12,
 			       views->sampler_states[slot]->val, 4*4);
 
 		views->enabled_mask &= ~(1u << slot);
 	}
 
-	descs->dirty_mask |= 1u << slot;
-	sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
+	descs->dirty_mask |= 1ull << desc_slot;
+	sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
 }
 
 static bool is_compressed_colortex(struct r600_texture *rtex)
 {
 	return rtex->fmask.size ||
 	       (rtex->dirty_level_mask &&
 		(rtex->cmask.size || rtex->dcc_offset));
 }
 
 static bool depth_needs_decompression(struct r600_texture *rtex,
@@ -649,33 +639,20 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
 				samplers->compressed_colortex_mask |= 1u << i;
 			} else {
 				samplers->compressed_colortex_mask &= ~(1u << i);
 			}
 		}
 	}
 }
 
 /* IMAGE VIEWS */
 
-static unsigned
-si_image_descriptors_idx(unsigned shader)
-{
-	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-	       SI_SHADER_DESCS_IMAGES;
-}
-
-static struct si_descriptors*
-si_image_descriptors(struct si_context *sctx, unsigned shader)
-{
-	return &sctx->descriptors[si_image_descriptors_idx(shader)];
-}
-
 static void
 si_release_image_views(struct si_images_info *images)
 {
 	unsigned i;
 
 	for (i = 0; i < SI_NUM_IMAGES; ++i) {
 		struct pipe_image_view *view = &images->views[i];
 
 		pipe_resource_reference(&view->resource, NULL);
 	}
@@ -697,29 +674,31 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag
 					   RADEON_USAGE_READWRITE, false, false);
 	}
 }
 
 static void
 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
 {
 	struct si_images_info *images = &ctx->images[shader];
 
 	if (images->enabled_mask & (1u << slot)) {
-		struct si_descriptors *descs = si_image_descriptors(ctx, shader);
+		struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
+		unsigned desc_slot = si_get_image_slot(slot);
 
 		pipe_resource_reference(&images->views[slot].resource, NULL);
 		images->compressed_colortex_mask &= ~(1 << slot);
 
-		memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
+		memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
 		images->enabled_mask &= ~(1u << slot);
-		descs->dirty_mask |= 1u << slot;
-		ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
+		/* two 8-byte images share one 16-byte slot */
+		descs->dirty_mask |= 1u << (desc_slot / 2);
+		ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
 	}
 }
 
 static void
 si_mark_image_range_valid(const struct pipe_image_view *view)
 {
 	struct r600_resource *res = (struct r600_resource *)view->resource;
 
 	assert(res && res->b.b.target == PIPE_BUFFER);
 
@@ -728,23 +707,24 @@ si_mark_image_range_valid(const struct pipe_image_view *view)
 		       view->u.buf.offset + view->u.buf.size);
 }
 
 static void si_set_shader_image(struct si_context *ctx,
 				unsigned shader,
 				unsigned slot, const struct pipe_image_view *view,
 				bool skip_decompress)
 {
 	struct si_screen *screen = ctx->screen;
 	struct si_images_info *images = &ctx->images[shader];
-	struct si_descriptors *descs = si_image_descriptors(ctx, shader);
+	struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
 	struct r600_resource *res;
-	uint32_t *desc = descs->list + slot * 8;
+	unsigned desc_slot = si_get_image_slot(slot);
+	uint32_t *desc = descs->list + desc_slot * 8;
 
 	if (!view || !view->resource) {
 		si_disable_shader_image(ctx, shader, slot);
 		return;
 	}
 
 	res = (struct r600_resource *)view->resource;
 
 	if (&images->views[slot] != view)
 		util_copy_image_view(&images->views[slot], view);
@@ -824,22 +804,23 @@ static void si_set_shader_image(struct si_context *ctx,
 					   width, height, depth,
 					   desc, NULL);
 		si_set_mutable_tex_desc_fields(screen, tex,
 					       &tex->surface.u.legacy.level[level],
 					       level, level,
 					       util_format_get_blockwidth(view->format),
 					       false, desc);
 	}
 
 	images->enabled_mask |= 1u << slot;
-	descs->dirty_mask |= 1u << slot;
-	ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
+	/* two 8-byte images share one 16-byte slot */
+	descs->dirty_mask |= 1u << (desc_slot / 2);
+	ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
 
 	/* Since this can flush, it must be done after enabled_mask is updated. */
 	si_sampler_view_add_buffer(ctx, &res->b.b,
 				   RADEON_USAGE_READWRITE, false, true);
 }
 
 static void
 si_set_shader_images(struct pipe_context *pipe,
 		     enum pipe_shader_type shader,
 		     unsigned start_slot, unsigned count,
@@ -888,51 +869,52 @@ si_images_update_compressed_colortex_mask(struct si_images_info *images)
 }
 
 /* SAMPLER STATES */
 
 static void si_bind_sampler_states(struct pipe_context *ctx,
                                    enum pipe_shader_type shader,
                                    unsigned start, unsigned count, void **states)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_textures_info *samplers = &sctx->samplers[shader];
-	struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
+	struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
 	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
 	int i;
 
 	if (!count || shader >= SI_NUM_SHADERS)
 		return;
 
 	for (i = 0; i < count; i++) {
 		unsigned slot = start + i;
+		unsigned desc_slot = si_get_sampler_slot(slot);
 
 		if (!sstates[i] ||
 		    sstates[i] == samplers->views.sampler_states[slot])
 			continue;
 
 #ifdef DEBUG
 		assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
 #endif
 		samplers->views.sampler_states[slot] = sstates[i];
 
 		/* If FMASK is bound, don't overwrite it.
 		 * The sampler state will be set after FMASK is unbound.
 		 */
 		if (samplers->views.views[slot] &&
 		    samplers->views.views[slot]->texture &&
 		    samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
 		    ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
 			continue;
 
-		memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
-		desc->dirty_mask |= 1u << slot;
-		sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
+		memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
+		desc->dirty_mask |= 1ull << desc_slot;
+		sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
 	}
 }
 
 /* BUFFER RESOURCES */
 
 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
 				     struct si_descriptors *descs,
 				     unsigned num_buffers,
 				     unsigned shader_userdata_index,
 				     enum radeon_bo_usage shader_usage,
@@ -941,21 +923,21 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
 				     enum radeon_bo_priority priority_constbuf,
 				     unsigned *ce_offset)
 {
 	buffers->shader_usage = shader_usage;
 	buffers->shader_usage_constbuf = shader_usage_constbuf;
 	buffers->priority = priority;
 	buffers->priority_constbuf = priority_constbuf;
 	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
 
 	si_init_descriptors(descs, shader_userdata_index, 4,
-			    num_buffers, NULL, ce_offset);
+			    num_buffers, ce_offset);
 }
 
 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
 					struct si_descriptors *descs)
 {
 	int i;
 
 	for (i = 0; i < descs->num_elements; i++) {
 		pipe_resource_reference(&buffers->buffers[i], NULL);
 	}
@@ -1703,64 +1685,69 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
 						  buf, old_va,
 						  sctx->const_and_shader_buffers[shader].shader_usage,
 						  sctx->const_and_shader_buffers[shader].priority);
 	}
 
 	if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
 		/* Texture buffers - update bindings. */
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			struct si_sampler_views *views = &sctx->samplers[shader].views;
 			struct si_descriptors *descs =
-				si_sampler_descriptors(sctx, shader);
+				si_sampler_and_image_descriptors(sctx, shader);
 			unsigned mask = views->enabled_mask;
 
 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
 				if (views->views[i]->texture == buf) {
+					unsigned desc_slot = si_get_sampler_slot(i);
+
 					si_desc_reset_buffer_offset(ctx,
 								    descs->list +
-								    i * 16 + 4,
+								    desc_slot * 16 + 4,
 								    old_va, buf);
-					descs->dirty_mask |= 1u << i;
+					descs->dirty_mask |= 1ull << desc_slot;
 					sctx->descriptors_dirty |=
-						1u << si_sampler_descriptors_idx(shader);
+						1u << si_sampler_and_image_descriptors_idx(shader);
 
 					radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
 									    rbuffer, RADEON_USAGE_READ,
 									    RADEON_PRIO_SAMPLER_BUFFER,
 									    true);
 				}
 			}
 		}
 	}
 
 	/* Shader images */
 	if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
 		for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
 			struct si_images_info *images = &sctx->images[shader];
 			struct si_descriptors *descs =
-				si_image_descriptors(sctx, shader);
+				si_sampler_and_image_descriptors(sctx, shader);
 			unsigned mask = images->enabled_mask;
 
 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
 
 				if (images->views[i].resource == buf) {
+					unsigned desc_slot = si_get_image_slot(i);
+
 					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
 						si_mark_image_range_valid(&images->views[i]);
 
 					si_desc_reset_buffer_offset(
-						ctx, descs->list + i * 8 + 4,
+						ctx, descs->list + desc_slot * 8 + 4,
 						old_va, buf);
-					descs->dirty_mask |= 1u << i;
+					/* two 8-byte images share one 16-byte slot */
+					descs->dirty_mask |= 1u << (desc_slot / 2);
 					sctx->descriptors_dirty |=
-						1u << si_image_descriptors_idx(shader);
+						1u << si_sampler_and_image_descriptors_idx(shader);
 
 					radeon_add_to_buffer_list_check_mem(
 						&sctx->b, &sctx->b.gfx, rbuffer,
 						RADEON_USAGE_READWRITE,
 						RADEON_PRIO_SAMPLER_BUFFER, true);
 				}
 			}
 		}
 	}
 }
@@ -2008,66 +1995,63 @@ void si_init_all_descriptors(struct si_context *sctx)
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
 				i == PIPE_SHADER_TESS_CTRL;
 		bool gfx9_gs = sctx->b.chip_class == GFX9 &&
 			       i == PIPE_SHADER_GEOMETRY;
 		/* GFX9 has only 4KB of CE, while previous chips had 32KB.
 		 * Rarely used descriptors don't use CE RAM.
 		 */
 		bool big_ce = sctx->b.chip_class <= VI;
-		bool images_use_ce = big_ce;
 		bool const_and_shaderbufs_use_ce = big_ce ||
 						   i == PIPE_SHADER_VERTEX ||
 						   i == PIPE_SHADER_FRAGMENT;
-		bool samplers_use_ce = big_ce ||
-				       i == PIPE_SHADER_FRAGMENT;
+		bool samplers_and_images_use_ce = big_ce ||
+						  i == PIPE_SHADER_FRAGMENT;
 
 		si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
 					 si_const_and_shader_buffer_descriptors(sctx, i),
 					 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS,
 					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
 					 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
 						   SI_SGPR_CONST_AND_SHADER_BUFFERS,
 					 RADEON_USAGE_READWRITE,
 					 RADEON_USAGE_READ,
 					 RADEON_PRIO_SHADER_RW_BUFFER,
 					 RADEON_PRIO_CONST_BUFFER,
 					 const_and_shaderbufs_use_ce ? &ce_offset : NULL);
 
-		si_init_descriptors(si_sampler_descriptors(sctx, i),
-				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
-				    gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
-					      SI_SGPR_SAMPLERS,
-				    16, SI_NUM_SAMPLERS,
-				    null_texture_descriptor,
-				    samplers_use_ce ? &ce_offset : NULL);
-
-		si_init_descriptors(si_image_descriptors(sctx, i),
-				    gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
-				    gfx9_gs ? GFX9_SGPR_GS_IMAGES :
-					      SI_SGPR_IMAGES,
-				    8, SI_NUM_IMAGES,
-				    null_image_descriptor,
-				    images_use_ce ? &ce_offset : NULL);
+		struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
+		si_init_descriptors(desc,
+				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
+				    gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
+					      SI_SGPR_SAMPLERS_AND_IMAGES,
+				    16, SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS,
+				    samplers_and_images_use_ce ? &ce_offset : NULL);
+
+		int j;
+		for (j = 0; j < SI_NUM_IMAGES; j++)
+			memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
+		for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
+			memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
 	}
 
 	si_init_buffer_resources(&sctx->rw_buffers,
 				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
 				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
 				 /* The second set of usage/priority is used by
 				  * const buffers in RW buffer slots. */
 				 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
 				 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
 				 &ce_offset);
 	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
-			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
+			    4, SI_NUM_VERTEX_BUFFERS, NULL);
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	if (sctx->b.chip_class >= GFX9)
 		assert(ce_offset <= 4096);
 	else
 		assert(ce_offset <= 32768);
 
 	/* Set pipe_context functions. */
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8c5bcb9..f847e46 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2793,23 +2793,21 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 				  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
 				  8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
 				  8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
 
 	unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
 					   8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
-					   8 + GFX9_SGPR_TCS_SAMPLERS);
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
-					   8 + GFX9_SGPR_TCS_IMAGES);
+					   8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
 
 	unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
 	ret = si_insert_input_ret_float(ctx, ret,
 					ctx->param_tcs_patch_id, vgpr++);
 	ret = si_insert_input_ret_float(ctx, ret,
 					ctx->param_tcs_rel_ids, vgpr++);
 	ctx->return_value = ret;
 }
 
 /* Pass GS inputs from ES to GS on GFX9. */
@@ -2820,23 +2818,21 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
 
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
 
 	unsigned desc_param = ctx->param_vs_state_bits + 1;
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
 					   8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
-					   8 + GFX9_SGPR_GS_SAMPLERS);
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
-					   8 + GFX9_SGPR_GS_IMAGES);
+					   8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
 
 	unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
 	for (unsigned i = 0; i < 5; i++) {
 		unsigned param = ctx->param_gs_vtx01_offset + i;
 		ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
 	}
 	ctx->return_value = ret;
 }
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -4054,27 +4050,26 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
 	return max_work_group_size;
 }
 
 static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
 					    LLVMTypeRef *params,
 					    unsigned *num_params,
 					    bool assign_params)
 {
 	params[(*num_params)++] = si_const_array(ctx->v4i32,
 						 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
-	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
-	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
+	params[(*num_params)++] = si_const_array(ctx->v8i32,
+						 SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2);
 
 	if (assign_params) {
-		ctx->param_const_and_shader_buffers = *num_params - 3;
-		ctx->param_samplers	  = *num_params - 2;
-		ctx->param_images	  = *num_params - 1;
+		ctx->param_const_and_shader_buffers = *num_params - 2;
+		ctx->param_samplers_and_images = *num_params - 1;
 	}
 }
 
 static void declare_default_desc_pointers(struct si_shader_context *ctx,
 					  LLVMTypeRef *params,
 				          unsigned *num_params)
 {
 	params[ctx->param_rw_buffers = (*num_params)++] =
 		si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
 	declare_per_stage_desc_pointers(ctx, params, num_params, true);
@@ -6659,35 +6654,33 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32; /* wave info */
 		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
-		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
 	} else {
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
-		params[num_params++] = ctx->i64;
 		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
 	}
 	last_sgpr = num_params - 1;
@@ -7031,22 +7024,21 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	LLVMTypeRef params[16+8*4+3];
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 	int last_sgpr, num_params = 0, i;
 	struct si_ps_exports exp = {};
 
 	/* Declare input SGPRs. */
 	params[ctx->param_rw_buffers = num_params++] = ctx->i64;
 	params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
-	params[ctx->param_samplers = num_params++] = ctx->i64;
-	params[ctx->param_images = num_params++] = ctx->i64;
+	params[ctx->param_samplers_and_images = num_params++] = ctx->i64;
 	assert(num_params == SI_PARAM_ALPHA_REF);
 	params[SI_PARAM_ALPHA_REF] = ctx->f32;
 	last_sgpr = SI_PARAM_ALPHA_REF;
 
 	/* Declare input VGPRs. */
 	num_params = (last_sgpr + 1) +
 		     util_bitcount(key->ps_epilog.colors_written) * 4 +
 		     key->ps_epilog.writes_z +
 		     key->ps_epilog.writes_stencil +
 		     key->ps_epilog.writes_samplemask;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 08e809c..ffb7dc3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -152,24 +152,22 @@ struct ac_shader_binary;
 
 /* SGPR user data indices */
 enum {
 	/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
 	 * and these two are used for other purposes.
 	 */
 	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
 	SI_SGPR_RW_BUFFERS_HI,
 	SI_SGPR_CONST_AND_SHADER_BUFFERS,
 	SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
-	SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
-	SI_SGPR_SAMPLERS_HI,
-	SI_SGPR_IMAGES,
-	SI_SGPR_IMAGES_HI,
+	SI_SGPR_SAMPLERS_AND_IMAGES,
+	SI_SGPR_SAMPLERS_AND_IMAGES_HI,
 	SI_NUM_RESOURCE_SGPRS,
 
 	/* all VS variants */
 	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_VERTEX_BUFFERS_HI,
 	SI_SGPR_BASE_VERTEX,
 	SI_SGPR_START_INSTANCE,
 	SI_SGPR_DRAWID,
 	SI_SGPR_VS_STATE_BITS,
 	SI_VS_NUM_USER_SGPR,
@@ -190,47 +188,43 @@ enum {
 
 	/* GFX9: Merged LS-HS (VS-TCS) only. */
 	GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
 	GFX9_SGPR_TCS_OUT_OFFSETS,
 	GFX9_SGPR_TCS_OUT_LAYOUT,
 	GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
 	GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
 	GFX9_SGPR_unused_to_align_the_next_pointer,
 	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
 	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
-	GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
-	GFX9_SGPR_TCS_SAMPLERS_HI,
-	GFX9_SGPR_TCS_IMAGES,
-	GFX9_SGPR_TCS_IMAGES_HI,
+	GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES,
+	GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI,
 	GFX9_TCS_NUM_USER_SGPR,
 
 	/* GFX9: Merged ES-GS (VS-GS or TES-GS). */
 	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
 	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
-	GFX9_SGPR_GS_SAMPLERS,
-	GFX9_SGPR_GS_SAMPLERS_HI,
-	GFX9_SGPR_GS_IMAGES,
-	GFX9_SGPR_GS_IMAGES_HI,
+	GFX9_SGPR_GS_SAMPLERS_AND_IMAGES,
+	GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI,
 	GFX9_GS_NUM_USER_SGPR,
 
 	/* GS limits */
 	GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
 	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
 
 	/* PS only */
 	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
 	SI_PS_NUM_USER_SGPR,
 };
 
 /* LLVM function parameter indices */
 enum {
-	SI_NUM_RESOURCE_PARAMS = 4,
+	SI_NUM_RESOURCE_PARAMS = 3,
 
 	/* PS only parameters */
 	SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
 	SI_PARAM_PRIM_MASK,
 	SI_PARAM_PERSP_SAMPLE,
 	SI_PARAM_PERSP_CENTER,
 	SI_PARAM_PERSP_CENTROID,
 	SI_PARAM_PERSP_PULL_MODEL,
 	SI_PARAM_LINEAR_SAMPLE,
 	SI_PARAM_LINEAR_CENTER,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 9fd027d..5094023 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -103,22 +103,21 @@ struct si_shader_context {
 	LLVMValueRef *temp_array_allocas;
 
 	LLVMValueRef undef_alloca;
 
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
 	/* Parameter indices for LLVMGetParam. */
 	int param_rw_buffers;
 	int param_const_and_shader_buffers;
-	int param_samplers;
-	int param_images;
+	int param_samplers_and_images;
 	/* Common inputs for merged shaders. */
 	int param_merged_wave_info;
 	int param_merged_scratch_offset;
 	/* API VS */
 	int param_vertex_buffers;
 	int param_base_vertex;
 	int param_start_instance;
 	int param_draw_id;
 	int param_vertex_id;
 	int param_rel_auto_id;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 9c44cff..5fc3420 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -173,48 +173,52 @@ static LLVMValueRef load_image_desc(struct si_shader_context *ctx,
  */
 static void
 image_fetch_rsrc(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *image,
 	bool is_store, unsigned target,
 	LLVMValueRef *rsrc)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
-					     ctx->param_images);
+					     ctx->param_samplers_and_images);
 	LLVMValueRef index;
 	bool dcc_off = is_store;
 
 	assert(image->Register.File == TGSI_FILE_IMAGE);
 
 	if (!image->Register.Indirect) {
 		const struct tgsi_shader_info *info = bld_base->info;
 		unsigned images_writemask = info->images_store |
 					    info->images_atomic;
 
-		index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
+		index = LLVMConstInt(ctx->i32,
+				     si_get_image_slot(image->Register.Index), 0);
 
 		if (images_writemask & (1 << image->Register.Index))
 			dcc_off = true;
 	} else {
 		/* From the GL_ARB_shader_image_load_store extension spec:
 		 *
 		 *    If a shader performs an image load, store, or atomic
 		 *    operation using an image variable declared as an array,
 		 *    and if the index used to select an individual element is
 		 *    negative or greater than or equal to the size of the
 		 *    array, the results of the operation are undefined but may
 		 *    not lead to termination.
 		 */
 		index = si_get_bounded_indirect_index(ctx, &image->Indirect,
 						      image->Register.Index,
 						      SI_NUM_IMAGES);
+		index = LLVMBuildSub(ctx->gallivm.builder,
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+				     index, "");
 	}
 
 	*rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
 	if (dcc_off && target != TGSI_TEXTURE_BUFFER)
 		*rsrc = force_dcc_off(ctx, *rsrc);
 }
 
 static LLVMValueRef image_fetch_coords(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_instruction *inst,
@@ -1174,37 +1178,40 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
 	return LLVMBuildInsertElement(builder, samp, samp0,
 				      ctx->i32_0, "");
 }
 
 static void tex_fetch_ptrs(
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data,
 	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers);
+	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	const struct tgsi_full_src_register *reg;
 	unsigned target = inst->Texture.Texture;
 	unsigned sampler_src;
 	LLVMValueRef index;
 
 	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
 	reg = &emit_data->inst->Src[sampler_src];
 
 	if (reg->Register.Indirect) {
 		index = si_get_bounded_indirect_index(ctx,
 						      &reg->Indirect,
 						      reg->Register.Index,
 						      SI_NUM_SAMPLERS);
+		index = LLVMBuildAdd(ctx->gallivm.builder, index,
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
 	} else {
-		index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
+		index = LLVMConstInt(ctx->i32,
+				     si_get_sampler_slot(reg->Register.Index), 0);
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER)
 		*res_ptr = load_sampler_desc(ctx, list, index, DESC_BUFFER);
 	else
 		*res_ptr = load_sampler_desc(ctx, list, index, DESC_IMAGE);
 
 	if (samp_ptr)
 		*samp_ptr = NULL;
 	if (fmask_ptr)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 90d0972..c4ef903 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -189,22 +189,21 @@ enum {
  *  1 - vertex const buffers
  *  2 - vertex shader buffers
  *   ...
  *  5 - fragment const buffers
  *   ...
  *  21 - compute const buffers
  *   ...
  */
 enum {
 	SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
-	SI_SHADER_DESCS_SAMPLERS,
-	SI_SHADER_DESCS_IMAGES,
+	SI_SHADER_DESCS_SAMPLERS_AND_IMAGES,
 	SI_NUM_SHADER_DESCS,
 };
 
 #define SI_DESCS_RW_BUFFERS            0
 #define SI_DESCS_FIRST_SHADER          1
 #define SI_DESCS_FIRST_COMPUTE         (SI_DESCS_FIRST_SHADER + \
                                         PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS)
 #define SI_NUM_DESCS                   (SI_DESCS_FIRST_SHADER + \
                                         SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)
 
@@ -222,21 +221,21 @@ struct si_descriptors {
 	unsigned num_elements;
 
 	/* The buffer where the descriptors have been uploaded. */
 	struct r600_resource *buffer;
 	unsigned buffer_offset;
 
 	/* Offset in CE RAM */
 	unsigned ce_offset;
 
 	/* elements of the list that are changed and need to be uploaded */
-	unsigned dirty_mask;
+	uint64_t dirty_mask;
 
 	/* Whether CE is used to upload this descriptor array. */
 	bool uses_ce;
 	/* Whether the CE ram is dirty and needs to be reinitialized entirely
 	 * before we can do partial updates. */
 	bool ce_ram_dirty;
 
 	/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
 	 * array will be stored. */
 	unsigned shader_userdata_offset;
@@ -380,11 +379,23 @@ static inline unsigned si_get_constbuf_slot(unsigned slot)
 	/* Constant buffers are in slots [16..31], ascending */
 	return SI_NUM_SHADER_BUFFERS + slot;
 }
 
 static inline unsigned si_get_shaderbuf_slot(unsigned slot)
 {
 	/* shader buffers are in slots [15..0], descending */
 	return SI_NUM_SHADER_BUFFERS - 1 - slot;
 }
 
+static inline unsigned si_get_sampler_slot(unsigned slot)
+{
+	/* samplers are in slots [8..39], ascending */
+	return SI_NUM_IMAGES / 2 + slot;
+}
+
+static inline unsigned si_get_image_slot(unsigned slot)
+{
+	/* images are in slots [15..0] (sampler slots [7..0]), descending */
+	return SI_NUM_IMAGES - 1 - slot;
+}
+
 #endif
-- 
2.7.4



More information about the mesa-dev mailing list