Mesa (master): radeonsi: add FMASK slots for shader images (for MSAA images)

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Oct 9 21:14:17 UTC 2019


Module: Mesa
Branch: master
Commit: 743a9d85e2ca5aef93e40fe7833742a067a5943d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=743a9d85e2ca5aef93e40fe7833742a067a5943d

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Thu Sep 12 21:13:08 2019 -0400

radeonsi: add FMASK slots for shader images (for MSAA images)

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>

---

 src/gallium/drivers/radeonsi/si_descriptors.c     | 33 ++++++++++++-----------
 src/gallium/drivers/radeonsi/si_shader_nir.c      |  4 +--
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 +--
 src/gallium/drivers/radeonsi/si_state.h           | 11 +++++---
 src/gallium/drivers/radeonsi/si_state_shaders.c   | 16 +++++++++--
 5 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 28fe5c1e585..12ab1f51e97 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -794,8 +794,6 @@ static void si_set_shader_image(struct si_context *ctx,
 	struct si_images *images = &ctx->images[shader];
 	struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
 	struct si_resource *res;
-	unsigned desc_slot = si_get_image_slot(slot);
-	uint32_t *desc = descs->list + desc_slot * 8;
 
 	if (!view || !view->resource) {
 		si_disable_shader_image(ctx, shader, slot);
@@ -807,7 +805,9 @@ static void si_set_shader_image(struct si_context *ctx,
 	if (&images->views[slot] != view)
 		util_copy_image_view(&images->views[slot], view);
 
-	si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL);
+	si_set_shader_image_desc(ctx, view, skip_decompress,
+				 descs->list + si_get_image_slot(slot) * 8,
+				 descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);
 
 	if (res->b.b.target == PIPE_BUFFER ||
 	    view->shader_access & SI_IMAGE_ACCESS_AS_BUFFER) {
@@ -1981,18 +1981,19 @@ static void si_update_bindless_image_descriptor(struct si_context *sctx,
 	struct si_descriptors *desc = &sctx->bindless_descriptors;
 	unsigned desc_slot_offset = img_handle->desc_slot * 16;
 	struct pipe_image_view *view = &img_handle->view;
-	uint32_t desc_list[8];
+	struct pipe_resource *res = view->resource;
+	uint32_t image_desc[16];
+	unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;
 
-	if (view->resource->target == PIPE_BUFFER)
+	if (res->target == PIPE_BUFFER)
 		return;
 
-	memcpy(desc_list, desc->list + desc_slot_offset,
-	       sizeof(desc_list));
+	memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
 	si_set_shader_image_desc(sctx, view, true,
-				 desc->list + desc_slot_offset, NULL);
+				 desc->list + desc_slot_offset,
+				 desc->list + desc_slot_offset + 8);
 
-	if (memcmp(desc_list, desc->list + desc_slot_offset,
-		   sizeof(desc_list))) {
+	if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
 		img_handle->desc_dirty = true;
 		sctx->bindless_descriptors_dirty = true;
 	}
@@ -2584,7 +2585,7 @@ static uint64_t si_create_image_handle(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_image_handle *img_handle;
-	uint32_t desc_list[8];
+	uint32_t desc_list[16];
 	uint64_t handle;
 
 	if (!view || !view->resource)
@@ -2595,9 +2596,9 @@ static uint64_t si_create_image_handle(struct pipe_context *ctx,
 		return 0;
 
 	memset(desc_list, 0, sizeof(desc_list));
-	si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
+	si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);
 
-	si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL);
+	si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);
 
 	img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
 							      sizeof(desc_list));
@@ -2764,7 +2765,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 		bool is_2nd = sctx->chip_class >= GFX9 &&
 				     (i == PIPE_SHADER_TESS_CTRL ||
 				      i == PIPE_SHADER_GEOMETRY);
-		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
+		unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
 		unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
 		int rel_dw_offset;
 		struct si_descriptors *desc;
@@ -2809,9 +2810,9 @@ void si_init_all_descriptors(struct si_context *sctx)
 		si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
 
 		int j;
-		for (j = 0; j < SI_NUM_IMAGES; j++)
+		for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
 			memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
-		for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
+		for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
 			memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 3bf30c72436..680930bb523 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -1114,13 +1114,13 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 
 	if (image) {
 		index = LLVMBuildSub(ctx->ac.builder,
-				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
 				     index, "");
 		return si_load_image_desc(ctx, list, index, desc_type, write, false);
 	}
 
 	index = LLVMBuildAdd(ctx->ac.builder, index,
-			     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+			     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
 	return si_load_sampler_desc(ctx, list, index, desc_type);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 243286cc629..10fbb808e9b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -235,7 +235,7 @@ image_fetch_rsrc(
 						      image->Register.Index,
 						      ctx->num_images);
 		index = LLVMBuildSub(ctx->ac.builder,
-				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
 				     index, "");
 	}
 
@@ -1126,7 +1126,7 @@ static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
 						      reg->Register.Index,
 						      ctx->num_samplers);
 		index = LLVMBuildAdd(ctx->ac.builder, index,
-				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
 	} else {
 		index = LLVMConstInt(ctx->i32,
 				     si_get_sampler_slot(reg->Register.Index), 0);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index e3e6cf293e1..88e01512cd6 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -37,6 +37,7 @@
 #define SI_NUM_SAMPLERS			32 /* OpenGL textures units per shader */
 #define SI_NUM_CONST_BUFFERS		16
 #define SI_NUM_IMAGES			16
+#define SI_NUM_IMAGE_SLOTS		(SI_NUM_IMAGES * 2) /* the second half are FMASK slots */
 #define SI_NUM_SHADER_BUFFERS		16
 
 struct si_screen;
@@ -647,14 +648,16 @@ static inline unsigned si_get_shaderbuf_slot(unsigned slot)
 
 static inline unsigned si_get_sampler_slot(unsigned slot)
 {
-	/* samplers are in slots [8..39], ascending */
-	return SI_NUM_IMAGES / 2 + slot;
+	/* 32 samplers are in sampler slots [16..47], 16 dw per slot, ascending */
+	/* those are equivalent to image slots [32..95], 8 dw per slot, ascending */
+	return SI_NUM_IMAGE_SLOTS / 2 + slot;
 }
 
 static inline unsigned si_get_image_slot(unsigned slot)
 {
-	/* images are in slots [15..0] (sampler slots [7..0]), descending */
-	return SI_NUM_IMAGES - 1 - slot;
+	/* image slots are in [31..0] (sampler slots [15..0]), descending */
+	/* images are in slots [31..16], while FMASKs are in slots [15..0] */
+	return SI_NUM_IMAGE_SLOTS - 1 - slot;
 }
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bbdd0d08b42..04443db7a44 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2633,12 +2633,13 @@ void si_get_active_slot_masks(const struct tgsi_shader_info *info,
 			      uint32_t *const_and_shader_buffers,
 			      uint64_t *samplers_and_images)
 {
-	unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers;
+	unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers;
 
 	num_shaderbufs = util_last_bit(info->shader_buffers_declared);
 	num_constbufs = util_last_bit(info->const_buffers_declared);
 	/* two 8-byte images share one 16-byte slot */
 	num_images = align(util_last_bit(info->images_declared), 2);
+	num_msaa_images = align(util_last_bit(info->msaa_images_declared), 2);
 	num_samplers = util_last_bit(info->samplers_declared);
 
 	/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
@@ -2646,7 +2647,18 @@ void si_get_active_slot_masks(const struct tgsi_shader_info *info,
 	*const_and_shader_buffers =
 		u_bit_consecutive(start, num_shaderbufs + num_constbufs);
 
-	/* The layout is: image[last] ... image[0], sampler[0] ... sampler[last] */
+	/* The layout is:
+	 *   - fmask[last] ... fmask[0]     go to [15-last .. 15]
+	 *   - image[last] ... image[0]     go to [31-last .. 31]
+	 *   - sampler[0] ... sampler[last] go to [32 .. 32+last*2+1]
+	 *
+	 * FMASKs for images are placed separately, because MSAA images are rare,
+	 * and so we can benefit from a better cache hit rate if we keep image
+	 * descriptors together.
+	 */
+	if (num_msaa_images)
+		num_images = SI_NUM_IMAGES + num_msaa_images; /* add FMASK descriptors */
+
 	start = si_get_image_slot(num_images - 1) / 2;
 	*samplers_and_images =
 		u_bit_consecutive64(start, num_images / 2 + num_samplers);




More information about the mesa-commit mailing list