[Mesa-dev] [PATCH 1/5] radeonsi/gfx9: add support for PIPE_ALIGNED=0

Marek Olšák maraeo at gmail.com
Thu Feb 28 21:20:21 UTC 2019


From: Marek Olšák <marek.olsak at amd.com>

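This is needed by displayable DCC.

On GFX9, if DCC is enabled with PIPE_ALIGNED=0 and shaders read the DCC
metadata, we need to flush the whole L2 after rendering; invalidating only
the L2 metadata is not enough in that case.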
---
 src/gallium/drivers/radeonsi/si_blit.c        |  7 ++++---
 .../drivers/radeonsi/si_compute_blit.c        |  9 +++++++--
 src/gallium/drivers/radeonsi/si_pipe.h        |  6 ++++--
 src/gallium/drivers/radeonsi/si_state.c       | 20 ++++++++++++++-----
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index f39cb5d143f..7613a63e3cb 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -414,21 +414,21 @@ si_decompress_depth(struct si_context *sctx,
 		 */
 		si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples,
 					   inplace_planes & PIPE_MASK_S,
 					   tc_compat_htile);
 	}
 	/* set_framebuffer_state takes care of coherency for single-sample.
 	 * The DB->CB copy uses CB for the final writes.
 	 */
 	if (copy_planes && tex->buffer.b.b.nr_samples > 1)
 		si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples,
-					   false);
+					   false, true /* no DCC */);
 }
 
 static void
 si_decompress_sampler_depth_textures(struct si_context *sctx,
 				     struct si_samplers *textures)
 {
 	unsigned i;
 	unsigned mask = textures->needs_depth_decompress_mask;
 
 	while (mask) {
@@ -527,21 +527,22 @@ static void si_blit_decompress_color(struct si_context *sctx,
 
 		/* The texture will always be dirty if some layers aren't flushed.
 		 * I don't think this case occurs often though. */
 		if (first_layer == 0 && last_layer >= max_layer) {
 			tex->dirty_level_mask &= ~(1 << level);
 		}
 	}
 
 	sctx->decompression_enabled = false;
 	si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples,
-				   vi_dcc_enabled(tex, first_level));
+				   vi_dcc_enabled(tex, first_level),
+				   tex->surface.u.gfx9.dcc.pipe_aligned);
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex,
 			    unsigned first_level, unsigned last_level)
 {
 	/* CMASK or DCC can be discarded and we can still end up here. */
 	if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset)
 		return;
 
@@ -1069,21 +1070,21 @@ static void si_do_CB_resolve(struct si_context *sctx,
 
 	si_blitter_begin(sctx, SI_COLOR_RESOLVE |
 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
 	util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
 					  info->src.resource, info->src.box.z,
 					  ~0, sctx->custom_blend_resolve,
 					  format);
 	si_blitter_end(sctx);
 
 	/* Flush caches for possible texturing. */
-	si_make_CB_shader_coherent(sctx, 1, false);
+	si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */);
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 				     const struct pipe_blit_info *info)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct si_texture *src = (struct si_texture*)info->src.resource;
 	struct si_texture *dst = (struct si_texture*)info->dst.resource;
 	MAYBE_UNUSED struct si_texture *stmp;
 	unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index f5e9c02dd10..2ce56d6a81a 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -317,21 +317,25 @@ void si_compute_copy_image(struct si_context *sctx,
 	unsigned depth = src_box->depth;
 
 	unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0};
 
 	if (width == 0 || height == 0)
 		return;
 
 	si_compute_internal_begin(sctx);
 	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
 		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
-	si_make_CB_shader_coherent(sctx, dst->nr_samples, true);
+
+	/* src and dst have the same number of samples. */
+	si_make_CB_shader_coherent(sctx, src->nr_samples, true,
+				   /* Only src can have DCC.*/
+				   ((struct si_texture*)src)->surface.u.gfx9.dcc.pipe_aligned);
 
 	struct pipe_constant_buffer saved_cb = {};
 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
 
 	struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
 	struct pipe_image_view saved_image[2] = {0};
 	util_copy_image_view(&saved_image[0], &images->views[0]);
 	util_copy_image_view(&saved_image[1], &images->views[1]);
 
 	void *saved_cs = sctx->cs_shader_state.program;
@@ -443,21 +447,22 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
 		memcpy(data + 4, color_srgb.ui, sizeof(color->ui));
 	} else {
 		memcpy(data + 4, color->ui, sizeof(color->ui));
 	}
 
 	si_compute_internal_begin(sctx);
 	sctx->render_cond_force_off = !render_condition_enabled;
 
 	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
 		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
-	si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true);
+	si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true,
+				   true /* DCC is not possible with image stores */);
 
 	struct pipe_constant_buffer saved_cb = {};
 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
 
 	struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
 	struct pipe_image_view saved_image = {0};
 	util_copy_image_view(&saved_image, &images->views[0]);
 
 	void *saved_cs = sctx->cs_shader_state.program;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b3198d45ea6..39152587a99 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -631,20 +631,21 @@ struct si_framebuffer {
 	ubyte				compressed_cb_mask;
 	ubyte				uncompressed_cb_mask;
 	ubyte				color_is_int8;
 	ubyte				color_is_int10;
 	ubyte				dirty_cbufs;
 	ubyte				dcc_overwrite_combiner_watermark;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
 	bool				CB_has_shader_readable_metadata;
 	bool				DB_has_shader_readable_metadata;
+	bool				all_DCC_pipe_aligned;
 };
 
 enum si_quant_mode {
 	/* This is the list we want to support. */
 	SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
 	SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
 	SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,
 };
 
 struct si_signed_scissor {
@@ -1539,31 +1540,32 @@ static inline void
 si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
 {
 	if (pipe_reference(&(*dst)->reference, &src->reference))
 		si_destroy_saved_cs(*dst);
 
 	*dst = src;
 }
 
 static inline void
 si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
-			   bool shaders_read_metadata)
+			   bool shaders_read_metadata, bool dcc_pipe_aligned)
 {
 	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 		       SI_CONTEXT_INV_VMEM_L1;
 
 	if (sctx->chip_class >= GFX9) {
 		/* Single-sample color is coherent with shaders on GFX9, but
 		 * L2 metadata must be flushed if shaders read metadata.
 		 * (DCC, CMASK).
 		 */
-		if (num_samples >= 2)
+		if (num_samples >= 2 ||
+		    (shaders_read_metadata && !dcc_pipe_aligned))
 			sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		else if (shaders_read_metadata)
 			sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
 	} else {
 		/* SI-CI-VI */
 		sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 	}
 }
 
 static inline void
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 458b108a7e3..017a06a808d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2800,23 +2800,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	 *
 	 * DB caches are flushed on demand (using si_decompress_textures).
 	 *
 	 * When MSAA is enabled, CB and TC caches are flushed on demand
 	 * (after FMASK decompression). Shader write -> FB read transitions
 	 * cannot happen for MSAA textures, because MSAA shader images are
 	 * not supported.
 	 *
 	 * Only flush and wait for CB if there is actually a bound color buffer.
 	 */
-	if (sctx->framebuffer.uncompressed_cb_mask)
+	if (sctx->framebuffer.uncompressed_cb_mask) {
 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
-					   sctx->framebuffer.CB_has_shader_readable_metadata);
+					   sctx->framebuffer.CB_has_shader_readable_metadata,
+					   sctx->framebuffer.all_DCC_pipe_aligned);
+	}
 
 	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	/* u_blitter doesn't invoke depth decompression when it does multiple
 	 * blits in a row, but the only case when it matters for DB is when
 	 * doing generate_mipmap. So here we flush DB manually between
 	 * individual generate_mipmap blits.
 	 * Note that lower mipmap levels aren't compressed.
 	 */
 	if (sctx->generate_mipmap_for_depth) {
@@ -2851,20 +2853,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	sctx->framebuffer.color_is_int10 = 0;
 
 	sctx->framebuffer.compressed_cb_mask = 0;
 	sctx->framebuffer.uncompressed_cb_mask = 0;
 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
 	sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
 	sctx->framebuffer.any_dst_linear = false;
 	sctx->framebuffer.CB_has_shader_readable_metadata = false;
 	sctx->framebuffer.DB_has_shader_readable_metadata = false;
+	sctx->framebuffer.all_DCC_pipe_aligned = true;
 	unsigned num_bpp64_colorbufs = 0;
 
 	for (i = 0; i < state->nr_cbufs; i++) {
 		if (!state->cbufs[i])
 			continue;
 
 		surf = (struct si_surface*)state->cbufs[i];
 		tex = (struct si_texture*)surf->base.texture;
 
 		if (!surf->color_initialized) {
@@ -2901,23 +2904,28 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 				     tex->buffer.b.b.nr_storage_samples);
 			sctx->framebuffer.nr_color_samples =
 				MAX2(1, sctx->framebuffer.nr_color_samples);
 		}
 
 		if (tex->surface.is_linear)
 			sctx->framebuffer.any_dst_linear = true;
 		if (tex->surface.bpe >= 8)
 			num_bpp64_colorbufs++;
 
-		if (vi_dcc_enabled(tex, surf->base.u.tex.level))
+		if (vi_dcc_enabled(tex, surf->base.u.tex.level)) {
 			sctx->framebuffer.CB_has_shader_readable_metadata = true;
 
+			if (sctx->chip_class >= GFX9 &&
+			    !tex->surface.u.gfx9.dcc.pipe_aligned)
+				sctx->framebuffer.all_DCC_pipe_aligned = false;
+		}
+
 		si_context_add_resource_size(sctx, surf->base.texture);
 
 		p_atomic_inc(&tex->framebuffers_bound);
 
 		if (tex->dcc_gather_statistics) {
 			/* Dirty tracking must be enabled for DCC usage analysis. */
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
 			vi_separate_dcc_start_query(sctx, tex);
 		}
 	}
@@ -4693,23 +4701,25 @@ static void si_set_tess_state(struct pipe_context *ctx,
 	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	si_update_fb_dirtiness_after_rendering(sctx);
 
 	/* Multisample surfaces are flushed in si_decompress_textures. */
-	if (sctx->framebuffer.uncompressed_cb_mask)
+	if (sctx->framebuffer.uncompressed_cb_mask) {
 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
-					   sctx->framebuffer.CB_has_shader_readable_metadata);
+					   sctx->framebuffer.CB_has_shader_readable_metadata,
+					   sctx->framebuffer.all_DCC_pipe_aligned);
+	}
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
 void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
 	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-- 
2.17.1



More information about the mesa-dev mailing list