[Mesa-dev] [PATCH 4/9] radeonsi: flush CB after MSAA only when transitioning from CB to textures

Marek Olšák maraeo at gmail.com
Fri Jun 16 12:58:00 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

The main flush before texturing is done after the FMASK decompress pass.

After MSAA rendering, CB is no longer flushed in set_framebuffer_state,
texture_barrier, or memory_barrier if the current color buffer is MSAA.
We rely entirely on the FMASK decompress pass for that flushing.

Some CB passes (FMASK decompress, DCC decompress, and CB resolve) need
an explicit CB flush both before and after the pass.
---
 src/gallium/drivers/radeonsi/si_blit.c  | 29 ++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_state.c | 43 ++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 0993ebd..1159594 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -398,20 +398,28 @@ si_decompress_depth(struct si_context *sctx,
 		 * state becomes 0 for the whole mipmap tree and all planes.
 		 * (there is nothing else to flush)
 		 */
 		if (tex->tc_compatible_htile) {
 			if (r600_can_sample_zs(tex, false))
 				tex->dirty_level_mask = 0;
 			if (r600_can_sample_zs(tex, true))
 				tex->stencil_dirty_level_mask = 0;
 		}
 	}
+	/* set_framebuffer_state takes care of coherency for single-sample.
+	 * The DB->CB copy uses CB for the final writes.
+	 */
+	if (copy_planes && tex->resource.b.b.nr_samples > 1) {
+		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+				 SI_CONTEXT_INV_GLOBAL_L2 |
+				 SI_CONTEXT_FLUSH_AND_INV_CB;
+	}
 }
 
 static void
 si_decompress_sampler_depth_textures(struct si_context *sctx,
 				     struct si_textures_info *textures)
 {
 	unsigned i;
 	unsigned mask = textures->needs_depth_decompress_mask;
 
 	while (mask) {
@@ -480,36 +488,49 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 
 		for (layer = first_layer; layer <= checked_last_layer; layer++) {
 			struct pipe_surface *cbsurf, surf_tmpl;
 
 			surf_tmpl.format = rtex->resource.b.b.format;
 			surf_tmpl.u.tex.level = level;
 			surf_tmpl.u.tex.first_layer = layer;
 			surf_tmpl.u.tex.last_layer = layer;
 			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
 
+			/* Required before and after FMASK and DCC_DECOMPRESS. */
+			if (custom_blend == sctx->custom_blend_fmask_decompress ||
+			    custom_blend == sctx->custom_blend_dcc_decompress)
+				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
 			si_blitter_begin(ctx, SI_DECOMPRESS);
 			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
 			si_blitter_end(ctx);
 
+			if (custom_blend == sctx->custom_blend_fmask_decompress ||
+			    custom_blend == sctx->custom_blend_dcc_decompress)
+				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
 			pipe_surface_reference(&cbsurf, NULL);
 		}
 
 		/* The texture will always be dirty if some layers aren't flushed.
 		 * I don't think this case occurs often though. */
 		if (first_layer == 0 && last_layer >= max_layer) {
 			rtex->dirty_level_mask &= ~(1 << level);
 		}
 	}
 
 	sctx->decompression_enabled = false;
 	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
+
+	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
+			 SI_CONTEXT_INV_VMEM_L1;
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
 			    unsigned first_level, unsigned last_level)
 {
 	/* CMASK or DCC can be discarded and we can still end up here. */
 	if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
 		return;
 
@@ -1148,27 +1169,35 @@ void si_resource_copy_region(struct pipe_context *ctx,
 	pipe_surface_reference(&dst_view, NULL);
 	pipe_sampler_view_reference(&src_view, NULL);
 }
 
 static void si_do_CB_resolve(struct si_context *sctx,
 			     const struct pipe_blit_info *info,
 			     struct pipe_resource *dst,
 			     unsigned dst_level, unsigned dst_z,
 			     enum pipe_format format)
 {
+	/* Required before and after CB_RESOLVE. */
+	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
 	si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
 	util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
 					  info->src.resource, info->src.box.z,
 					  ~0, sctx->custom_blend_resolve,
 					  format);
 	si_blitter_end(&sctx->b.b);
+
+	/* Flush caches for possible texturing. */
+	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
+			 SI_CONTEXT_INV_VMEM_L1;
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 				     const struct pipe_blit_info *info)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct r600_texture *src = (struct r600_texture*)info->src.resource;
 	struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
 	MAYBE_UNUSED struct r600_texture *rtmp;
 	unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 44e5f1c..ab27af2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2523,34 +2523,42 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 	/* Only flush TC when changing the framebuffer state, because
 	 * the only client not using TC that can change textures is
 	 * the framebuffer.
 	 *
 	 * Wait for compute shaders because of possible transitions:
 	 * - FB write -> shader read
 	 * - shader write -> FB read
 	 *
 	 * DB caches are flushed on demand (using si_decompress_textures).
+	 *
+	 * When MSAA is enabled, CB and TC caches are flushed on demand
+	 * (after FMASK decompression).
 	 */
-	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-			 SI_CONTEXT_INV_GLOBAL_L2 |
-			 SI_CONTEXT_FLUSH_AND_INV_CB |
-			 SI_CONTEXT_CS_PARTIAL_FLUSH;
+	if (sctx->framebuffer.nr_samples <= 1) {
+		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+				 SI_CONTEXT_INV_GLOBAL_L2 |
+				 SI_CONTEXT_FLUSH_AND_INV_CB;
+	}
+	sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	/* u_blitter doesn't invoke depth decompression when it does multiple
 	 * blits in a row, but the only case when it matters for DB is when
 	 * doing generate_mipmap. So here we flush DB manually between
 	 * individual generate_mipmap blits.
 	 * Note that lower mipmap levels aren't compressed.
 	 */
-	if (sctx->generate_mipmap_for_depth)
-		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+	if (sctx->generate_mipmap_for_depth) {
+		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+				 SI_CONTEXT_INV_GLOBAL_L2 |
+				 SI_CONTEXT_FLUSH_AND_INV_DB;
+	}
 
 	/* Take the maximum of the old and new count. If the new count is lower,
 	 * dirtying is needed to disable the unbound colorbuffers.
 	 */
 	sctx->framebuffer.dirty_cbufs |=
 		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
 	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
 
 	si_dec_framebuffer_counters(&sctx->framebuffer.state);
 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
@@ -3948,23 +3956,26 @@ static void si_set_tess_state(struct pipe_context *ctx,
 			       &cb.buffer_offset);
 
 	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
 	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
-			 SI_CONTEXT_INV_GLOBAL_L2 |
-			 SI_CONTEXT_FLUSH_AND_INV_CB;
+	/* Multisample surfaces are flushed in si_decompress_textures. */
+	if (sctx->framebuffer.nr_samples <= 1) {
+		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+				 SI_CONTEXT_INV_GLOBAL_L2 |
+				 SI_CONTEXT_FLUSH_AND_INV_CB;
+	}
 	sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
@@ -3988,26 +3999,30 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 	}
 
 	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
 		/* Indices are read through TC L2 since VI.
 		 * L1 isn't used.
 		 */
 		if (sctx->screen->b.chip_class <= CIK)
 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 	}
 
-	/* Depth and stencil are flushed in si_decompress_textures when needed. */
-	if (flags & PIPE_BARRIER_FRAMEBUFFER)
-		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+	/* MSAA color, any depth and any stencil are flushed in
+	 * si_decompress_textures when needed.
+	 */
+	if (flags & PIPE_BARRIER_FRAMEBUFFER &&
+	    sctx->framebuffer.nr_samples <= 1) {
+		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+				 SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+	}
 
-	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
-		     PIPE_BARRIER_INDIRECT_BUFFER))
+	if (flags & PIPE_BARRIER_INDIRECT_BUFFER)
 		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }
 
 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 {
 	struct pipe_blend_state blend;
 
 	memset(&blend, 0, sizeof(blend));
 	blend.independent_blend_enable = true;
 	blend.rt[0].colormask = 0xf;
-- 
2.7.4



More information about the mesa-dev mailing list