[Mesa-dev] [PATCH] radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer

Marek Olšák maraeo at gmail.com
Wed Jul 26 13:03:20 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

This fixes corruption with bindless textures in Dawn Of War 3.

The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
was only updated after the first draw call. The problem is bindless textures
are checked for decompression every draw call and we would only decompress
after the first draw call. The solution is to set dirtiness after the last
draw call to the framebuffer, so the (unconditional) decompression of
bindless textures happens at the right time.

Cc: 17.2 <mesa-stable at lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_blit.c       | 31 ++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 -
 src/gallium/drivers/radeonsi/si_state.c      | 38 ++++++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_state.h      |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c | 31 -----------------------
 5 files changed, 59 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 631676b..caa4c3c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -114,23 +114,21 @@ si_blit_dbcb_copy(struct si_context *sctx,
 	unsigned fully_copied_levels = 0;
 
 	if (planes & PIPE_MASK_Z)
 		sctx->dbcb_depth_copy_enabled = true;
 	if (planes & PIPE_MASK_S)
 		sctx->dbcb_stencil_copy_enabled = true;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
 
 		/* The smaller the mipmap level, the less layers there are
 		 * as far as 3D textures are concerned. */
 		max_layer = util_max_layer(&src->resource.b.b, level);
 		checked_last_layer = MIN2(last_layer, max_layer);
 
 		surf_tmpl.u.tex.level = level;
@@ -162,21 +160,20 @@ si_blit_dbcb_copy(struct si_context *sctx,
 			pipe_surface_reference(&zsurf, NULL);
 			pipe_surface_reference(&cbsurf, NULL);
 		}
 
 		if (first_layer == 0 && last_layer >= max_layer &&
 		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
 			fully_copied_levels |= 1u << level;
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	return fully_copied_levels;
 }
 
 static void si_blit_decompress_depth(struct pipe_context *ctx,
 				     struct r600_texture *texture,
 				     struct r600_texture *staging,
@@ -218,23 +215,21 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 		return;
 
 	if (planes & PIPE_MASK_S)
 		sctx->db_flush_stencil_inplace = true;
 	if (planes & PIPE_MASK_Z)
 		sctx->db_flush_depth_inplace = true;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
 
 		surf_tmpl.u.tex.level = level;
 
 		/* The smaller the mipmap level, the less layers there are
 		 * as far as 3D textures are concerned. */
 		max_layer = util_max_layer(&texture->resource.b.b, level);
 		checked_last_layer = MIN2(last_layer, max_layer);
@@ -260,21 +255,20 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 			fully_decompressed_mask |= 1u << level;
 		}
 	}
 
 	if (planes & PIPE_MASK_Z)
 		texture->dirty_level_mask &= ~fully_decompressed_mask;
 	if (planes & PIPE_MASK_S)
 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->db_flush_depth_inplace = false;
 	sctx->db_flush_stencil_inplace = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
 /* Helper function of si_flush_depth_texture: decompress the given levels
  * of Z and/or S planes in place.
  */
 static void
 si_blit_decompress_zs_in_place(struct si_context *sctx,
@@ -467,23 +461,21 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 		for (int i = first_level; i <= last_level; i++) {
 			if (!vi_dcc_enabled(rtex, i))
 				level_mask &= ~(1 << i);
 		}
 	} else if (rtex->fmask.size) {
 		custom_blend = sctx->custom_blend_fmask_decompress;
 	} else {
 		custom_blend = sctx->custom_blend_eliminate_fastclear;
 	}
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
 
 		/* The smaller the mipmap level, the less layers there are
 		 * as far as 3D textures are concerned. */
 		max_layer = util_max_layer(&rtex->resource.b.b, level);
 		checked_last_layer = MIN2(last_layer, max_layer);
 
 		for (layer = first_layer; layer <= checked_last_layer; layer++) {
@@ -512,21 +504,20 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 		}
 
 		/* The texture will always be dirty if some layers aren't flushed.
 		 * I don't think this case occurs often though. */
 		if (first_layer == 0 && last_layer >= max_layer) {
 			rtex->dirty_level_mask &= ~(1 << level);
 		}
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 
 	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 			 SI_CONTEXT_INV_GLOBAL_L2 |
 			 SI_CONTEXT_INV_VMEM_L1;
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
 			    unsigned first_level, unsigned last_level)
 {
@@ -964,24 +955,46 @@ static void si_decompress_subresource(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct r600_texture *rtex = (struct r600_texture*)tex;
 
 	if (rtex->db_compatible) {
 		planes &= PIPE_MASK_Z | PIPE_MASK_S;
 
 		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
 			planes &= ~PIPE_MASK_S;
 
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		if (sctx->framebuffer.state.zsbuf &&
+		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
+		    sctx->framebuffer.state.zsbuf->texture == tex)
+			si_update_fb_dirtiness_after_rendering(sctx);
+
 		si_decompress_depth(sctx, rtex, planes,
 				    level, level,
 				    first_layer, last_layer);
 	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+			if (sctx->framebuffer.state.cbufs[i] &&
+			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
+			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
+				si_update_fb_dirtiness_after_rendering(sctx);
+				break;
+			}
+		}
+
 		si_blit_decompress_color(ctx, rtex, level, level,
 					 first_layer, last_layer, false);
 	}
 }
 
 struct texture_orig_info {
 	unsigned format;
 	unsigned width0;
 	unsigned height0;
 	unsigned npix_x;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d25705b..2e8a3bf 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -180,21 +180,20 @@ struct si_framebuffer {
 	unsigned			spi_shader_col_format_blend;
 	unsigned			spi_shader_col_format_blend_alpha;
 	ubyte				nr_samples:5; /* at most 16xAA */
 	ubyte				log_samples:3; /* at most 4 = 16xAA */
 	ubyte				compressed_cb_mask;
 	ubyte				color_is_int8;
 	ubyte				color_is_int10;
 	ubyte				dirty_cbufs;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
-	bool				do_update_surf_dirtiness;
 };
 
 struct si_clip_state {
 	struct r600_atom		atom;
 	struct pipe_clip_state		state;
 };
 
 struct si_sample_locs {
 	struct r600_atom	atom;
 	unsigned		nr_samples;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 42d81e7..b7f5566 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2445,20 +2445,52 @@ static void si_init_depth_surface(struct si_context *sctx,
 			}
 		}
 	}
 
 	surf->db_z_info = z_info;
 	surf->db_stencil_info = s_info;
 
 	surf->depth_initialized = true;
 }
 
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
+{
+	if (sctx->decompression_enabled)
+		return;
+
+	if (sctx->framebuffer.state.zsbuf) {
+		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
+		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
+
+		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
+	}
+	if (sctx->framebuffer.compressed_cb_mask) {
+		struct pipe_surface *surf;
+		struct r600_texture *rtex;
+		unsigned mask = sctx->framebuffer.compressed_cb_mask;
+
+		do {
+			unsigned i = u_bit_scan(&mask);
+			surf = sctx->framebuffer.state.cbufs[i];
+			rtex = (struct r600_texture*)surf->texture;
+
+			if (rtex->fmask.size)
+				rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+			if (rtex->dcc_gather_statistics)
+				rtex->separate_dcc_dirty = true;
+		} while (mask);
+	}
+}
+
 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
 {
 	for (int i = 0; i < state->nr_cbufs; ++i) {
 		struct r600_surface *surf = NULL;
 		struct r600_texture *rtex;
 
 		if (!state->cbufs[i])
 			continue;
 		surf = (struct r600_surface*)state->cbufs[i];
 		rtex = (struct r600_texture*)surf->base.texture;
@@ -2472,20 +2504,22 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_constant_buffer constbuf = {0};
 	struct r600_surface *surf = NULL;
 	struct r600_texture *rtex;
 	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
 	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
 	bool unbound = false;
 	int i;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
 		if (!sctx->framebuffer.state.cbufs[i])
 			continue;
 
 		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
 		if (rtex->dcc_gather_statistics)
 			vi_separate_dcc_stop_query(ctx, rtex);
 	}
 
 	/* Disable DCC if the formats are incompatible. */
@@ -2669,21 +2703,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
 	}
 
 	sctx->do_update_shaders = true;
 
 	if (!sctx->decompression_enabled) {
 		/* Prevent textures decompression when the framebuffer state
 		 * changes come from the decompression passes themselves.
 		 */
 		sctx->need_check_render_feedback = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 	}
 }
 
 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
 	unsigned i, nr_cbufs = state->nr_cbufs;
 	struct r600_texture *tex = NULL;
 	struct r600_surface *cb = NULL;
@@ -3977,28 +4010,29 @@ static void si_set_tess_state(struct pipe_context *ctx,
 			       &cb.buffer_offset);
 
 	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
 	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.nr_samples <= 1 &&
 	    sctx->framebuffer.state.nr_cbufs) {
 		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
 				 SI_CONTEXT_INV_GLOBAL_L2 |
 				 SI_CONTEXT_FLUSH_AND_INV_CB;
 	}
-	sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ec28aba..acc8fb7 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -377,20 +377,21 @@ si_make_texture_descriptor(struct si_screen *screen,
 			   unsigned first_layer, unsigned last_layer,
 			   unsigned width, unsigned height, unsigned depth,
 			   uint32_t *state,
 			   uint32_t *fmask_state);
 struct pipe_sampler_view *
 si_create_sampler_view_custom(struct pipe_context *ctx,
 			      struct pipe_resource *texture,
 			      const struct pipe_sampler_view *state,
 			      unsigned width0, unsigned height0,
 			      unsigned force_level);
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
 
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 bool si_init_shader_cache(struct si_screen *sscreen);
 void si_destroy_shader_cache(struct si_screen *sscreen);
 void si_init_shader_selector_async(void *job, int thread_index);
 void si_get_active_slot_masks(const struct tgsi_shader_info *info,
 			      uint32_t *const_and_shader_buffers,
 			      uint64_t *samplers_and_images);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 332e0c4..c1edf7f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1200,21 +1200,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		return;
 	}
 
 	/* Recompute and re-emit the texture resource states if needed. */
 	dirty_tex_counter = p_atomic_read(&sctx->b.screen->dirty_tex_counter);
 	if (unlikely(dirty_tex_counter != sctx->b.last_dirty_tex_counter)) {
 		sctx->b.last_dirty_tex_counter = dirty_tex_counter;
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 		si_update_all_texture_descriptors(sctx);
 	}
 
 	si_decompress_graphics_textures(sctx);
 
 	/* Set the rasterization primitive type.
 	 *
 	 * This must be done after si_decompress_textures, which can call
 	 * draw_vbo recursively, and before si_update_shaders, which uses
@@ -1385,50 +1384,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	/* Workaround for a VGT hang when streamout is enabled.
 	 * It must be done after drawing. */
 	if ((sctx->b.family == CHIP_HAWAII ||
 	     sctx->b.family == CHIP_TONGA ||
 	     sctx->b.family == CHIP_FIJI) &&
 	    r600_get_strmout_en(&sctx->b)) {
 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
 	}
 
-	if (sctx->framebuffer.do_update_surf_dirtiness) {
-		/* Set the depth buffer as dirty. */
-		if (sctx->framebuffer.state.zsbuf) {
-			struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
-			struct r600_texture *rtex = (struct r600_texture *)surf->texture;
-
-			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-
-			if (rtex->surface.flags & RADEON_SURF_SBUFFER)
-				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
-		}
-		if (sctx->framebuffer.compressed_cb_mask) {
-			struct pipe_surface *surf;
-			struct r600_texture *rtex;
-			unsigned mask = sctx->framebuffer.compressed_cb_mask;
-
-			do {
-				unsigned i = u_bit_scan(&mask);
-				surf = sctx->framebuffer.state.cbufs[i];
-				rtex = (struct r600_texture*)surf->texture;
-
-				if (rtex->fmask.size)
-					rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-				if (rtex->dcc_gather_statistics)
-					rtex->separate_dcc_dirty = true;
-			} while (mask);
-		}
-		sctx->framebuffer.do_update_surf_dirtiness = false;
-	}
-
 	sctx->b.num_draw_calls++;
 	if (info->primitive_restart)
 		sctx->b.num_prim_restart_calls++;
 	if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
 		sctx->b.num_spill_draw_calls++;
 	if (index_size && indexbuf != info->index.resource)
 		pipe_resource_reference(&indexbuf, NULL);
 }
 
 void si_trace_emit(struct si_context *sctx)
-- 
2.7.4



More information about the mesa-dev mailing list