[Mesa-dev] [PATCH] radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jul 26 13:10:21 UTC 2017


Thanks for the fix, it is better than mine. :-)

Tested-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 07/26/2017 03:03 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This fixes corruption with bindless textures in Dawn Of War 3.
> 
> The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
> was only updated after the first draw call. The problem is bindless textures
> are checked for decompression every draw call and we would only decompress
> after the first draw call. The solution is to set dirtiness after the last
> draw call to the framebuffer, so the (unconditional) decompression of
> bindless textures happens at the right time.
> 
> Cc: 17.2 <mesa-stable at lists.freedesktop.org>
> ---
>   src/gallium/drivers/radeonsi/si_blit.c       | 31 ++++++++++++++++-------
>   src/gallium/drivers/radeonsi/si_pipe.h       |  1 -
>   src/gallium/drivers/radeonsi/si_state.c      | 38 ++++++++++++++++++++++++++--
>   src/gallium/drivers/radeonsi/si_state.h      |  1 +
>   src/gallium/drivers/radeonsi/si_state_draw.c | 31 -----------------------
>   5 files changed, 59 insertions(+), 43 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 631676b..caa4c3c 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -114,23 +114,21 @@ si_blit_dbcb_copy(struct si_context *sctx,
>   	unsigned fully_copied_levels = 0;
>   
>   	if (planes & PIPE_MASK_Z)
>   		sctx->dbcb_depth_copy_enabled = true;
>   	if (planes & PIPE_MASK_S)
>   		sctx->dbcb_stencil_copy_enabled = true;
>   	si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   
>   	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
>   
> -	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
>   	sctx->decompression_enabled = true;
> -	sctx->framebuffer.do_update_surf_dirtiness = false;
>   
>   	while (level_mask) {
>   		unsigned level = u_bit_scan(&level_mask);
>   
>   		/* The smaller the mipmap level, the less layers there are
>   		 * as far as 3D textures are concerned. */
>   		max_layer = util_max_layer(&src->resource.b.b, level);
>   		checked_last_layer = MIN2(last_layer, max_layer);
>   
>   		surf_tmpl.u.tex.level = level;
> @@ -162,21 +160,20 @@ si_blit_dbcb_copy(struct si_context *sctx,
>   			pipe_surface_reference(&zsurf, NULL);
>   			pipe_surface_reference(&cbsurf, NULL);
>   		}
>   
>   		if (first_layer == 0 && last_layer >= max_layer &&
>   		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
>   			fully_copied_levels |= 1u << level;
>   	}
>   
>   	sctx->decompression_enabled = false;
> -	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
>   	sctx->dbcb_depth_copy_enabled = false;
>   	sctx->dbcb_stencil_copy_enabled = false;
>   	si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   
>   	return fully_copied_levels;
>   }
>   
>   static void si_blit_decompress_depth(struct pipe_context *ctx,
>   				     struct r600_texture *texture,
>   				     struct r600_texture *staging,
> @@ -218,23 +215,21 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
>   		return;
>   
>   	if (planes & PIPE_MASK_S)
>   		sctx->db_flush_stencil_inplace = true;
>   	if (planes & PIPE_MASK_Z)
>   		sctx->db_flush_depth_inplace = true;
>   	si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   
>   	surf_tmpl.format = texture->resource.b.b.format;
>   
> -	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
>   	sctx->decompression_enabled = true;
> -	sctx->framebuffer.do_update_surf_dirtiness = false;
>   
>   	while (level_mask) {
>   		unsigned level = u_bit_scan(&level_mask);
>   
>   		surf_tmpl.u.tex.level = level;
>   
>   		/* The smaller the mipmap level, the less layers there are
>   		 * as far as 3D textures are concerned. */
>   		max_layer = util_max_layer(&texture->resource.b.b, level);
>   		checked_last_layer = MIN2(last_layer, max_layer);
> @@ -260,21 +255,20 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
>   			fully_decompressed_mask |= 1u << level;
>   		}
>   	}
>   
>   	if (planes & PIPE_MASK_Z)
>   		texture->dirty_level_mask &= ~fully_decompressed_mask;
>   	if (planes & PIPE_MASK_S)
>   		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
>   
>   	sctx->decompression_enabled = false;
> -	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
>   	sctx->db_flush_depth_inplace = false;
>   	sctx->db_flush_stencil_inplace = false;
>   	si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   }
>   
>   /* Helper function of si_flush_depth_texture: decompress the given levels
>    * of Z and/or S planes in place.
>    */
>   static void
>   si_blit_decompress_zs_in_place(struct si_context *sctx,
> @@ -467,23 +461,21 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
>   		for (int i = first_level; i <= last_level; i++) {
>   			if (!vi_dcc_enabled(rtex, i))
>   				level_mask &= ~(1 << i);
>   		}
>   	} else if (rtex->fmask.size) {
>   		custom_blend = sctx->custom_blend_fmask_decompress;
>   	} else {
>   		custom_blend = sctx->custom_blend_eliminate_fastclear;
>   	}
>   
> -	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
>   	sctx->decompression_enabled = true;
> -	sctx->framebuffer.do_update_surf_dirtiness = false;
>   
>   	while (level_mask) {
>   		unsigned level = u_bit_scan(&level_mask);
>   
>   		/* The smaller the mipmap level, the less layers there are
>   		 * as far as 3D textures are concerned. */
>   		max_layer = util_max_layer(&rtex->resource.b.b, level);
>   		checked_last_layer = MIN2(last_layer, max_layer);
>   
>   		for (layer = first_layer; layer <= checked_last_layer; layer++) {
> @@ -512,21 +504,20 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
>   		}
>   
>   		/* The texture will always be dirty if some layers aren't flushed.
>   		 * I don't think this case occurs often though. */
>   		if (first_layer == 0 && last_layer >= max_layer) {
>   			rtex->dirty_level_mask &= ~(1 << level);
>   		}
>   	}
>   
>   	sctx->decompression_enabled = false;
> -	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
>   
>   	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
>   			 SI_CONTEXT_INV_GLOBAL_L2 |
>   			 SI_CONTEXT_INV_VMEM_L1;
>   }
>   
>   static void
>   si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
>   			    unsigned first_level, unsigned last_level)
>   {
> @@ -964,24 +955,46 @@ static void si_decompress_subresource(struct pipe_context *ctx,
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	struct r600_texture *rtex = (struct r600_texture*)tex;
>   
>   	if (rtex->db_compatible) {
>   		planes &= PIPE_MASK_Z | PIPE_MASK_S;
>   
>   		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
>   			planes &= ~PIPE_MASK_S;
>   
> +		/* If we've rendered into the framebuffer and it's a blitting
> +		 * source, make sure the decompression pass is invoked
> +		 * by dirtying the framebuffer.
> +		 */
> +		if (sctx->framebuffer.state.zsbuf &&
> +		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
> +		    sctx->framebuffer.state.zsbuf->texture == tex)
> +			si_update_fb_dirtiness_after_rendering(sctx);
> +
>   		si_decompress_depth(sctx, rtex, planes,
>   				    level, level,
>   				    first_layer, last_layer);
>   	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
> +		/* If we've rendered into the framebuffer and it's a blitting
> +		 * source, make sure the decompression pass is invoked
> +		 * by dirtying the framebuffer.
> +		 */
> +		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> +			if (sctx->framebuffer.state.cbufs[i] &&
> +			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
> +			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
> +				si_update_fb_dirtiness_after_rendering(sctx);
> +				break;
> +			}
> +		}
> +
>   		si_blit_decompress_color(ctx, rtex, level, level,
>   					 first_layer, last_layer, false);
>   	}
>   }
>   
>   struct texture_orig_info {
>   	unsigned format;
>   	unsigned width0;
>   	unsigned height0;
>   	unsigned npix_x;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index d25705b..2e8a3bf 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -180,21 +180,20 @@ struct si_framebuffer {
>   	unsigned			spi_shader_col_format_blend;
>   	unsigned			spi_shader_col_format_blend_alpha;
>   	ubyte				nr_samples:5; /* at most 16xAA */
>   	ubyte				log_samples:3; /* at most 4 = 16xAA */
>   	ubyte				compressed_cb_mask;
>   	ubyte				color_is_int8;
>   	ubyte				color_is_int10;
>   	ubyte				dirty_cbufs;
>   	bool				dirty_zsbuf;
>   	bool				any_dst_linear;
> -	bool				do_update_surf_dirtiness;
>   };
>   
>   struct si_clip_state {
>   	struct r600_atom		atom;
>   	struct pipe_clip_state		state;
>   };
>   
>   struct si_sample_locs {
>   	struct r600_atom	atom;
>   	unsigned		nr_samples;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 42d81e7..b7f5566 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2445,20 +2445,52 @@ static void si_init_depth_surface(struct si_context *sctx,
>   			}
>   		}
>   	}
>   
>   	surf->db_z_info = z_info;
>   	surf->db_stencil_info = s_info;
>   
>   	surf->depth_initialized = true;
>   }
>   
> +void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
> +{
> +	if (sctx->decompression_enabled)
> +		return;
> +
> +	if (sctx->framebuffer.state.zsbuf) {
> +		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
> +		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
> +
> +		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> +
> +		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
> +			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
> +	}
> +	if (sctx->framebuffer.compressed_cb_mask) {
> +		struct pipe_surface *surf;
> +		struct r600_texture *rtex;
> +		unsigned mask = sctx->framebuffer.compressed_cb_mask;
> +
> +		do {
> +			unsigned i = u_bit_scan(&mask);
> +			surf = sctx->framebuffer.state.cbufs[i];
> +			rtex = (struct r600_texture*)surf->texture;
> +
> +			if (rtex->fmask.size)
> +				rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> +			if (rtex->dcc_gather_statistics)
> +				rtex->separate_dcc_dirty = true;
> +		} while (mask);
> +	}
> +}
> +
>   static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
>   {
>   	for (int i = 0; i < state->nr_cbufs; ++i) {
>   		struct r600_surface *surf = NULL;
>   		struct r600_texture *rtex;
>   
>   		if (!state->cbufs[i])
>   			continue;
>   		surf = (struct r600_surface*)state->cbufs[i];
>   		rtex = (struct r600_texture*)surf->base.texture;
> @@ -2472,20 +2504,22 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	struct pipe_constant_buffer constbuf = {0};
>   	struct r600_surface *surf = NULL;
>   	struct r600_texture *rtex;
>   	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
>   	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
>   	bool unbound = false;
>   	int i;
>   
> +	si_update_fb_dirtiness_after_rendering(sctx);
> +
>   	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
>   		if (!sctx->framebuffer.state.cbufs[i])
>   			continue;
>   
>   		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
>   		if (rtex->dcc_gather_statistics)
>   			vi_separate_dcc_stop_query(ctx, rtex);
>   	}
>   
>   	/* Disable DCC if the formats are incompatible. */
> @@ -2669,21 +2703,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
>   	}
>   
>   	sctx->do_update_shaders = true;
>   
>   	if (!sctx->decompression_enabled) {
>   		/* Prevent textures decompression when the framebuffer state
>   		 * changes come from the decompression passes themselves.
>   		 */
>   		sctx->need_check_render_feedback = true;
> -		sctx->framebuffer.do_update_surf_dirtiness = true;
>   	}
>   }
>   
>   static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
>   {
>   	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>   	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
>   	unsigned i, nr_cbufs = state->nr_cbufs;
>   	struct r600_texture *tex = NULL;
>   	struct r600_surface *cb = NULL;
> @@ -3977,28 +4010,29 @@ static void si_set_tess_state(struct pipe_context *ctx,
>   			       &cb.buffer_offset);
>   
>   	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
>   	pipe_resource_reference(&cb.buffer, NULL);
>   }
>   
>   static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
> +	si_update_fb_dirtiness_after_rendering(sctx);
> +
>   	/* Multisample surfaces are flushed in si_decompress_textures. */
>   	if (sctx->framebuffer.nr_samples <= 1 &&
>   	    sctx->framebuffer.state.nr_cbufs) {
>   		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>   				 SI_CONTEXT_INV_GLOBAL_L2 |
>   				 SI_CONTEXT_FLUSH_AND_INV_CB;
>   	}
> -	sctx->framebuffer.do_update_surf_dirtiness = true;
>   }
>   
>   /* This only ensures coherency for shader image/buffer stores. */
>   static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	/* Subsequent commands must wait for all shader invocations to
>   	 * complete. */
>   	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index ec28aba..acc8fb7 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -377,20 +377,21 @@ si_make_texture_descriptor(struct si_screen *screen,
>   			   unsigned first_layer, unsigned last_layer,
>   			   unsigned width, unsigned height, unsigned depth,
>   			   uint32_t *state,
>   			   uint32_t *fmask_state);
>   struct pipe_sampler_view *
>   si_create_sampler_view_custom(struct pipe_context *ctx,
>   			      struct pipe_resource *texture,
>   			      const struct pipe_sampler_view *state,
>   			      unsigned width0, unsigned height0,
>   			      unsigned force_level);
> +void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
>   
>   /* si_state_shader.c */
>   bool si_update_shaders(struct si_context *sctx);
>   void si_init_shader_functions(struct si_context *sctx);
>   bool si_init_shader_cache(struct si_screen *sscreen);
>   void si_destroy_shader_cache(struct si_screen *sscreen);
>   void si_init_shader_selector_async(void *job, int thread_index);
>   void si_get_active_slot_masks(const struct tgsi_shader_info *info,
>   			      uint32_t *const_and_shader_buffers,
>   			      uint64_t *samplers_and_images);
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 332e0c4..c1edf7f 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1200,21 +1200,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>   		return;
>   	}
>   
>   	/* Recompute and re-emit the texture resource states if needed. */
>   	dirty_tex_counter = p_atomic_read(&sctx->b.screen->dirty_tex_counter);
>   	if (unlikely(dirty_tex_counter != sctx->b.last_dirty_tex_counter)) {
>   		sctx->b.last_dirty_tex_counter = dirty_tex_counter;
>   		sctx->framebuffer.dirty_cbufs |=
>   			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
>   		sctx->framebuffer.dirty_zsbuf = true;
> -		sctx->framebuffer.do_update_surf_dirtiness = true;
>   		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
>   		si_update_all_texture_descriptors(sctx);
>   	}
>   
>   	si_decompress_graphics_textures(sctx);
>   
>   	/* Set the rasterization primitive type.
>   	 *
>   	 * This must be done after si_decompress_textures, which can call
>   	 * draw_vbo recursively, and before si_update_shaders, which uses
> @@ -1385,50 +1384,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>   
>   	/* Workaround for a VGT hang when streamout is enabled.
>   	 * It must be done after drawing. */
>   	if ((sctx->b.family == CHIP_HAWAII ||
>   	     sctx->b.family == CHIP_TONGA ||
>   	     sctx->b.family == CHIP_FIJI) &&
>   	    r600_get_strmout_en(&sctx->b)) {
>   		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
>   	}
>   
> -	if (sctx->framebuffer.do_update_surf_dirtiness) {
> -		/* Set the depth buffer as dirty. */
> -		if (sctx->framebuffer.state.zsbuf) {
> -			struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
> -			struct r600_texture *rtex = (struct r600_texture *)surf->texture;
> -
> -			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> -
> -			if (rtex->surface.flags & RADEON_SURF_SBUFFER)
> -				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
> -		}
> -		if (sctx->framebuffer.compressed_cb_mask) {
> -			struct pipe_surface *surf;
> -			struct r600_texture *rtex;
> -			unsigned mask = sctx->framebuffer.compressed_cb_mask;
> -
> -			do {
> -				unsigned i = u_bit_scan(&mask);
> -				surf = sctx->framebuffer.state.cbufs[i];
> -				rtex = (struct r600_texture*)surf->texture;
> -
> -				if (rtex->fmask.size)
> -					rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> -				if (rtex->dcc_gather_statistics)
> -					rtex->separate_dcc_dirty = true;
> -			} while (mask);
> -		}
> -		sctx->framebuffer.do_update_surf_dirtiness = false;
> -	}
> -
>   	sctx->b.num_draw_calls++;
>   	if (info->primitive_restart)
>   		sctx->b.num_prim_restart_calls++;
>   	if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
>   		sctx->b.num_spill_draw_calls++;
>   	if (index_size && indexbuf != info->index.resource)
>   		pipe_resource_reference(&indexbuf, NULL);
>   }
>   
>   void si_trace_emit(struct si_context *sctx)
> 


More information about the mesa-dev mailing list