[Mesa-dev] [PATCH] radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed Jul 26 13:10:21 UTC 2017
Thanks for the fix, it is better than mine. :-)
Tested-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
On 07/26/2017 03:03 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This fixes corruption with bindless textures in Dawn Of War 3.
>
> The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
> was only updated after the first draw call. The problem is bindless textures
> are checked for decompression every draw call and we would only decompress
> after the first draw call. The solution is to set dirtiness after the last
> draw call to the framebuffer, so the (unconditional) decompression of
> bindless textures happens at the right time.
>
> Cc: 17.2 <mesa-stable at lists.freedesktop.org>
> ---
> src/gallium/drivers/radeonsi/si_blit.c | 31 ++++++++++++++++-------
> src/gallium/drivers/radeonsi/si_pipe.h | 1 -
> src/gallium/drivers/radeonsi/si_state.c | 38 ++++++++++++++++++++++++++--
> src/gallium/drivers/radeonsi/si_state.h | 1 +
> src/gallium/drivers/radeonsi/si_state_draw.c | 31 -----------------------
> 5 files changed, 59 insertions(+), 43 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 631676b..caa4c3c 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -114,23 +114,21 @@ si_blit_dbcb_copy(struct si_context *sctx,
> unsigned fully_copied_levels = 0;
>
> if (planes & PIPE_MASK_Z)
> sctx->dbcb_depth_copy_enabled = true;
> if (planes & PIPE_MASK_S)
> sctx->dbcb_stencil_copy_enabled = true;
> si_mark_atom_dirty(sctx, &sctx->db_render_state);
>
> assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
>
> - bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
> sctx->decompression_enabled = true;
> - sctx->framebuffer.do_update_surf_dirtiness = false;
>
> while (level_mask) {
> unsigned level = u_bit_scan(&level_mask);
>
> /* The smaller the mipmap level, the less layers there are
> * as far as 3D textures are concerned. */
> max_layer = util_max_layer(&src->resource.b.b, level);
> checked_last_layer = MIN2(last_layer, max_layer);
>
> surf_tmpl.u.tex.level = level;
> @@ -162,21 +160,20 @@ si_blit_dbcb_copy(struct si_context *sctx,
> pipe_surface_reference(&zsurf, NULL);
> pipe_surface_reference(&cbsurf, NULL);
> }
>
> if (first_layer == 0 && last_layer >= max_layer &&
> first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
> fully_copied_levels |= 1u << level;
> }
>
> sctx->decompression_enabled = false;
> - sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
> sctx->dbcb_depth_copy_enabled = false;
> sctx->dbcb_stencil_copy_enabled = false;
> si_mark_atom_dirty(sctx, &sctx->db_render_state);
>
> return fully_copied_levels;
> }
>
> static void si_blit_decompress_depth(struct pipe_context *ctx,
> struct r600_texture *texture,
> struct r600_texture *staging,
> @@ -218,23 +215,21 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
> return;
>
> if (planes & PIPE_MASK_S)
> sctx->db_flush_stencil_inplace = true;
> if (planes & PIPE_MASK_Z)
> sctx->db_flush_depth_inplace = true;
> si_mark_atom_dirty(sctx, &sctx->db_render_state);
>
> surf_tmpl.format = texture->resource.b.b.format;
>
> - bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
> sctx->decompression_enabled = true;
> - sctx->framebuffer.do_update_surf_dirtiness = false;
>
> while (level_mask) {
> unsigned level = u_bit_scan(&level_mask);
>
> surf_tmpl.u.tex.level = level;
>
> /* The smaller the mipmap level, the less layers there are
> * as far as 3D textures are concerned. */
> max_layer = util_max_layer(&texture->resource.b.b, level);
> checked_last_layer = MIN2(last_layer, max_layer);
> @@ -260,21 +255,20 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
> fully_decompressed_mask |= 1u << level;
> }
> }
>
> if (planes & PIPE_MASK_Z)
> texture->dirty_level_mask &= ~fully_decompressed_mask;
> if (planes & PIPE_MASK_S)
> texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
>
> sctx->decompression_enabled = false;
> - sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
> sctx->db_flush_depth_inplace = false;
> sctx->db_flush_stencil_inplace = false;
> si_mark_atom_dirty(sctx, &sctx->db_render_state);
> }
>
> /* Helper function of si_flush_depth_texture: decompress the given levels
> * of Z and/or S planes in place.
> */
> static void
> si_blit_decompress_zs_in_place(struct si_context *sctx,
> @@ -467,23 +461,21 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
> for (int i = first_level; i <= last_level; i++) {
> if (!vi_dcc_enabled(rtex, i))
> level_mask &= ~(1 << i);
> }
> } else if (rtex->fmask.size) {
> custom_blend = sctx->custom_blend_fmask_decompress;
> } else {
> custom_blend = sctx->custom_blend_eliminate_fastclear;
> }
>
> - bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
> sctx->decompression_enabled = true;
> - sctx->framebuffer.do_update_surf_dirtiness = false;
>
> while (level_mask) {
> unsigned level = u_bit_scan(&level_mask);
>
> /* The smaller the mipmap level, the less layers there are
> * as far as 3D textures are concerned. */
> max_layer = util_max_layer(&rtex->resource.b.b, level);
> checked_last_layer = MIN2(last_layer, max_layer);
>
> for (layer = first_layer; layer <= checked_last_layer; layer++) {
> @@ -512,21 +504,20 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
> }
>
> /* The texture will always be dirty if some layers aren't flushed.
> * I don't think this case occurs often though. */
> if (first_layer == 0 && last_layer >= max_layer) {
> rtex->dirty_level_mask &= ~(1 << level);
> }
> }
>
> sctx->decompression_enabled = false;
> - sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
>
> sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
> SI_CONTEXT_INV_GLOBAL_L2 |
> SI_CONTEXT_INV_VMEM_L1;
> }
>
> static void
> si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
> unsigned first_level, unsigned last_level)
> {
> @@ -964,24 +955,46 @@ static void si_decompress_subresource(struct pipe_context *ctx,
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct r600_texture *rtex = (struct r600_texture*)tex;
>
> if (rtex->db_compatible) {
> planes &= PIPE_MASK_Z | PIPE_MASK_S;
>
> if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
> planes &= ~PIPE_MASK_S;
>
> + /* If we've rendered into the framebuffer and it's a blitting
> + * source, make sure the decompression pass is invoked
> + * by dirtying the framebuffer.
> + */
> + if (sctx->framebuffer.state.zsbuf &&
> + sctx->framebuffer.state.zsbuf->u.tex.level == level &&
> + sctx->framebuffer.state.zsbuf->texture == tex)
> + si_update_fb_dirtiness_after_rendering(sctx);
> +
> si_decompress_depth(sctx, rtex, planes,
> level, level,
> first_layer, last_layer);
> } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
> + /* If we've rendered into the framebuffer and it's a blitting
> + * source, make sure the decompression pass is invoked
> + * by dirtying the framebuffer.
> + */
> + for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> + if (sctx->framebuffer.state.cbufs[i] &&
> + sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
> + sctx->framebuffer.state.cbufs[i]->texture == tex) {
> + si_update_fb_dirtiness_after_rendering(sctx);
> + break;
> + }
> + }
> +
> si_blit_decompress_color(ctx, rtex, level, level,
> first_layer, last_layer, false);
> }
> }
>
> struct texture_orig_info {
> unsigned format;
> unsigned width0;
> unsigned height0;
> unsigned npix_x;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index d25705b..2e8a3bf 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -180,21 +180,20 @@ struct si_framebuffer {
> unsigned spi_shader_col_format_blend;
> unsigned spi_shader_col_format_blend_alpha;
> ubyte nr_samples:5; /* at most 16xAA */
> ubyte log_samples:3; /* at most 4 = 16xAA */
> ubyte compressed_cb_mask;
> ubyte color_is_int8;
> ubyte color_is_int10;
> ubyte dirty_cbufs;
> bool dirty_zsbuf;
> bool any_dst_linear;
> - bool do_update_surf_dirtiness;
> };
>
> struct si_clip_state {
> struct r600_atom atom;
> struct pipe_clip_state state;
> };
>
> struct si_sample_locs {
> struct r600_atom atom;
> unsigned nr_samples;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 42d81e7..b7f5566 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2445,20 +2445,52 @@ static void si_init_depth_surface(struct si_context *sctx,
> }
> }
> }
>
> surf->db_z_info = z_info;
> surf->db_stencil_info = s_info;
>
> surf->depth_initialized = true;
> }
>
> +void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
> +{
> + if (sctx->decompression_enabled)
> + return;
> +
> + if (sctx->framebuffer.state.zsbuf) {
> + struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
> + struct r600_texture *rtex = (struct r600_texture *)surf->texture;
> +
> + rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> +
> + if (rtex->surface.flags & RADEON_SURF_SBUFFER)
> + rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
> + }
> + if (sctx->framebuffer.compressed_cb_mask) {
> + struct pipe_surface *surf;
> + struct r600_texture *rtex;
> + unsigned mask = sctx->framebuffer.compressed_cb_mask;
> +
> + do {
> + unsigned i = u_bit_scan(&mask);
> + surf = sctx->framebuffer.state.cbufs[i];
> + rtex = (struct r600_texture*)surf->texture;
> +
> + if (rtex->fmask.size)
> + rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> + if (rtex->dcc_gather_statistics)
> + rtex->separate_dcc_dirty = true;
> + } while (mask);
> + }
> +}
> +
> static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
> {
> for (int i = 0; i < state->nr_cbufs; ++i) {
> struct r600_surface *surf = NULL;
> struct r600_texture *rtex;
>
> if (!state->cbufs[i])
> continue;
> surf = (struct r600_surface*)state->cbufs[i];
> rtex = (struct r600_texture*)surf->base.texture;
> @@ -2472,20 +2504,22 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct pipe_constant_buffer constbuf = {0};
> struct r600_surface *surf = NULL;
> struct r600_texture *rtex;
> bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
> unsigned old_nr_samples = sctx->framebuffer.nr_samples;
> bool unbound = false;
> int i;
>
> + si_update_fb_dirtiness_after_rendering(sctx);
> +
> for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> if (!sctx->framebuffer.state.cbufs[i])
> continue;
>
> rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
> if (rtex->dcc_gather_statistics)
> vi_separate_dcc_stop_query(ctx, rtex);
> }
>
> /* Disable DCC if the formats are incompatible. */
> @@ -2669,21 +2703,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
> si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
> }
>
> sctx->do_update_shaders = true;
>
> if (!sctx->decompression_enabled) {
> /* Prevent textures decompression when the framebuffer state
> * changes come from the decompression passes themselves.
> */
> sctx->need_check_render_feedback = true;
> - sctx->framebuffer.do_update_surf_dirtiness = true;
> }
> }
>
> static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
> {
> struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
> unsigned i, nr_cbufs = state->nr_cbufs;
> struct r600_texture *tex = NULL;
> struct r600_surface *cb = NULL;
> @@ -3977,28 +4010,29 @@ static void si_set_tess_state(struct pipe_context *ctx,
> &cb.buffer_offset);
>
> si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
> pipe_resource_reference(&cb.buffer, NULL);
> }
>
> static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> + si_update_fb_dirtiness_after_rendering(sctx);
> +
> /* Multisample surfaces are flushed in si_decompress_textures. */
> if (sctx->framebuffer.nr_samples <= 1 &&
> sctx->framebuffer.state.nr_cbufs) {
> sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> SI_CONTEXT_INV_GLOBAL_L2 |
> SI_CONTEXT_FLUSH_AND_INV_CB;
> }
> - sctx->framebuffer.do_update_surf_dirtiness = true;
> }
>
> /* This only ensures coherency for shader image/buffer stores. */
> static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> /* Subsequent commands must wait for all shader invocations to
> * complete. */
> sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index ec28aba..acc8fb7 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -377,20 +377,21 @@ si_make_texture_descriptor(struct si_screen *screen,
> unsigned first_layer, unsigned last_layer,
> unsigned width, unsigned height, unsigned depth,
> uint32_t *state,
> uint32_t *fmask_state);
> struct pipe_sampler_view *
> si_create_sampler_view_custom(struct pipe_context *ctx,
> struct pipe_resource *texture,
> const struct pipe_sampler_view *state,
> unsigned width0, unsigned height0,
> unsigned force_level);
> +void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
>
> /* si_state_shader.c */
> bool si_update_shaders(struct si_context *sctx);
> void si_init_shader_functions(struct si_context *sctx);
> bool si_init_shader_cache(struct si_screen *sscreen);
> void si_destroy_shader_cache(struct si_screen *sscreen);
> void si_init_shader_selector_async(void *job, int thread_index);
> void si_get_active_slot_masks(const struct tgsi_shader_info *info,
> uint32_t *const_and_shader_buffers,
> uint64_t *samplers_and_images);
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 332e0c4..c1edf7f 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1200,21 +1200,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
> return;
> }
>
> /* Recompute and re-emit the texture resource states if needed. */
> dirty_tex_counter = p_atomic_read(&sctx->b.screen->dirty_tex_counter);
> if (unlikely(dirty_tex_counter != sctx->b.last_dirty_tex_counter)) {
> sctx->b.last_dirty_tex_counter = dirty_tex_counter;
> sctx->framebuffer.dirty_cbufs |=
> ((1 << sctx->framebuffer.state.nr_cbufs) - 1);
> sctx->framebuffer.dirty_zsbuf = true;
> - sctx->framebuffer.do_update_surf_dirtiness = true;
> si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
> si_update_all_texture_descriptors(sctx);
> }
>
> si_decompress_graphics_textures(sctx);
>
> /* Set the rasterization primitive type.
> *
> * This must be done after si_decompress_textures, which can call
> * draw_vbo recursively, and before si_update_shaders, which uses
> @@ -1385,50 +1384,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>
> /* Workaround for a VGT hang when streamout is enabled.
> * It must be done after drawing. */
> if ((sctx->b.family == CHIP_HAWAII ||
> sctx->b.family == CHIP_TONGA ||
> sctx->b.family == CHIP_FIJI) &&
> r600_get_strmout_en(&sctx->b)) {
> sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
> }
>
> - if (sctx->framebuffer.do_update_surf_dirtiness) {
> - /* Set the depth buffer as dirty. */
> - if (sctx->framebuffer.state.zsbuf) {
> - struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
> - struct r600_texture *rtex = (struct r600_texture *)surf->texture;
> -
> - rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> -
> - if (rtex->surface.flags & RADEON_SURF_SBUFFER)
> - rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
> - }
> - if (sctx->framebuffer.compressed_cb_mask) {
> - struct pipe_surface *surf;
> - struct r600_texture *rtex;
> - unsigned mask = sctx->framebuffer.compressed_cb_mask;
> -
> - do {
> - unsigned i = u_bit_scan(&mask);
> - surf = sctx->framebuffer.state.cbufs[i];
> - rtex = (struct r600_texture*)surf->texture;
> -
> - if (rtex->fmask.size)
> - rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> - if (rtex->dcc_gather_statistics)
> - rtex->separate_dcc_dirty = true;
> - } while (mask);
> - }
> - sctx->framebuffer.do_update_surf_dirtiness = false;
> - }
> -
> sctx->b.num_draw_calls++;
> if (info->primitive_restart)
> sctx->b.num_prim_restart_calls++;
> if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
> sctx->b.num_spill_draw_calls++;
> if (index_size && indexbuf != info->index.resource)
> pipe_resource_reference(&indexbuf, NULL);
> }
>
> void si_trace_emit(struct si_context *sctx)
>
More information about the mesa-dev
mailing list