[Mesa-dev] [PATCH 10/15] radeonsi/gfx9: don't flush L2 metadata for DB if not needed
Marek Olšák
maraeo at gmail.com
Mon Aug 21 21:54:09 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_blit.c | 12 +++++-------
src/gallium/drivers/radeonsi/si_pipe.h | 17 ++++++++++++++---
src/gallium/drivers/radeonsi/si_state.c | 8 +++++++-
3 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 3228933..734eeaa 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -378,35 +378,33 @@ si_decompress_depth(struct si_context *sctx,
si_blit_decompress_zs_in_place(
sctx, tex,
levels_z, levels_s,
first_layer, last_layer);
}
/* Only in-place decompression needs to flush DB caches, or
* when we don't decompress but TC-compatible planes are dirty.
*/
si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
- inplace_planes & PIPE_MASK_S);
+ inplace_planes & PIPE_MASK_S,
+ tex->tc_compatible_htile &&
+ first_level == 0);
- /* If we flush DB caches for TC-compatible depth, the dirty
- * state becomes 0 for the whole mipmap tree and all planes.
- * (there is nothing else to flush)
- */
if (tex->tc_compatible_htile) {
/* Only clear the mask that we are flushing, because
* si_make_DB_shader_coherent() can treat depth and
* stencil differently.
*/
if (inplace_planes & PIPE_MASK_Z)
- tex->dirty_level_mask = 0;
+ tex->dirty_level_mask &= ~levels_z;
if (inplace_planes & PIPE_MASK_S)
- tex->stencil_dirty_level_mask = 0;
+ tex->stencil_dirty_level_mask &= ~levels_s;
}
}
/* set_framebuffer_state takes care of coherency for single-sample.
* The DB->CB copy uses CB for the final writes.
*/
if (copy_planes && tex->resource.b.b.nr_samples > 1)
si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
false);
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 3e59e21..cdc8109 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -194,20 +194,21 @@ struct si_framebuffer {
unsigned spi_shader_col_format_blend_alpha;
ubyte nr_samples:5; /* at most 16xAA */
ubyte log_samples:3; /* at most 4 = 16xAA */
ubyte compressed_cb_mask;
ubyte color_is_int8;
ubyte color_is_int10;
ubyte dirty_cbufs;
bool dirty_zsbuf;
bool any_dst_linear;
bool CB_has_shader_readable_metadata;
+ bool DB_has_shader_readable_metadata;
};
struct si_clip_state {
struct r600_atom atom;
struct pipe_clip_state state;
bool any_nonzeros;
};
struct si_sample_locs {
struct r600_atom atom;
@@ -615,21 +616,31 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
else if (shaders_read_metadata)
sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
} else {
/* SI-CI-VI */
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
}
}
static inline void
si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
- bool include_stencil)
+ bool include_stencil, bool shaders_read_metadata)
{
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_INV_VMEM_L1;
- /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */
- if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil)
+ if (sctx->b.chip_class >= GFX9) {
+ /* Single-sample depth (not stencil) is coherent with shaders
+ * on GFX9, but L2 metadata must be flushed if shaders read
+ * metadata.
+ */
+ if (num_samples >= 2 || include_stencil)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ else if (shaders_read_metadata)
+ sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+ } else {
+ /* SI-CI-VI */
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
}
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e5d8d21..bb533d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2578,21 +2578,22 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
/* u_blitter doesn't invoke depth decompression when it does multiple
* blits in a row, but the only case when it matters for DB is when
* doing generate_mipmap. So here we flush DB manually between
* individual generate_mipmap blits.
* Note that lower mipmap levels aren't compressed.
*/
if (sctx->generate_mipmap_for_depth)
- si_make_DB_shader_coherent(sctx, 1, false);
+ si_make_DB_shader_coherent(sctx, 1, false,
+ sctx->framebuffer.DB_has_shader_readable_metadata);
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
sctx->framebuffer.dirty_cbufs |=
(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
si_dec_framebuffer_counters(&sctx->framebuffer.state);
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
@@ -2603,20 +2604,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.spi_shader_col_format_blend = 0;
sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
sctx->framebuffer.color_is_int8 = 0;
sctx->framebuffer.color_is_int10 = 0;
sctx->framebuffer.compressed_cb_mask = 0;
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
sctx->framebuffer.any_dst_linear = false;
sctx->framebuffer.CB_has_shader_readable_metadata = false;
+ sctx->framebuffer.DB_has_shader_readable_metadata = false;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
continue;
surf = (struct r600_surface*)state->cbufs[i];
rtex = (struct r600_texture*)surf->base.texture;
if (!surf->color_initialized) {
si_initialize_color_surface(sctx, surf);
@@ -2658,20 +2660,24 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
}
}
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
+
+ if (rtex->tc_compatible_htile && !surf->base.u.tex.level)
+ sctx->framebuffer.DB_has_shader_readable_metadata = true;
+
r600_context_add_resource_size(ctx, surf->base.texture);
}
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
--
2.7.4
More information about the mesa-dev mailing list