[Mesa-dev] [PATCH 2/9] radeonsi: flush DB caches only when transitioning from DB to texturing
Marek Olšák
maraeo at gmail.com
Fri Jun 16 12:57:58 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
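Reuse the mechanism of si_decompress_textures to flag the DB cache flush
on demand. For TC-compatible depth/stencil textures no actual decompression
is done there; only the DB cache flush is flagged.
This removes a lot of unnecessary DB cache flushes, which were previously
flagged unconditionally on framebuffer state changes and framebuffer
memory barriers.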
---
src/gallium/drivers/radeonsi/si_blit.c | 36 +++++++++++++++++++++------
src/gallium/drivers/radeonsi/si_descriptors.c | 17 +++++++------
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_state.c | 21 ++++++++++++----
src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++---
5 files changed, 56 insertions(+), 25 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 9c38ae9..f0abfdc 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -337,24 +337,20 @@ si_decompress_depth(struct si_context *sctx,
levels_s = level_mask & tex->stencil_dirty_level_mask;
if (levels_s) {
if (r600_can_sample_zs(tex, true))
inplace_planes |= PIPE_MASK_S;
else
copy_planes |= PIPE_MASK_S;
}
}
- assert(!tex->tc_compatible_htile || levels_z == 0);
- assert(!tex->tc_compatible_htile || levels_s == 0 ||
- !r600_can_sample_zs(tex, true));
-
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
if (copy_planes &&
(tex->flushed_depth_texture ||
r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
struct r600_texture *dst = tex->flushed_depth_texture;
unsigned fully_copied_levels;
unsigned levels = 0;
@@ -377,24 +373,44 @@ si_decompress_depth(struct si_context *sctx,
first_layer, last_layer,
0, u_max_sample(&tex->resource.b.b));
if (copy_planes & PIPE_MASK_Z)
tex->dirty_level_mask &= ~fully_copied_levels;
if (copy_planes & PIPE_MASK_S)
tex->stencil_dirty_level_mask &= ~fully_copied_levels;
}
if (inplace_planes) {
- si_blit_decompress_zs_in_place(
- sctx, tex,
- levels_z, levels_s,
- first_layer, last_layer);
+ if (!tex->tc_compatible_htile) {
+ si_blit_decompress_zs_in_place(
+ sctx, tex,
+ levels_z, levels_s,
+ first_layer, last_layer);
+ }
+
+ /* DB caches need to be flushed only for in-place decompression,
+ * or when nothing is decompressed but TC-compatible planes are dirty.
+ */
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ /* If we flush DB caches for TC-compatible depth, the dirty
+ * state becomes 0 for the whole mipmap tree and all planes,
+ * because there is nothing else to flush.
+ */
+ if (tex->tc_compatible_htile) {
+ if (r600_can_sample_zs(tex, false))
+ tex->dirty_level_mask = 0;
+ if (r600_can_sample_zs(tex, true))
+ tex->stencil_dirty_level_mask = 0;
+ }
}
}
static void
si_decompress_sampler_depth_textures(struct si_context *sctx,
struct si_textures_info *textures)
{
unsigned i;
unsigned mask = textures->needs_depth_decompress_mask;
@@ -1343,25 +1359,29 @@ static boolean si_generate_mipmap(struct pipe_context *ctx,
vi_disable_dcc_if_incompatible_format(&sctx->b, tex, base_level,
format);
si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS,
base_level, first_layer, last_layer);
/* Clear dirty_level_mask for the levels that will be overwritten. */
assert(base_level < last_level);
rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
last_level - base_level);
+ sctx->generate_mipmap_for_depth = rtex->is_depth;
+
si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
util_blitter_generate_mipmap(sctx->blitter, tex, format,
base_level, last_level,
first_layer, last_layer);
si_blitter_end(ctx);
+
+ sctx->generate_mipmap_for_depth = false;
return true;
}
static void si_flush_resource(struct pipe_context *ctx,
struct pipe_resource *res)
{
struct r600_texture *rtex = (struct r600_texture*)res;
assert(res->target != PIPE_BUFFER);
assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 375bcae..ee2e80a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -577,26 +577,28 @@ static void si_set_sampler_view(struct si_context *sctx,
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
static bool color_needs_decompression(struct r600_texture *rtex)
{
return rtex->fmask.size ||
(rtex->dirty_level_mask &&
(rtex->cmask.size || rtex->dcc_offset));
}
-static bool depth_needs_decompression(struct r600_texture *rtex,
- struct si_sampler_view *sview)
+static bool depth_needs_decompression(struct r600_texture *rtex)
{
- return rtex->db_compatible &&
- (!rtex->tc_compatible_htile ||
- !r600_can_sample_zs(rtex, sview->is_stencil_sampler));
+ /* If the depth/stencil texture is TC-compatible, no decompression
+ * will be done. The decompression function will only flush DB caches
+ * to make it coherent with shaders. That's necessary because the driver
+ * doesn't flush DB caches in any other case.
+ */
+ return rtex->db_compatible;
}
static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
unsigned shader)
{
struct si_textures_info *samplers = &sctx->samplers[shader];
unsigned shader_bit = 1 << shader;
if (samplers->needs_depth_decompress_mask ||
samplers->needs_color_decompress_mask ||
@@ -626,23 +628,22 @@ static void si_set_sampler_views(struct pipe_context *ctx,
samplers->needs_color_decompress_mask &= ~(1u << slot);
si_set_sampler_view(sctx, shader, slot, NULL, false);
continue;
}
si_set_sampler_view(sctx, shader, slot, views[i], false);
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
- struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (depth_needs_decompression(rtex, rview)) {
+ if (depth_needs_decompression(rtex)) {
samplers->needs_depth_decompress_mask |= 1u << slot;
} else {
samplers->needs_depth_decompress_mask &= ~(1u << slot);
}
if (color_needs_decompression(rtex)) {
samplers->needs_color_decompress_mask |= 1u << slot;
} else {
samplers->needs_color_decompress_mask &= ~(1u << slot);
}
@@ -2373,21 +2374,21 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx,
tex_handle = (struct si_texture_handle *)entry->data;
sview = (struct si_sampler_view *)tex_handle->view;
if (resident) {
if (sview->base.texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture *)sview->base.texture;
tex_handle->needs_depth_decompress =
- depth_needs_decompression(rtex, sview);
+ depth_needs_decompression(rtex);
tex_handle->needs_color_decompress =
color_needs_decompression(rtex);
if (rtex->dcc_offset &&
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
} else {
si_invalidate_bindless_buf_desc(sctx, tex_handle->desc,
sview->base.texture,
sview->base.u.buf.offset);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 427ac1c..e734595 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -357,20 +357,21 @@ struct si_context {
unsigned dbcb_copy_sample;
bool dbcb_depth_copy_enabled:1;
bool dbcb_stencil_copy_enabled:1;
bool db_flush_depth_inplace:1;
bool db_flush_stencil_inplace:1;
bool db_depth_clear:1;
bool db_depth_disable_expclear:1;
bool db_stencil_clear:1;
bool db_stencil_disable_expclear:1;
bool occlusion_queries_disabled:1;
+ bool generate_mipmap_for_depth:1;
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;
int last_index_size;
int last_base_vertex;
int last_start_instance;
int last_drawid;
int last_sh_base_reg;
int last_primitive_restart_en;
int last_restart_index;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1cd1f91..44e5f1c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2518,29 +2518,40 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (!r600_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(ctx, rtex);
surf->dcc_incompatible = false;
}
/* Only flush TC when changing the framebuffer state, because
* the only client not using TC that can change textures is
* the framebuffer.
*
- * Flush all CB and DB caches here because all buffers can be used
- * for write by both TC (with shader image stores) and CB/DB.
+ * Wait for compute shaders because of possible transitions:
+ * - FB write -> shader read
+ * - shader write -> FB read
+ *
+ * DB caches are flushed on demand (using si_decompress_textures).
*/
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_CS_PARTIAL_FLUSH;
+ /* u_blitter doesn't invoke depth decompression when it does multiple
+ * blits in a row, but the only case where that matters for DB is
+ * generate_mipmap. So flush DB manually here, between the individual
+ * generate_mipmap blits.
+ * Note that lower mipmap levels aren't compressed.
+ */
+ if (sctx->generate_mipmap_for_depth)
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
sctx->framebuffer.dirty_cbufs |=
(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
si_dec_framebuffer_counters(&sctx->framebuffer.state);
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
@@ -3977,23 +3988,23 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
}
if (flags & PIPE_BARRIER_INDEX_BUFFER) {
/* Indices are read through TC L2 since VI.
* L1 isn't used.
*/
if (sctx->screen->b.chip_class <= CIK)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
+ /* Depth and stencil are flushed in si_decompress_textures when needed. */
if (flags & PIPE_BARRIER_FRAMEBUFFER)
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
if (flags & (PIPE_BARRIER_FRAMEBUFFER |
PIPE_BARRIER_INDIRECT_BUFFER))
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
struct pipe_blend_state blend;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d039e01..d13c8b7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1395,25 +1395,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_get_strmout_en(&sctx->b)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
if (sctx->framebuffer.do_update_surf_dirtiness) {
/* Set the depth buffer as dirty. */
if (sctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
- if (!rtex->tc_compatible_htile)
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
- (!rtex->tc_compatible_htile || !rtex->can_sample_s))
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {
struct pipe_surface *surf;
struct r600_texture *rtex;
unsigned mask = sctx->framebuffer.compressed_cb_mask;
do {
unsigned i = u_bit_scan(&mask);
surf = sctx->framebuffer.state.cbufs[i];
--
2.7.4
More information about the mesa-dev
mailing list