[Mesa-dev] [PATCH 4/9] radeonsi: flush CB after MSAA only when transitioning from CB to textures
Nicolai Hähnle
nhaehnle at gmail.com
Mon Jun 19 12:27:37 UTC 2017
On 16.06.2017 14:58, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The main flush before texturing is done after the FMASK decompress pass.
>
> CB after MSAA rendering is not flushed in set_framebuffer_state and also
> not in memory_barrier if the current color buffer is MSAA. We fully rely
> on the FMASK decompress pass for the flushing.
>
> Some CB decompress and resolve passes need an explicit flush before and
> after.
> ---
> src/gallium/drivers/radeonsi/si_blit.c | 29 ++++++++++++++++++++++
> src/gallium/drivers/radeonsi/si_state.c | 43 ++++++++++++++++++++++-----------
> 2 files changed, 58 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 0993ebd..1159594 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -398,20 +398,28 @@ si_decompress_depth(struct si_context *sctx,
> * state becomes 0 for the whole mipmap tree and all planes.
> * (there is nothing else to flush)
> */
> if (tex->tc_compatible_htile) {
> if (r600_can_sample_zs(tex, false))
> tex->dirty_level_mask = 0;
> if (r600_can_sample_zs(tex, true))
> tex->stencil_dirty_level_mask = 0;
> }
> }
> + /* set_framebuffer_state takes care of coherency for single-sample.
> + * The DB->CB copy uses CB for the final writes.
> + */
> + if (copy_planes && tex->resource.b.b.nr_samples > 1) {
> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_FLUSH_AND_INV_CB;
> + }
> }
>
> static void
> si_decompress_sampler_depth_textures(struct si_context *sctx,
> struct si_textures_info *textures)
> {
> unsigned i;
> unsigned mask = textures->needs_depth_decompress_mask;
>
> while (mask) {
> @@ -480,36 +488,49 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
>
> for (layer = first_layer; layer <= checked_last_layer; layer++) {
> struct pipe_surface *cbsurf, surf_tmpl;
>
> surf_tmpl.format = rtex->resource.b.b.format;
> surf_tmpl.u.tex.level = level;
> surf_tmpl.u.tex.first_layer = layer;
> surf_tmpl.u.tex.last_layer = layer;
> cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
>
> + /* Required before and after FMASK and DCC_DECOMPRESS. */
> + if (custom_blend == sctx->custom_blend_fmask_decompress ||
> + custom_blend == sctx->custom_blend_dcc_decompress)
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
> +
> si_blitter_begin(ctx, SI_DECOMPRESS);
> util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
> si_blitter_end(ctx);
>
> + if (custom_blend == sctx->custom_blend_fmask_decompress ||
> + custom_blend == sctx->custom_blend_dcc_decompress)
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
> +
> pipe_surface_reference(&cbsurf, NULL);
> }
>
> /* The texture will always be dirty if some layers aren't flushed.
> * I don't think this case occurs often though. */
> if (first_layer == 0 && last_layer >= max_layer) {
> rtex->dirty_level_mask &= ~(1 << level);
> }
> }
>
> sctx->decompression_enabled = false;
> sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
> +
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_INV_VMEM_L1;
> }
>
> static void
> si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
> unsigned first_level, unsigned last_level)
> {
> /* CMASK or DCC can be discarded and we can still end up here. */
> if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
> return;
>
> @@ -1148,27 +1169,35 @@ void si_resource_copy_region(struct pipe_context *ctx,
> pipe_surface_reference(&dst_view, NULL);
> pipe_sampler_view_reference(&src_view, NULL);
> }
>
> static void si_do_CB_resolve(struct si_context *sctx,
> const struct pipe_blit_info *info,
> struct pipe_resource *dst,
> unsigned dst_level, unsigned dst_z,
> enum pipe_format format)
> {
> + /* Required before and after CB_RESOLVE. */
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
> +
> si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
> (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
> util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
> info->src.resource, info->src.box.z,
> ~0, sctx->custom_blend_resolve,
> format);
> si_blitter_end(&sctx->b.b);
> +
> + /* Flush caches for possible texturing. */
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_INV_VMEM_L1;
> }
>
> static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
> const struct pipe_blit_info *info)
> {
> struct si_context *sctx = (struct si_context*)ctx;
> struct r600_texture *src = (struct r600_texture*)info->src.resource;
> struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
> MAYBE_UNUSED struct r600_texture *rtmp;
> unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 44e5f1c..ab27af2 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2523,34 +2523,42 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>
> /* Only flush TC when changing the framebuffer state, because
> * the only client not using TC that can change textures is
> * the framebuffer.
> *
> * Wait for compute shaders because of possible transitions:
> * - FB write -> shader read
> * - shader write -> FB read
> *
> * DB caches are flushed on demand (using si_decompress_textures).
> + *
> + * When MSAA is enabled, CB and TC caches are flushed on demand
> + * (after FMASK decompression).
> */
> - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> - SI_CONTEXT_INV_GLOBAL_L2 |
> - SI_CONTEXT_FLUSH_AND_INV_CB |
> - SI_CONTEXT_CS_PARTIAL_FLUSH;
> + if (sctx->framebuffer.nr_samples <= 1) {
> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_FLUSH_AND_INV_CB;
> + }
This looks correct to me, but only because we happen not to support MSAA
shader images today. Luckily, people seem to agree that writing MSAA
images from shaders is not very useful, but if we ever do need to
support those, we'll have to support shader write -> FB access transitions.
I suppose that's okay -- we can add a flag if an MSAA texture is bound
as a shader image and write access, and check that when binding the
framebuffer. But I think this should be mentioned in the comment: Shader
write -> FB read transitions cannot happen for MSAA textures, because
MSAA shader images are not supported.
Cheers,
Nicolai
> + sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
>
> /* u_blitter doesn't invoke depth decompression when it does multiple
> * blits in a row, but the only case when it matters for DB is when
> * doing generate_mipmap. So here we flush DB manually between
> * individual generate_mipmap blits.
> * Note that lower mipmap levels aren't compressed.
> */
> - if (sctx->generate_mipmap_for_depth)
> - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
> + if (sctx->generate_mipmap_for_depth) {
> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_FLUSH_AND_INV_DB;
> + }
>
> /* Take the maximum of the old and new count. If the new count is lower,
> * dirtying is needed to disable the unbound colorbuffers.
> */
> sctx->framebuffer.dirty_cbufs |=
> (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
> sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
>
> si_dec_framebuffer_counters(&sctx->framebuffer.state);
> util_copy_framebuffer_state(&sctx->framebuffer.state, state);
> @@ -3948,23 +3956,26 @@ static void si_set_tess_state(struct pipe_context *ctx,
> &cb.buffer_offset);
>
> si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
> pipe_resource_reference(&cb.buffer, NULL);
> }
>
> static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> - SI_CONTEXT_INV_GLOBAL_L2 |
> - SI_CONTEXT_FLUSH_AND_INV_CB;
> + /* Multisample surfaces are flushed in si_decompress_textures. */
> + if (sctx->framebuffer.nr_samples <= 1) {
> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> + SI_CONTEXT_INV_GLOBAL_L2 |
> + SI_CONTEXT_FLUSH_AND_INV_CB;
> + }
> sctx->framebuffer.do_update_surf_dirtiness = true;
> }
>
> /* This only ensures coherency for shader image/buffer stores. */
> static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> /* Subsequent commands must wait for all shader invocations to
> * complete. */
> @@ -3988,26 +3999,30 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> }
>
> if (flags & PIPE_BARRIER_INDEX_BUFFER) {
> /* Indices are read through TC L2 since VI.
> * L1 isn't used.
> */
> if (sctx->screen->b.chip_class <= CIK)
> sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
> }
>
> - /* Depth and stencil are flushed in si_decompress_textures when needed. */
> - if (flags & PIPE_BARRIER_FRAMEBUFFER)
> - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
> + /* MSAA color, any depth and any stencil are flushed in
> + * si_decompress_textures when needed.
> + */
> + if (flags & PIPE_BARRIER_FRAMEBUFFER &&
> + sctx->framebuffer.nr_samples <= 1) {
> + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
> + SI_CONTEXT_WRITEBACK_GLOBAL_L2;
> + }
>
> - if (flags & (PIPE_BARRIER_FRAMEBUFFER |
> - PIPE_BARRIER_INDIRECT_BUFFER))
> + if (flags & PIPE_BARRIER_INDIRECT_BUFFER)
> sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
> }
>
> static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
> {
> struct pipe_blend_state blend;
>
> memset(&blend, 0, sizeof(blend));
> blend.independent_blend_enable = true;
> blend.rt[0].colormask = 0xf;
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list