[Mesa-dev] [PATCH 4/9] radeonsi: flush CB after MSAA only when transitioning from CB to textures

Marek Olšák maraeo at gmail.com
Mon Jun 19 13:16:21 UTC 2017


On Mon, Jun 19, 2017 at 2:27 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 16.06.2017 14:58, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> The main flush before texturing is done after the FMASK decompress pass.
>>
>> CB after MSAA rendering is not flushed in set_framebuffer_state and also
>> not in memory_barrier if the current color buffer is MSAA. We fully rely
>> on the FMASK decompress pass for the flushing.
>>
>> Some CB decompress and resolve passes need an explicit flush before and
>> after.
>> ---
>>   src/gallium/drivers/radeonsi/si_blit.c  | 29 ++++++++++++++++++++++
>>   src/gallium/drivers/radeonsi/si_state.c | 43
>> ++++++++++++++++++++++-----------
>>   2 files changed, 58 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> b/src/gallium/drivers/radeonsi/si_blit.c
>> index 0993ebd..1159594 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -398,20 +398,28 @@ si_decompress_depth(struct si_context *sctx,
>>                  * state becomes 0 for the whole mipmap tree and all
>> planes.
>>                  * (there is nothing else to flush)
>>                  */
>>                 if (tex->tc_compatible_htile) {
>>                         if (r600_can_sample_zs(tex, false))
>>                                 tex->dirty_level_mask = 0;
>>                         if (r600_can_sample_zs(tex, true))
>>                                 tex->stencil_dirty_level_mask = 0;
>>                 }
>>         }
>> +       /* set_framebuffer_state takes care of coherency for
>> single-sample.
>> +        * The DB->CB copy uses CB for the final writes.
>> +        */
>> +       if (copy_planes && tex->resource.b.b.nr_samples > 1) {
>> +               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>> +                                SI_CONTEXT_INV_GLOBAL_L2 |
>> +                                SI_CONTEXT_FLUSH_AND_INV_CB;
>> +       }
>>   }
>>     static void
>>   si_decompress_sampler_depth_textures(struct si_context *sctx,
>>                                      struct si_textures_info *textures)
>>   {
>>         unsigned i;
>>         unsigned mask = textures->needs_depth_decompress_mask;
>>         while (mask) {
>> @@ -480,36 +488,49 @@ static void si_blit_decompress_color(struct
>> pipe_context *ctx,
>>                 for (layer = first_layer; layer <= checked_last_layer;
>> layer++) {
>>                         struct pipe_surface *cbsurf, surf_tmpl;
>>                         surf_tmpl.format = rtex->resource.b.b.format;
>>                         surf_tmpl.u.tex.level = level;
>>                         surf_tmpl.u.tex.first_layer = layer;
>>                         surf_tmpl.u.tex.last_layer = layer;
>>                         cbsurf = ctx->create_surface(ctx,
>> &rtex->resource.b.b, &surf_tmpl);
>>   +                     /* Required before and after FMASK and
>> DCC_DECOMPRESS. */
>> +                       if (custom_blend ==
>> sctx->custom_blend_fmask_decompress ||
>> +                           custom_blend ==
>> sctx->custom_blend_dcc_decompress)
>> +                               sctx->b.flags |=
>> SI_CONTEXT_FLUSH_AND_INV_CB;
>> +
>>                         si_blitter_begin(ctx, SI_DECOMPRESS);
>>                         util_blitter_custom_color(sctx->blitter, cbsurf,
>> custom_blend);
>>                         si_blitter_end(ctx);
>>   +                     if (custom_blend ==
>> sctx->custom_blend_fmask_decompress ||
>> +                           custom_blend ==
>> sctx->custom_blend_dcc_decompress)
>> +                               sctx->b.flags |=
>> SI_CONTEXT_FLUSH_AND_INV_CB;
>> +
>>                         pipe_surface_reference(&cbsurf, NULL);
>>                 }
>>                 /* The texture will always be dirty if some layers aren't
>> flushed.
>>                  * I don't think this case occurs often though. */
>>                 if (first_layer == 0 && last_layer >= max_layer) {
>>                         rtex->dirty_level_mask &= ~(1 << level);
>>                 }
>>         }
>>         sctx->decompression_enabled = false;
>>         sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
>> +
>> +       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
>> +                        SI_CONTEXT_INV_GLOBAL_L2 |
>> +                        SI_CONTEXT_INV_VMEM_L1;
>>   }
>>     static void
>>   si_decompress_color_texture(struct si_context *sctx, struct r600_texture
>> *tex,
>>                             unsigned first_level, unsigned last_level)
>>   {
>>         /* CMASK or DCC can be discarded and we can still end up here. */
>>         if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
>>                 return;
>>   @@ -1148,27 +1169,35 @@ void si_resource_copy_region(struct pipe_context
>> *ctx,
>>         pipe_surface_reference(&dst_view, NULL);
>>         pipe_sampler_view_reference(&src_view, NULL);
>>   }
>>     static void si_do_CB_resolve(struct si_context *sctx,
>>                              const struct pipe_blit_info *info,
>>                              struct pipe_resource *dst,
>>                              unsigned dst_level, unsigned dst_z,
>>                              enum pipe_format format)
>>   {
>> +       /* Required before and after CB_RESOLVE. */
>> +       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
>> +
>>         si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
>>                          (info->render_condition_enable ? 0 :
>> SI_DISABLE_RENDER_COND));
>>         util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level,
>> dst_z,
>>                                           info->src.resource,
>> info->src.box.z,
>>                                           ~0, sctx->custom_blend_resolve,
>>                                           format);
>>         si_blitter_end(&sctx->b.b);
>> +
>> +       /* Flush caches for possible texturing. */
>> +       sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
>> +                        SI_CONTEXT_INV_GLOBAL_L2 |
>> +                        SI_CONTEXT_INV_VMEM_L1;
>>   }
>>     static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>>                                      const struct pipe_blit_info *info)
>>   {
>>         struct si_context *sctx = (struct si_context*)ctx;
>>         struct r600_texture *src = (struct
>> r600_texture*)info->src.resource;
>>         struct r600_texture *dst = (struct
>> r600_texture*)info->dst.resource;
>>         MAYBE_UNUSED struct r600_texture *rtmp;
>>         unsigned dst_width = u_minify(info->dst.resource->width0,
>> info->dst.level);
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 44e5f1c..ab27af2 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -2523,34 +2523,42 @@ static void si_set_framebuffer_state(struct
>> pipe_context *ctx,
>>         /* Only flush TC when changing the framebuffer state, because
>>          * the only client not using TC that can change textures is
>>          * the framebuffer.
>>          *
>>          * Wait for compute shaders because of possible transitions:
>>          * - FB write -> shader read
>>          * - shader write -> FB read
>>          *
>>          * DB caches are flushed on demand (using si_decompress_textures).
>> +        *
>> +        * When MSAA is enabled, CB and TC caches are flushed on demand
>> +        * (after FMASK decompression).
>>          */
>> -       sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>> -                        SI_CONTEXT_INV_GLOBAL_L2 |
>> -                        SI_CONTEXT_FLUSH_AND_INV_CB |
>> -                        SI_CONTEXT_CS_PARTIAL_FLUSH;
>> +       if (sctx->framebuffer.nr_samples <= 1) {
>> +               sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>> +                                SI_CONTEXT_INV_GLOBAL_L2 |
>> +                                SI_CONTEXT_FLUSH_AND_INV_CB;
>> +       }
>
>
> This looks correct to me, but only because we happen not to support MSAA
> shader images today. Luckily, people seem to agree that writing MSAA images
> from shaders is not very useful, but if we ever do need to support those,
> we'll have to support shader write -> FB access transitions.
>
> I suppose that's okay -- we can add a flag if an MSAA texture is bound as a
> shader image and write access, and check that when binding the framebuffer.
> But I think this should be mentioned in the comment: Shader write -> FB read
> transitions cannot happen for MSAA textures, because MSAA shader images are
> not supported.

Indeed. Added the comment.

Marek


More information about the mesa-dev mailing list