[Mesa-dev] [PATCH 4/8] radeonsi: add a fast path for sRGB enable and no-op framebuffer changes
Nicolai Hähnle
nhaehnle at gmail.com
Wed Jun 7 12:09:00 UTC 2017
On 05.06.2017 18:50, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
> src/gallium/drivers/radeon/r600_texture.c | 1 +
> src/gallium/drivers/radeonsi/si_state.c | 65 ++++++++++++++++++++++-----
> 3 files changed, 55 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index b17b690..6b78a07 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -270,20 +270,21 @@ struct r600_texture {
> unsigned num_slow_clears;
>
> /* Counter that should be non-zero if the texture is bound to a
> * framebuffer. Implemented in radeonsi only.
> */
> uint32_t framebuffers_bound;
> };
>
> struct r600_surface {
> struct pipe_surface base;
> + enum pipe_format linear_format;
>
> /* These can vary with block-compressed textures. */
> unsigned width0;
> unsigned height0;
>
> bool color_initialized;
> bool depth_initialized;
>
> /* Misc. color flags. */
> bool alphatest_bypass;
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 4d72b86..48ae788 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1943,20 +1943,21 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
> assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
>
> pipe_reference_init(&surface->base.reference, 1);
> pipe_resource_reference(&surface->base.texture, texture);
> surface->base.context = pipe;
> surface->base.format = templ->format;
> surface->base.width = width;
> surface->base.height = height;
> surface->base.u = templ->u;
>
> + surface->linear_format = util_format_linear(templ->format);
> surface->width0 = width0;
> surface->height0 = height0;
>
> surface->dcc_incompatible =
> texture->target != PIPE_BUFFER &&
> vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
> templ->format);
> return &surface->base;
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 6cf3559..50a0bd1 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2423,32 +2423,84 @@ static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *sta
>
> if (!state->cbufs[i])
> continue;
> surf = (struct r600_surface*)state->cbufs[i];
> rtex = (struct r600_texture*)surf->base.texture;
>
> p_atomic_dec(&rtex->framebuffers_bound);
> }
> }
>
> +static void si_framebuffer_cache_flush(struct si_context *sctx)
> +{
> + /* Only flush vL1 and L2 when changing the framebuffer state, because
> + * the only client not using TC that can change textures is
> + * the framebuffer.
> + *
> + * Flush all CB and DB caches here because all buffers can be used
> + * for write by both TC (with shader image stores) and CB/DB.
> + */
> + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> + SI_CONTEXT_INV_GLOBAL_L2 |
This one bit shouldn't be needed on gfx9, right? Maybe for a separate patch.
> + SI_CONTEXT_FLUSH_AND_INV_CB |
> + SI_CONTEXT_FLUSH_AND_INV_DB |
> + SI_CONTEXT_CS_PARTIAL_FLUSH;
> +}
> +
> static void si_set_framebuffer_state(struct pipe_context *ctx,
> const struct pipe_framebuffer_state *state)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct pipe_constant_buffer constbuf = {0};
> struct r600_surface *surf = NULL;
> struct r600_texture *rtex;
> bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
> unsigned old_nr_samples = sctx->framebuffer.nr_samples;
> bool unbound = false;
> int i;
>
> + /* Fast path for linear <-> sRGB changes and no-op changes. */
Are linear <-> sRGB changes really that common to deserve this special
treatment? Especially considering that IMO it makes the function as a
whole quite a bit more difficult to follow.
Perhaps we could instead use the initial comparison loop to also speed
up cases where only a subset of the cbufs changes? Or the cbufs are
unchanged but zsbuf is changed? Although I wouldn't be surprised if
those cases are even less common. Hmm...
If anything, in case you do want to keep the optimization as-is, could
you please move it to a separate function?
> + if (state->nr_cbufs &&
> + state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
> + state->zsbuf == sctx->framebuffer.state.zsbuf) {
> + struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
> +
> + /* See if the surfaces are equivalent. */
> + for (i = 0; i < state->nr_cbufs; i++) {
> + if (!!state->cbufs[i] != !!cbufs[i] ||
> + state->cbufs[i]->texture != cbufs[i]->texture ||
> + memcmp(&state->cbufs[i]->u.tex, &cbufs[i]->u.tex,
> + sizeof(cbufs[i]->u.tex)) ||
> + ((struct r600_surface*)state->cbufs[i])->linear_format !=
> + ((struct r600_surface*)cbufs[i])->linear_format)
> + break;
> + }
> + if (i == state->nr_cbufs) {
> + /* We need to make sure the surfaces are initialized. */
> + for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> + surf = (struct r600_surface*)state->cbufs[i];
> + if (!surf->color_initialized)
> + si_initialize_color_surface(sctx, surf);
> + }
> +
> + /* Decompression passes may need a cache flush. */
> + if (sctx->decompression_enabled)
> + si_framebuffer_cache_flush(sctx);
> +
> + /* Just re-emit the FB state and get out. */
> + util_copy_framebuffer_state(&sctx->framebuffer.state, state);
> + sctx->framebuffer.dirty_cbufs |= (1 << state->nr_cbufs) - 1;
> + si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
> + return;
> + }
> + }
I don't know if it's just my mail client, but the indentation of the
last brace looks wrong.
Cheers,
Nicolai
> +
> for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> if (!sctx->framebuffer.state.cbufs[i])
> continue;
>
> rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
> if (rtex->dcc_gather_statistics)
> vi_separate_dcc_stop_query(ctx, rtex);
> }
>
> /* Disable DCC if the formats are incompatible. */
> @@ -2472,32 +2524,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
> unbound = true;
> }
>
> if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
> if (!r600_texture_disable_dcc(&sctx->b, rtex))
> sctx->b.decompress_dcc(ctx, rtex);
>
> surf->dcc_incompatible = false;
> }
>
> - /* Only flush TC when changing the framebuffer state, because
> - * the only client not using TC that can change textures is
> - * the framebuffer.
> - *
> - * Flush all CB and DB caches here because all buffers can be used
> - * for write by both TC (with shader image stores) and CB/DB.
> - */
> - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> - SI_CONTEXT_INV_GLOBAL_L2 |
> - SI_CONTEXT_FLUSH_AND_INV_CB |
> - SI_CONTEXT_FLUSH_AND_INV_DB |
> - SI_CONTEXT_CS_PARTIAL_FLUSH;
> + si_framebuffer_cache_flush(sctx);
>
> /* Take the maximum of the old and new count. If the new count is lower,
> * dirtying is needed to disable the unbound colorbuffers.
> */
> sctx->framebuffer.dirty_cbufs |=
> (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
> sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
>
> si_dec_framebuffer_counters(&sctx->framebuffer.state);
> util_copy_framebuffer_state(&sctx->framebuffer.state, state);
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list