[Mesa-dev] [PATCH 4/8] radeonsi: add a fast path for sRGB enable and no-op framebuffer changes

Nicolai Hähnle nhaehnle at gmail.com
Wed Jun 7 12:09:00 UTC 2017


On 05.06.2017 18:50, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
>   src/gallium/drivers/radeon/r600_texture.c     |  1 +
>   src/gallium/drivers/radeonsi/si_state.c       | 65 ++++++++++++++++++++++-----
>   3 files changed, 55 insertions(+), 12 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index b17b690..6b78a07 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -270,20 +270,21 @@ struct r600_texture {
>   	unsigned			num_slow_clears;
>   
>   	/* Counter that should be non-zero if the texture is bound to a
>   	 * framebuffer. Implemented in radeonsi only.
>   	 */
>   	uint32_t			framebuffers_bound;
>   };
>   
>   struct r600_surface {
>   	struct pipe_surface		base;
> +	enum pipe_format		linear_format;
>   
>   	/* These can vary with block-compressed textures. */
>   	unsigned width0;
>   	unsigned height0;
>   
>   	bool color_initialized;
>   	bool depth_initialized;
>   
>   	/* Misc. color flags. */
>   	bool alphatest_bypass;
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 4d72b86..48ae788 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1943,20 +1943,21 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
>   	assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
>   
>   	pipe_reference_init(&surface->base.reference, 1);
>   	pipe_resource_reference(&surface->base.texture, texture);
>   	surface->base.context = pipe;
>   	surface->base.format = templ->format;
>   	surface->base.width = width;
>   	surface->base.height = height;
>   	surface->base.u = templ->u;
>   
> +	surface->linear_format = util_format_linear(templ->format);
>   	surface->width0 = width0;
>   	surface->height0 = height0;
>   
>   	surface->dcc_incompatible =
>   		texture->target != PIPE_BUFFER &&
>   		vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
>   						templ->format);
>   	return &surface->base;
>   }
>   
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 6cf3559..50a0bd1 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2423,32 +2423,84 @@ static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *sta
>   
>   		if (!state->cbufs[i])
>   			continue;
>   		surf = (struct r600_surface*)state->cbufs[i];
>   		rtex = (struct r600_texture*)surf->base.texture;
>   
>   		p_atomic_dec(&rtex->framebuffers_bound);
>   	}
>   }
>   
> +static void si_framebuffer_cache_flush(struct si_context *sctx)
> +{
> +	/* Only flush vL1 and L2 when changing the framebuffer state, because
> +	 * the only client not using TC that can change textures is
> +	 * the framebuffer.
> +	 *
> +	 * Flush all CB and DB caches here because all buffers can be used
> +	 * for write by both TC (with shader image stores) and CB/DB.
> +	 */
> +	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> +			 SI_CONTEXT_INV_GLOBAL_L2 |

This one bit shouldn't be needed on gfx9, right? Maybe for a separate patch.



> +			 SI_CONTEXT_FLUSH_AND_INV_CB |
> +			 SI_CONTEXT_FLUSH_AND_INV_DB |
> +			 SI_CONTEXT_CS_PARTIAL_FLUSH;
> +}
> +
>   static void si_set_framebuffer_state(struct pipe_context *ctx,
>   				     const struct pipe_framebuffer_state *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	struct pipe_constant_buffer constbuf = {0};
>   	struct r600_surface *surf = NULL;
>   	struct r600_texture *rtex;
>   	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
>   	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
>   	bool unbound = false;
>   	int i;
>   
> +	/* Fast path for linear <-> sRGB changes and no-op changes. */

Are linear <-> sRGB changes really that common to deserve this special 
treatment? Especially considering that IMO it makes the function as a 
whole quite a bit more difficult to follow.

Perhaps we could instead use the initial comparison loop to also speed 
up cases where only a subset of the cbufs changes? Or the cbufs are 
unchanged but zsbuf is changed? Although I wouldn't be surprised if 
those cases are even less common. Hmm...

If anything, in case you do want to keep the optimization as-is, could 
you please move it to a separate function?


> +	if (state->nr_cbufs &&
> +	    state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
> +	    state->zsbuf == sctx->framebuffer.state.zsbuf) {
> +		struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
> +
> +		/* See if the surfaces are equivalent. */
> +		for (i = 0; i < state->nr_cbufs; i++) {
> +			if (!!state->cbufs[i] != !!cbufs[i] ||
> +			    state->cbufs[i]->texture != cbufs[i]->texture ||
> +			    memcmp(&state->cbufs[i]->u.tex, &cbufs[i]->u.tex,
> +				   sizeof(cbufs[i]->u.tex)) ||
> +			    ((struct r600_surface*)state->cbufs[i])->linear_format !=
> +			    ((struct r600_surface*)cbufs[i])->linear_format)
> +				break;
> +		}
> +		if (i == state->nr_cbufs) {
> +			/* We need to make sure the surfaces are initialized. */
> +			for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
> +				surf = (struct r600_surface*)state->cbufs[i];
> +				if (!surf->color_initialized)
> +					si_initialize_color_surface(sctx, surf);
> +			}
> +
> +			/* Decompression passes may need a cache flush. */
> +			if (sctx->decompression_enabled)
> +				si_framebuffer_cache_flush(sctx);
> +
> +			/* Just re-emit the FB state and get out. */
> +			util_copy_framebuffer_state(&sctx->framebuffer.state, state);
> +			sctx->framebuffer.dirty_cbufs |= (1 << state->nr_cbufs) - 1;
> +			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
> +			return;
> +		}
> +        }

I don't know if it's just my mail client, but the indentation of the 
last brace looks wrong.

Cheers,
Nicolai


> +
>   	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
>   		if (!sctx->framebuffer.state.cbufs[i])
>   			continue;
>   
>   		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
>   		if (rtex->dcc_gather_statistics)
>   			vi_separate_dcc_stop_query(ctx, rtex);
>   	}
>   
>   	/* Disable DCC if the formats are incompatible. */
> @@ -2472,32 +2524,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   			unbound = true;
>   		}
>   
>   		if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
>   			if (!r600_texture_disable_dcc(&sctx->b, rtex))
>   				sctx->b.decompress_dcc(ctx, rtex);
>   
>   		surf->dcc_incompatible = false;
>   	}
>   
> -	/* Only flush TC when changing the framebuffer state, because
> -	 * the only client not using TC that can change textures is
> -	 * the framebuffer.
> -	 *
> -	 * Flush all CB and DB caches here because all buffers can be used
> -	 * for write by both TC (with shader image stores) and CB/DB.
> -	 */
> -	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> -			 SI_CONTEXT_INV_GLOBAL_L2 |
> -			 SI_CONTEXT_FLUSH_AND_INV_CB |
> -			 SI_CONTEXT_FLUSH_AND_INV_DB |
> -			 SI_CONTEXT_CS_PARTIAL_FLUSH;
> +	si_framebuffer_cache_flush(sctx);
>   
>   	/* Take the maximum of the old and new count. If the new count is lower,
>   	 * dirtying is needed to disable the unbound colorbuffers.
>   	 */
>   	sctx->framebuffer.dirty_cbufs |=
>   		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
>   	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
>   
>   	si_dec_framebuffer_counters(&sctx->framebuffer.state);
>   	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list