[Mesa-dev] [PATCH 3/3] radeonsi: use TC write-back instead of full cache invalidation

Nicolai Hähnle nhaehnle at gmail.com
Wed Oct 12 11:04:52 UTC 2016


That's a nice improvement. For the series:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 11.10.2016 16:48, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeonsi/si_compute.c    |  2 +-
>  src/gallium/drivers/radeonsi/si_state.c      | 12 +++---------
>  src/gallium/drivers/radeonsi/si_state_draw.c |  6 +++---
>  3 files changed, 7 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 632839f..e785106 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -694,21 +694,21 @@ static void si_launch_grid(
>
>  	/* Add buffer sizes for memory checking in need_cs_space. */
>  	r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
>  	/* TODO: add the scratch buffer */
>
>  	if (info->indirect) {
>  		r600_context_add_resource_size(ctx, info->indirect);
>
>  		/* The hw doesn't read the indirect buffer via TC L2. */
>  		if (r600_resource(info->indirect)->TC_L2_dirty) {
> -			sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> +			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  			r600_resource(info->indirect)->TC_L2_dirty = false;
>  		}
>  	}
>
>  	si_need_cs_space(sctx);
>
>  	if (!sctx->cs_shader_state.initialized)
>  		si_initialize_compute(sctx);
>
>  	if (sctx->b.flags)
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 34f3ed7..ad65fc2 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3390,35 +3390,29 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>  		 * automatically at end of shader, but the contents of other
>  		 * L1 caches might still be stale. */
>  		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
>  	}
>
>  	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
>  		/* Indices are read through TC L2 since VI.
>  		 * L1 isn't used.
>  		 */
>  		if (sctx->screen->b.chip_class <= CIK)
> -			sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> +			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  	}
>
>  	if (flags & PIPE_BARRIER_FRAMEBUFFER)
>  		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
>
>  	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
> -		     PIPE_BARRIER_INDIRECT_BUFFER)) {
> -		/* Not sure if INV_GLOBAL_L2 is the best thing here.
> -		 *
> -		 * We need to make sure that TC L1 & L2 are written back to
> -		 * memory, because CB fetches don't consider TC, but there's
> -		 * no need to invalidate any TC cache lines. */
> -		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> -	}
> +		     PIPE_BARRIER_INDIRECT_BUFFER))
> +		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  }
>
>  static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
>  {
>  	struct pipe_blend_state blend;
>
>  	memset(&blend, 0, sizeof(blend));
>  	blend.independent_blend_enable = true;
>  	blend.rt[0].colormask = 0xf;
>  	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 33b6b23..c14e852 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1040,32 +1040,32 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>  			if (!ib.buffer)
>  				return;
>  			/* info->start will be added by the drawing code */
>  			ib.offset -= start_offset;
>  		}
>  	}
>
>  	/* VI reads index buffers through TC L2. */
>  	if (info->indexed && sctx->b.chip_class <= CIK &&
>  	    r600_resource(ib.buffer)->TC_L2_dirty) {
> -		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> +		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  		r600_resource(ib.buffer)->TC_L2_dirty = false;
>  	}
>
>  	if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
> -		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> +		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  		r600_resource(info->indirect)->TC_L2_dirty = false;
>  	}
>
>  	if (info->indirect_params &&
>  	    r600_resource(info->indirect_params)->TC_L2_dirty) {
> -		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
> +		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>  		r600_resource(info->indirect_params)->TC_L2_dirty = false;
>  	}
>
>  	/* Add buffer sizes for memory checking in need_cs_space. */
>  	if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
>  		r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
>  	if (info->indirect)
>  		r600_context_add_resource_size(ctx, info->indirect);
>
>  	si_need_cs_space(sctx);
>


More information about the mesa-dev mailing list