[PATCH] drm/i915: Remove unnecessary memory quiescing for aux inval

Matt Roper matthew.d.roper at intel.com
Wed Sep 20 17:28:13 UTC 2023


On Wed, Sep 20, 2023 at 01:11:31PM +0200, Nirmoy Das wrote:
> i915 already does memory quiesce before signaling
> breadcrumb so remove extra memory quiescing for aux
> invalidation which can cause unnecessary side effects.

This explanation seems confusing to me.  If we've already performed the
necessary flushing and quiesced all cache<->memory traffic, then
performing another flush should just be a noop, right?  If we're seeing
side effects, then doesn't that imply that there was still something in
the cache that hadn't made its way to memory yet?

Is the breadcrumb code flushing all the necessary bits?  What about
PIPE_CONTROL_CCS_FLUSH?


Matt

> 
> Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation")
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Andi Shyti <andi.shyti at linux.intel.com>
> Cc: <stable at vger.kernel.org> # v5.8+
> Cc: Andrzej Hajda <andrzej.hajda at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Tejas Upadhyay <tejas.upadhyay at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Prathap Kumar Valsan <prathap.kumar.valsan at intel.com>
> Cc: Tapani Pälli <tapani.palli at intel.com>
> Cc: Mark Janes <mark.janes at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------
>  1 file changed, 26 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 0143445dba83..5001670046a0 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  {
>  	struct intel_engine_cs *engine = rq->engine;
>  
> -	/*
> -	 * On Aux CCS platforms the invalidation of the Aux
> -	 * table requires quiescing memory traffic beforehand
> -	 */
> -	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
> +	if (mode & EMIT_FLUSH) {
>  		u32 bit_group_0 = 0;
>  		u32 bit_group_1 = 0;
>  		int err;
> @@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  
>  		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
>  
> -		/*
> -		 * When required, in MTL and beyond platforms we
> -		 * need to set the CCS_FLUSH bit in the pipe control
> -		 */
> -		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> -			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> -
>  		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
>  		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
>  		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  {
>  	struct drm_i915_private *i915 = rq->i915;
>  	struct intel_gt *gt = rq->engine->gt;
> -	u32 flags = (PIPE_CONTROL_CS_STALL |
> -		     PIPE_CONTROL_TLB_INVALIDATE |
> -		     PIPE_CONTROL_TILE_CACHE_FLUSH |
> -		     PIPE_CONTROL_FLUSH_L3 |
> -		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> -		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> -		     PIPE_CONTROL_DC_FLUSH_ENABLE |
> -		     PIPE_CONTROL_FLUSH_ENABLE);
> +	u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +	u32 bit_group_1 = (PIPE_CONTROL_CS_STALL |
> +			   PIPE_CONTROL_TLB_INVALIDATE |
> +			   PIPE_CONTROL_TILE_CACHE_FLUSH |
> +			   PIPE_CONTROL_FLUSH_L3 |
> +			   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +			   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +			   PIPE_CONTROL_DC_FLUSH_ENABLE |
> +			   PIPE_CONTROL_FLUSH_ENABLE);
>  
>  	/* Wa_14016712196 */
>  	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
> @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  
>  	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
>  		/* Wa_1409600907 */
> -		flags |= PIPE_CONTROL_DEPTH_STALL;
> +		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
>  
>  	if (!HAS_3D_PIPELINE(rq->i915))
> -		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> +		bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
>  	else if (rq->engine->class == COMPUTE_CLASS)
> -		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +		bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +
> +	/*
> +	 * On Aux CCS platforms the invalidation of the Aux
> +	 * table requires quiescing memory traffic beforehand.
> +	 * gen12_emit_fini_breadcrumb_rcs() does this for us as
> +	 * all memory traffic has to be completed before we signal
> +	 * the breadcrumb. In MTL and beyond platforms, we need to also
> +	 * add CCS_FLUSH bit in the pipe control for that purpose.
> +	 */
> +
> +	if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +		bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
>  
> -	cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
> +	cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
>  
>  	/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
>  	cs = gen12_emit_ggtt_write_rcs(cs,
> -- 
> 2.41.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the dri-devel mailing list