[Intel-gfx] [PATCH v5 7/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

Nirmoy Das nirmoy.das at intel.com
Tue Jul 18 14:12:58 UTC 2023


Hi Andi,

On 7/18/2023 3:38 PM, Andi Shyti wrote:
> Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
> invalidation") has made sure that the memory is quiesced before
> invalidating the AUX CCS table. Do it for all the other engines
> and not just RCS.
>
> Signed-off-by: Andi Shyti <andi.shyti at linux.intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.c     | 71 +++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
>   2 files changed, 62 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 3275e55b18d90..2f40cd515cc78 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -225,6 +225,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   
>   		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
>   
> +		/*
> +		 * When required, in MTL+ platforms we need to
> +		 * set the CCS_FLUSH bit in the pipe control
> +		 */
> +		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +
>   		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
>   		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
>   		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -309,20 +316,64 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   {
>   	intel_engine_mask_t aux_inv = 0;
> -	u32 cmd, *cs;
> +	u32 cmd = 4;
> +	u32 *cs;
>   
> -	cmd = 4;
> -	if (mode & EMIT_INVALIDATE) {
> +	if (mode & EMIT_INVALIDATE)
>   		cmd += 2;
>   
> -		if (HAS_AUX_CCS(rq->engine->i915) &&
> -		    (rq->engine->class == VIDEO_DECODE_CLASS ||
> -		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
> -			aux_inv = rq->engine->mask &
> -				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> -			if (aux_inv)
> -				cmd += 4;
> +	if (HAS_AUX_CCS(rq->engine->i915))
> +		aux_inv = rq->engine->mask &
> +			  ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> +
> +	/*
> +	 * Aux invalidations on Aux CCS platforms require
> +	 * memory traffic is quiesced prior.
> +	 */
> +	if (aux_inv) {
> +		u32 bit_group_0 = 0;
> +		u32 bit_group_1 = 0;
> +
> +		cmd += 4;
> +
> +		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +
> +		switch (rq->engine->class) {
> +		case VIDEO_DECODE_CLASS:
> +			bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +			bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +			bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +			bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
> +			bit_group_1 |= PIPE_CONTROL_CS_STALL;
> +
> +			/*
> +			 * When required, in MTL+ platforms we need to
> +			 * set the CCS_FLUSH bit in the pipe control
> +			 */
> +			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +
> +			break;
> +
> +		case VIDEO_ENHANCEMENT_CLASS:
> +		case COMPUTE_CLASS:
> +			bit_group_1 |= MI_FLUSH_DW;
> +
> +			break;
> +
> +		case COPY_ENGINE_CLASS:
> +			/*
> +			 * When required, in MTL+ platforms we need to
> +			 * set the CCS_FLUSH bit in the pipe control
> +			 */
> +			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +			break;
>   		}
> +
> +		if (bit_group_1 || bit_group_0)
> +			intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
> +						   LRC_PPHWSP_SCRATCH_ADDR);


PIPE_CONTROL is limited to the render and compute engines only, and
gen12_emit_flush_xcs() gets called only for the other engines (BCS, VE, VD),
AFAIU. So I imagine the changes for this patch as:

gen12_emit_flush_rcs()
	pipe_control with CCS_FLUSH
	AUX CCS inval
gen12_emit_flush_xcs()
	MI_FLUSH_DW (with CCS flush for BCS)
	AUX CCS inval

(Note that the CCS flush bit for MI_FLUSH_DW is bit 16.)

Regards,
Nirmoy

>   	}
>   
>   	cs = intel_ring_begin(rq, cmd);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 5d143e2a8db03..5df7cce23197c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -299,6 +299,7 @@
>   #define   PIPE_CONTROL_QW_WRITE				(1<<14)
>   #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
>   #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
> +#define   PIPE_CONTROL_CCS_FLUSH			(1<<13) /* MTL+ */
>   #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
>   #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
>   #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */


More information about the Intel-gfx mailing list