[Intel-xe] [PATCH 2/2] drm/xe: Emit a render cache flush after each rcs/ccs batch

Souza, Jose jose.souza at intel.com
Wed Jun 7 18:44:36 UTC 2023


On Wed, 2023-06-07 at 19:47 +0200, Thomas Hellström wrote:
> We need to flush render caches before fence signalling, where we might
> release the memory for reuse. We can't rely on userspace doing this,
> so flush render caches after the batch, but before user-fence and
> dma_fence signalling.
> 
> Copy the cache flush from i915, but omit PIPE_CONTROL_FLUSH_L3, since it
> should be implied by the other flushes. Also omit
> PIPE_CONTROL_TLB_INVALIDATE, since there is no apparent need to
> invalidate the TLB after batch completion.

Reviewed-by: José Roberto de Souza <jose.souza at intel.com>

> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  3 ++
>  drivers/gpu/drm/xe/xe_ring_ops.c          | 35 +++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_wa_oob.rules        |  1 +
>  3 files changed, 39 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> index d2d41f717525..dd3408fd3d33 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> @@ -66,6 +66,9 @@
>  #define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
>  
>  #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> +
> +#define   PIPE_CONTROL0_HDC_PIPELINE_FLUSH		REG_BIT(9)  /* gen12 */
> +
>  #define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE		(1<<29)
>  #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
>  #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index c20fe41c0729..91511f72d971 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -5,6 +5,7 @@
>  
>  #include "xe_ring_ops.h"
>  
> +#include "generated/xe_wa_oob.h"
>  #include "regs/xe_gpu_commands.h"
>  #include "regs/xe_gt_regs.h"
>  #include "regs/xe_lrc_layout.h"
> @@ -16,6 +17,7 @@
>  #include "xe_sched_job.h"
>  #include "xe_vm_types.h"
>  #include "xe_vm.h"
> +#include "xe_wa.h"
>  
>  /*
>   * 3D-related flags that can't be set on _engines_ that lack access to the 3D
> @@ -147,6 +149,37 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
>  	return i;
>  }
>  
> +static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
> +{
> +	struct xe_gt *gt = job->engine->gt;
> +	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
> +	u32 flags;
> +
> +	flags = (PIPE_CONTROL_CS_STALL |
> +		 PIPE_CONTROL_TILE_CACHE_FLUSH |
> +		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +		 PIPE_CONTROL_DC_FLUSH_ENABLE |
> +		 PIPE_CONTROL_FLUSH_ENABLE);
> +
> +	if (XE_WA(gt, 1409600907))
> +		flags |= PIPE_CONTROL_DEPTH_STALL;
> +
> +	if (lacks_render)
> +		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> +	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
> +		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +
> +	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +	dw[i++] = flags;
> +	dw[i++] = 0;
> +	dw[i++] = 0;
> +	dw[i++] = 0;
> +	dw[i++] = 0;
> +
> +	return i;
> +}
> +
>  static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
>  			      int i)
>  {
> @@ -279,6 +312,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
>  
>  	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
>  
> +	i = emit_render_cache_flush(job, dw, i);
> +
>  	if (job->user_fence.used)
>  		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
>  						job->user_fence.value,
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 1ecb10390b28..15c23813398a 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -14,3 +14,4 @@
>  		SUBPLATFORM(DG2, G12)
>  18020744125	PLATFORM(PVC)
>  1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
> +1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)



More information about the Intel-xe mailing list