[Intel-xe] [PATCH 2/2] drm/xe: Emit a render cache flush after each rcs/ccs batch
Souza, Jose
jose.souza at intel.com
Wed Jun 7 18:44:36 UTC 2023
On Wed, 2023-06-07 at 19:47 +0200, Thomas Hellström wrote:
> We need to flush render caches before fence signalling, since at that
> point we might release the memory for reuse. We can't rely on userspace
> doing this, so flush render caches after the batch, but before user-fence
> and dma_fence signalling.
>
> Copy the cache flush from i915, but omit PIPE_CONTROL_FLUSH_L3, since it
> should be implied by the other flushes. Also omit
> PIPE_CONTROL_TLB_INVALIDATE, since there is no apparent need to
> invalidate the TLB after batch completion.
Reviewed-by: José Roberto de Souza <jose.souza at intel.com>
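
A note for anyone reading this in the archive later: the ordering is the whole
point here. My rough mental model of the ring tail this gives each rcs/ccs job
(names from xe_ring_ops.c; an illustration of the ordering, not the literal
dword stream):

/*
 * MI_BATCH_BUFFER_START        user batch executes
 * PIPE_CONTROL                 emit_render_cache_flush(): CS stall plus
 *                              tile/RT/depth/DC cache flushes, so the batch's
 *                              writes land in memory
 * MI_STORE_DATA_IMM (PPGTT)    optional user-fence value write
 * PIPE_CONTROL w/ post-sync    seqno write to GGTT (presumably via
 *                              emit_pipe_imm_ggtt() further down), which is
 *                              what lets the dma_fence signal
 *
 * By the time either fence is observable, the render caches have been
 * flushed, so releasing or reusing the backing memory is safe.
 */
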
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  3 ++
> drivers/gpu/drm/xe/xe_ring_ops.c          | 35 +++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_wa_oob.rules        |  1 +
> 3 files changed, 39 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> index d2d41f717525..dd3408fd3d33 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> @@ -66,6 +66,9 @@
> #define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1)
>
> #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> +
> +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
> +
> #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29)
> #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28)
> #define PIPE_CONTROL_AMFS_FLUSH (1<<25)
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index c20fe41c0729..91511f72d971 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -5,6 +5,7 @@
>
> #include "xe_ring_ops.h"
>
> +#include "generated/xe_wa_oob.h"
> #include "regs/xe_gpu_commands.h"
> #include "regs/xe_gt_regs.h"
> #include "regs/xe_lrc_layout.h"
> @@ -16,6 +17,7 @@
> #include "xe_sched_job.h"
> #include "xe_vm_types.h"
> #include "xe_vm.h"
> +#include "xe_wa.h"
>
> /*
> * 3D-related flags that can't be set on _engines_ that lack access to the 3D
> @@ -147,6 +149,37 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
>         return i;
> }
>
> +static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
> +{
> +        struct xe_gt *gt = job->engine->gt;
> +        bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
> +        u32 flags;
> +
> +        flags = (PIPE_CONTROL_CS_STALL |
> +                 PIPE_CONTROL_TILE_CACHE_FLUSH |
> +                 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +                 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +                 PIPE_CONTROL_DC_FLUSH_ENABLE |
> +                 PIPE_CONTROL_FLUSH_ENABLE);
> +
> +        if (XE_WA(gt, 1409600907))
> +                flags |= PIPE_CONTROL_DEPTH_STALL;
> +
> +        if (lacks_render)
> +                flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> +        else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
> +                flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +
> +        dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +        dw[i++] = flags;
> +        dw[i++] = 0;
> +        dw[i++] = 0;
> +        dw[i++] = 0;
> +        dw[i++] = 0;
> +
> +        return i;
> +}
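
Side note for readers, not a blocker: the four trailing zeros are the post-sync
address/data dwords of the 6-dword PIPE_CONTROL, and they stay zero here because
no post-sync operation is requested. Roughly how I read the layout, as a sketch
using the defines from xe_gpu_commands.h (emit_flush_only_pipe_control() is a
name made up for the sketch, it is not in the tree):

#include <linux/types.h>
#include "regs/xe_gpu_commands.h"

static int emit_flush_only_pipe_control(u32 *dw, int i, u32 flags0, u32 flags1)
{
        dw[i++] = GFX_OP_PIPE_CONTROL(6) | flags0; /* DW0: opcode, length, dword-0 flags */
        dw[i++] = flags1;                          /* DW1: stall/flush control bits */
        dw[i++] = 0;                               /* DW2: post-sync address, low (unused) */
        dw[i++] = 0;                               /* DW3: post-sync address, high (unused) */
        dw[i++] = 0;                               /* DW4: post-sync immediate data, low (unused) */
        dw[i++] = 0;                               /* DW5: post-sync immediate data, high (unused) */

        return i;
}

With a post-sync write selected, DW2-DW5 would carry the target address and
immediate value instead, which is presumably what emit_pipe_imm_ggtt() below
uses for the seqno.
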
> +
> static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
>                               int i)
> {
> @@ -279,6 +312,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
>
>         i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
>
> +        i = emit_render_cache_flush(job, dw, i);
> +
>         if (job->user_fence.used)
>                 i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
>                                                 job->user_fence.value,
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 1ecb10390b28..15c23813398a 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -14,3 +14,4 @@
>                 SUBPLATFORM(DG2, G12)
> 18020744125     PLATFORM(PVC)
> 1509372804      PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
> +1409600907      GRAPHICS_VERSION_RANGE(1200, 1250)