[Intel-gfx] [PATCH v2] drm/i915/tgl: Suspend pre-parser across GTT invalidations

Fri Sep 20 08:14:36 UTC 2019

Chris Wilson <chris at chris-wilson.co.uk> writes:

> Before we execute a batch, we must first issue any and all TLB
> invalidations so that the batch picks up the new page table entries.
> Tigerlake's preparser is weakening our post-sync CS_STALL inside the
> invalidate pipe-control and allowing the loading of the batch buffer
> before we have set up its page table (and so it loads the wrong page and
> executes indefinitely).
>
> The igt_cs_tlb indicates that this issue can only be observed on rcs,
> even though the preparser is common to all engines. Alternatively, we
> could do TLB shootdown via mmio on updating the GTT.
>
> By inserting the pre-parser disable inside EMIT_INVALIDATE, we will also
> accidentally fix up execution that writes into subsequent batches, such
> as gem_exec_whisper and even relocations performed on the GPU. We should
> be careful not to allow this disable to become baked into the uABI!
>
> Testcase: igt/i915_selftests/live_gtt/igt_cs_tlb
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 75 ++++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a99166a2d2eb..60b7b163c3d0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2807,6 +2807,79 @@ static int gen11_emit_flush_render(struct i915_request *request,
>  	return 0;
>  }
>  
> +static u32 preparser_disable(bool state)
> +{
> +	return MI_ARB_CHECK | 1 << 8 | state;
> +}

Descriptive enough, so no need to define the mask.

Acked-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>

> +
> +static int gen12_emit_flush_render(struct i915_request *request,
> +				   u32 mode)
> +{
> +	struct intel_engine_cs *engine = request->engine;
> +	const u32 scratch_addr =
> +		intel_gt_scratch_offset(engine->gt,
> +					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
> +
> +	if (mode & EMIT_FLUSH) {
> +		u32 flags = 0;
> +		u32 *cs;
> +
> +		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
> +		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +		flags |= PIPE_CONTROL_FLUSH_ENABLE;
> +
> +		flags |= PIPE_CONTROL_QW_WRITE;
> +		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +
> +		flags |= PIPE_CONTROL_CS_STALL;
> +
> +		cs = intel_ring_begin(request, 6);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +		intel_ring_advance(request, cs);
> +	}
> +
> +	if (mode & EMIT_INVALIDATE) {
> +		u32 flags = 0;
> +		u32 *cs;
> +
> +		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_TLB_INVALIDATE;
> +		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +
> +		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +		flags |= PIPE_CONTROL_QW_WRITE;
> +
> +		flags |= PIPE_CONTROL_CS_STALL;
> +
> +		cs = intel_ring_begin(request, 8);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		/*
> +		 * Prevent the pre-parser from skipping past the TLB
> +		 * invalidate and loading a stale page for the batch
> +		 * buffer / request payload.
> +		 */
> +		*cs++ = preparser_disable(true);
> +
> +		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +
> +		*cs++ = preparser_disable(false);
> +		intel_ring_advance(request, cs);
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Reserve space for 2 NOOPs at the end of each request to be
>   * used as a workaround for not being allowed to do lite
> @@ -3072,7 +3145,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
>  {
>  	switch (INTEL_GEN(engine->i915)) {
>  	case 12:
> -		engine->emit_flush = gen11_emit_flush_render;
> +		engine->emit_flush = gen12_emit_flush_render;
>  		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
>  		break;
>  	case 11:
> -- 
> 2.23.0