[Intel-gfx] [PATCH 3/3] drm/i915/gt: Clear SET_PREDICATE_RESULT prior to executing the ring

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed May 4 14:44:29 UTC 2022


On 25/04/2022 16:23, Ramalingam C wrote:
> From: Chris Wilson <chris.p.wilson at intel.com>
> 
> Userspace may leave predication enabled upon return from the batch
> buffer, which has the consequent of preventing all operation from the
> ring from being executed, including all the synchronisation, coherency
> control, arbitration and user signaling. This is more than just a local
> gpu hang in one client, as the user has the ability to prevent the
> kernel from applying critical workarounds and can cause a full GT reset.
> 
> We could simply execute MI_SET_PREDICATE upon return from the user
> batch, but this has the repercussion of modifying the user's context
> state. Instead, we opt to execute a fixup batch which by mixing
> predicated operations can determine the state of the
> SET_PREDICATE_RESULT register and restore it prior to the next userspace
> batch. This allows us to protect the kernel's ring without changing the
> uABI.
> 
> Suggested-by: Zbigniew Kempczynski <zbigniew.kempczynski at intel.com>
> Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
> Cc: Zbigniew Kempczynski <zbigniew.kempczynski at intel.com>
> Cc: Thomas Hellstrom <thomas.hellstrom at intel.com>
> Signed-off-by: Ramalingam C <ramalingam.c at intel.com>

Applies to Gen12+ so Cc: stable?

> ---
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 54 +++++++++++++
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.h      |  7 ++
>   drivers/gpu/drm/i915/gt/intel_engine_regs.h   |  1 +
>   .../drm/i915/gt/intel_execlists_submission.c  | 15 +++-
>   drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  2 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 75 ++++++++++++++-----
>   drivers/gpu/drm/i915/gt/intel_lrc.h           |  5 ++
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  2 +
>   8 files changed, 137 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 9529c5455bc3..3e13960615bd 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -5,6 +5,7 @@
>   
>   #include "gen8_engine_cs.h"
>   #include "i915_drv.h"
> +#include "intel_engine_regs.h"
>   #include "intel_gpu_commands.h"
>   #include "intel_lrc.h"
>   #include "intel_ring.h"
> @@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
>   	return 0;
>   }
>   
> +static int __gen125_emit_bb_start(struct i915_request *rq,
> +				  u64 offset, u32 len,
> +				  const unsigned int flags,
> +				  u32 arb)
> +{
> +	struct intel_context *ce = rq->context;
> +	u32 wa_offset = lrc_indirect_bb(ce);
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(rq, 12);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_ARB_ON_OFF | arb;
> +
> +	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
> +		MI_SRM_LRM_GLOBAL_GTT |
> +		MI_LRI_LRM_CS_MMIO;
> +	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
> +	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
> +	*cs++ = 0;
> +
> +	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
> +		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
> +	*cs++ = lower_32_bits(offset);
> +	*cs++ = upper_32_bits(offset);
> +
> +	/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
> +	*cs++ = MI_BATCH_BUFFER_START_GEN8;
> +	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
> +	*cs++ = 0;
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +int gen125_emit_bb_start_noarb(struct i915_request *rq,
> +			       u64 offset, u32 len,
> +			       const unsigned int flags)
> +{
> +	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
> +}
> +
> +int gen125_emit_bb_start(struct i915_request *rq,
> +			 u64 offset, u32 len,
> +			 const unsigned int flags)
> +{
> +	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
> +}
> +
>   int gen8_emit_bb_start_noarb(struct i915_request *rq,
>   			     u64 offset, u32 len,
>   			     const unsigned int flags)
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> index 107ab42539ab..32e3d2b831bb 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> @@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq,
>   		       u64 offset, u32 len,
>   		       const unsigned int flags);
>   
> +int gen125_emit_bb_start_noarb(struct i915_request *rq,
> +			       u64 offset, u32 len,
> +			       const unsigned int flags);
> +int gen125_emit_bb_start(struct i915_request *rq,
> +			 u64 offset, u32 len,
> +			 const unsigned int flags);
> +
>   u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
>   u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> index 1dab554bf640..75a0c55c5aa5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> @@ -148,6 +148,7 @@
>   		(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
>   		 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
>   
> +#define RING_PREDICATE_RESULT(base)		_MMIO((base) + 0x3b8) /* gen12+ */
>   #define MI_PREDICATE_RESULT_2(base)		_MMIO((base) + 0x3bc)
>   #define   LOWER_SLICE_ENABLED			(1 << 0)
>   #define   LOWER_SLICE_DISABLED			(0 << 0)
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index f8749c433b7c..86f7a9ac1c39 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   		}
>   	}
>   
> -	if (intel_engine_has_preemption(engine))
> -		engine->emit_bb_start = gen8_emit_bb_start;
> -	else
> -		engine->emit_bb_start = gen8_emit_bb_start_noarb;
> +	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
> +		if (intel_engine_has_preemption(engine))
> +			engine->emit_bb_start = gen125_emit_bb_start;
> +		else
> +			engine->emit_bb_start = gen125_emit_bb_start_noarb;
> +	} else {
> +		if (intel_engine_has_preemption(engine))
> +			engine->emit_bb_start = gen8_emit_bb_start;
> +		else
> +			engine->emit_bb_start = gen8_emit_bb_start_noarb;
> +	}
>   
>   	engine->busyness = execlists_engine_busyness;
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index e52718a87f14..556bca3be804 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -39,6 +39,8 @@
>   #define  MI_GLOBAL_GTT    (1<<22)
>   
>   #define MI_NOOP			MI_INSTR(0, 0)
> +#define MI_SET_PREDICATE	MI_INSTR(0x01, 0)
> +#define   MI_SET_PREDICATE_DISABLE	(0 << 0)
>   #define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
>   #define MI_WAIT_FOR_EVENT       MI_INSTR(0x03, 0)
>   #define   MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3f83a9038e13..eec73c66406c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
>   			     engine->name);
>   }
>   
> +static u32 context_wa_bb_offset(const struct intel_context *ce)
> +{
> +	return PAGE_SIZE * ce->wa_bb_page;
> +}
> +
> +static u32 *context_indirect_bb(const struct intel_context *ce)
> +{
> +	void *ptr;
> +
> +	GEM_BUG_ON(!ce->wa_bb_page);
> +
> +	ptr = ce->lrc_reg_state;
> +	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
> +	ptr += context_wa_bb_offset(ce);
> +
> +	return ptr;
> +}
> +
>   void lrc_init_state(struct intel_context *ce,
>   		    struct intel_engine_cs *engine,
>   		    void *state)
> @@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce,
>   	/* Clear the ppHWSP (inc. per-context counters) */
>   	memset(state, 0, PAGE_SIZE);
>   
> +	/* Clear the indirect wa and storage */
> +	if (ce->wa_bb_page)
> +		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
> +
>   	/*
>   	 * The second page of the context object contains some registers which
>   	 * must be set up prior to the first execution.
> @@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce,
>   	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
>   }
>   
> +u32 lrc_indirect_bb(const struct intel_context *ce)
> +{
> +	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
> +}
> +
> +static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
> +{
> +	/* If predication is active, this will be noop'ed */
> +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
> +	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
> +	*cs++ = 0;
> +	*cs++ = 0; /* No predication */
> +
> +	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
> +	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
> +	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
> +
> +	/* Instructions are no longer predicated (disabled), we can proceed */
> +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
> +	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
> +	*cs++ = 0;
> +	*cs++ = 1; /* enable predication before the next BB */
> +
> +	*cs++ = MI_BATCH_BUFFER_END;
> +	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
> +
> +	return cs;
> +}
> +
>   static struct i915_vma *
>   __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
>   {
> @@ -1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
>   	return cs;
>   }
>   
> -static u32 context_wa_bb_offset(const struct intel_context *ce)
> -{
> -	return PAGE_SIZE * ce->wa_bb_page;
> -}
> -
> -static u32 *context_indirect_bb(const struct intel_context *ce)
> -{
> -	void *ptr;
> -
> -	GEM_BUG_ON(!ce->wa_bb_page);
> -
> -	ptr = ce->lrc_reg_state;
> -	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
> -	ptr += context_wa_bb_offset(ce);
> -
> -	return ptr;
> -}
> -
>   static void
>   setup_indirect_ctx_bb(const struct intel_context *ce,
>   		      const struct intel_engine_cs *engine,
> @@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce,
>   	while ((unsigned long)cs % CACHELINE_BYTES)
>   		*cs++ = MI_NOOP;
>   
> +	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
> +	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

AFAICT this applies to Gen12+ but mention of DG2 is then confusing me. 
Should DG2_PREDICATE_RESULT_BB have been named GEN12_..?

Regards,

Tvrtko

> +
>   	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
> -			       i915_ggtt_offset(ce->state) +
> -			       context_wa_bb_offset(ce),
> +			       lrc_indirect_bb(ce),
>   			       (cs - start) * sizeof(*cs));
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 7371bb5c8129..31be734010db 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce)
>   	WRITE_ONCE(stats->active, 0);
>   }
>   
> +#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64))
> +#define DG2_PREDICATE_RESULT_BB (2048)
> +
> +u32 lrc_indirect_bb(const struct intel_context *ce);
> +
>   #endif /* __INTEL_LRC_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 61a6f2424e24..addfdc2c2642 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -3910,6 +3910,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
>   	 */
>   
>   	engine->emit_bb_start = gen8_emit_bb_start;
> +	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
> +		engine->emit_bb_start = gen125_emit_bb_start;
>   }
>   
>   static void rcs_submission_override(struct intel_engine_cs *engine)


More information about the Intel-gfx mailing list