[Intel-gfx] [PATCH] drm/i915/gt: Scrub execlists state on resume

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Apr 17 12:09:52 UTC 2020


On 16/04/2020 12:41, Chris Wilson wrote:
> Before we resume, we reset the HW so we restart from a known good state.
> However, as a part of the reset process, we drain our pending CS event
> queue -- and if we are resuming that does not correspond to internal
> state. On setup, we are scrubbing the CS pointers, but alas only on
> setup.
> 
> Apply the sanitization not just to setup, but to all resumes.
> 
> Reported-by: Venkata Ramana Nayana <venkata.ramana.nayana at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Venkata Ramana Nayana <venkata.ramana.nayana at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c        |  4 ++
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 72 +++++++++++---------
>   3 files changed, 43 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index f3c9d302ecf8..ebe20de7eb70 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -411,6 +411,7 @@ struct intel_engine_cs {
>   	void		(*irq_enable)(struct intel_engine_cs *engine);
>   	void		(*irq_disable)(struct intel_engine_cs *engine);
>   
> +	void		(*sanitize)(struct intel_engine_cs *engine);
>   	int		(*resume)(struct intel_engine_cs *engine);
>   
>   	struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 3e8a56c7d818..6bdb74892a1e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -147,6 +147,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
>   	if (intel_gt_is_wedged(gt))
>   		intel_gt_unset_wedged(gt);
>   
> +	for_each_engine(engine, gt, id)
> +		if (engine->sanitize)
> +			engine->sanitize(engine);
> +
>   	intel_uc_sanitize(&gt->uc);
>   
>   	for_each_engine(engine, gt, id)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 6fbad5e2343f..34f67eb9bfa1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3615,6 +3615,43 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>   	return ret;
>   }
>   
> +static void reset_csb_pointers(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	const unsigned int reset_value = execlists->csb_size - 1;
> +
> +	ring_set_paused(engine, 0);
> +
> +	/*
> +	 * After a reset, the HW starts writing into CSB entry [0]. We
> +	 * therefore have to set our HEAD pointer back one entry so that
> +	 * the *first* entry we check is entry 0. To complicate this further,
> +	 * as we don't wait for the first interrupt after reset, we have to
> +	 * fake the HW write to point back to the last entry so that our
> +	 * inline comparison of our cached head position against the last HW
> +	 * write works even before the first interrupt.
> +	 */
> +	execlists->csb_head = reset_value;
> +	WRITE_ONCE(*execlists->csb_write, reset_value);
> +	wmb(); /* Make sure this is visible to HW (paranoia?) */
> +
> +	/*
> +	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
> +	 * Bludgeon them with a mmio update to be sure.
> +	 */
> +	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
> +		     reset_value << 8 | reset_value);
> +	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
> +
> +	invalidate_csb_entries(&execlists->csb_status[0],
> +			       &execlists->csb_status[reset_value]);
> +}
> +
> +static void execlists_sanitize(struct intel_engine_cs *engine)
> +{
> +	reset_csb_pointers(engine);
> +}
> +
>   static void enable_error_interrupt(struct intel_engine_cs *engine)
>   {
>   	u32 status;
> @@ -3754,38 +3791,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>   	intel_engine_stop_cs(engine);
>   }
>   
> -static void reset_csb_pointers(struct intel_engine_cs *engine)
> -{
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const unsigned int reset_value = execlists->csb_size - 1;
> -
> -	ring_set_paused(engine, 0);
> -
> -	/*
> -	 * After a reset, the HW starts writing into CSB entry [0]. We
> -	 * therefore have to set our HEAD pointer back one entry so that
> -	 * the *first* entry we check is entry 0. To complicate this further,
> -	 * as we don't wait for the first interrupt after reset, we have to
> -	 * fake the HW write to point back to the last entry so that our
> -	 * inline comparison of our cached head position against the last HW
> -	 * write works even before the first interrupt.
> -	 */
> -	execlists->csb_head = reset_value;
> -	WRITE_ONCE(*execlists->csb_write, reset_value);
> -	wmb(); /* Make sure this is visible to HW (paranoia?) */
> -
> -	/*
> -	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
> -	 * Bludgeon them with a mmio update to be sure.
> -	 */
> -	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
> -		     reset_value << 8 | reset_value);
> -	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
> -
> -	invalidate_csb_entries(&execlists->csb_status[0],
> -			       &execlists->csb_status[reset_value]);
> -}
> -
>   static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
>   {
>   	int x;
> @@ -4545,6 +4550,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   {
>   	/* Default vfuncs which can be overriden by each engine. */
>   
> +	engine->sanitize = execlists_sanitize;
>   	engine->resume = execlists_resume;
>   
>   	engine->cops = &execlists_context_ops;
> @@ -4659,8 +4665,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>   	else
>   		execlists->csb_size = GEN11_CSB_ENTRIES;
>   
> -	reset_csb_pointers(engine);
> -
>   	/* Finally, take ownership and responsibility for cleanup! */
>   	engine->release = execlists_release;
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list