[Intel-gfx] [PATCH 2/5] drm/i915/gt: Push engine stopping into reset-prepare
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jul 17 13:04:34 UTC 2019
On 16/07/2019 13:49, Chris Wilson wrote:
> Push the engine stop into the back reset_prepare (where it already was!)
> This allows us to avoid dangerously setting the RING registers to 0 for
> logical contexts. If we clear the register on a live context, those
> invalid register values are recorded in the logical context state and
> replayed (with hilarious results).
So essentially statement is gen3_stop_engine is not needed and even
dangerous with execlists?
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++-
> drivers/gpu/drm/i915/gt/intel_reset.c | 58 ----------------------
> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 40 ++++++++++++++-
> 3 files changed, 53 insertions(+), 61 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 9e0992498087..9b87a2fc186c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2173,11 +2173,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
> __tasklet_disable_sync_once(&execlists->tasklet);
> GEM_BUG_ON(!reset_in_progress(execlists));
>
> - intel_engine_stop_cs(engine);
> -
> /* And flush any current direct submission. */
> spin_lock_irqsave(&engine->active.lock, flags);
> spin_unlock_irqrestore(&engine->active.lock, flags);
> +
> + /*
> + * We stop engines, otherwise we might get failed reset and a
> + * dead gpu (on elk). Also as modern gpu as kbl can suffer
> + * from system hang if batchbuffer is progressing when
> + * the reset is issued, regardless of READY_TO_RESET ack.
> + * Thus assume it is best to stop engines on all gens
> + * where we have a gpu reset.
> + *
> + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
> + *
> + * FIXME: Wa for more modern gens needs to be validated
> + */
> + intel_engine_stop_cs(engine);
> }
>
> static void reset_csb_pointers(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 7ddedfb16aa2..55e2ddcbd215 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -135,47 +135,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
> }
> }
>
> -static void gen3_stop_engine(struct intel_engine_cs *engine)
> -{
> - struct intel_uncore *uncore = engine->uncore;
> - const u32 base = engine->mmio_base;
> -
> - GEM_TRACE("%s\n", engine->name);
> -
> - if (intel_engine_stop_cs(engine))
> - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
> -
> - intel_uncore_write_fw(uncore,
> - RING_HEAD(base),
> - intel_uncore_read_fw(uncore, RING_TAIL(base)));
> - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
> -
> - intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
> - intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
> - intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
> -
> - /* The ring must be empty before it is disabled */
> - intel_uncore_write_fw(uncore, RING_CTL(base), 0);
> -
> - /* Check acts as a post */
> - if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
> - GEM_TRACE("%s: ring head [%x] not parked\n",
> - engine->name,
> - intel_uncore_read_fw(uncore, RING_HEAD(base)));
> -}
> -
> -static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask)
> -{
> - struct intel_engine_cs *engine;
> - intel_engine_mask_t tmp;
> -
> - if (INTEL_GEN(gt->i915) < 3)
> - return;
> -
> - for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
> - gen3_stop_engine(engine);
> -}
> -
> static bool i915_in_reset(struct pci_dev *pdev)
> {
> u8 gdrst;
> @@ -607,23 +566,6 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
> */
> intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
> for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
> - /*
> - * We stop engines, otherwise we might get failed reset and a
> - * dead gpu (on elk). Also as modern gpu as kbl can suffer
> - * from system hang if batchbuffer is progressing when
> - * the reset is issued, regardless of READY_TO_RESET ack.
> - * Thus assume it is best to stop engines on all gens
> - * where we have a gpu reset.
> - *
> - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
> - *
> - * WaMediaResetMainRingCleanup:ctg,elk (presumably)
> - *
> - * FIXME: Wa for more modern gens needs to be validated
> - */
> - if (retry)
> - stop_engines(gt, engine_mask);
> -
Only other functional change I see is that we stop retrying to stop the
engines before reset attempts. I don't know if that is a concern or not.
Regards,
Tvrtko
> GEM_TRACE("engine_mask=%x\n", engine_mask);
> preempt_disable();
> ret = reset(gt, engine_mask, retry);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index f1e571fa2e6d..213df144be15 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -739,7 +739,45 @@ static int xcs_resume(struct intel_engine_cs *engine)
>
> static void reset_prepare(struct intel_engine_cs *engine)
> {
> - intel_engine_stop_cs(engine);
> + struct intel_uncore *uncore = engine->uncore;
> + const u32 base = engine->mmio_base;
> +
> + /*
> + * We stop engines, otherwise we might get failed reset and a
> + * dead gpu (on elk). Also as modern gpu as kbl can suffer
> + * from system hang if batchbuffer is progressing when
> + * the reset is issued, regardless of READY_TO_RESET ack.
> + * Thus assume it is best to stop engines on all gens
> + * where we have a gpu reset.
> + *
> + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
> + *
> + * WaMediaResetMainRingCleanup:ctg,elk (presumably)
> + *
> + * FIXME: Wa for more modern gens needs to be validated
> + */
> + GEM_TRACE("%s\n", engine->name);
> +
> + if (intel_engine_stop_cs(engine))
> + GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
> +
> + intel_uncore_write_fw(uncore,
> + RING_HEAD(base),
> + intel_uncore_read_fw(uncore, RING_TAIL(base)));
> + intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
> +
> + intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
> + intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
> + intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
> +
> + /* The ring must be empty before it is disabled */
> + intel_uncore_write_fw(uncore, RING_CTL(base), 0);
> +
> + /* Check acts as a post */
> + if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
> + GEM_TRACE("%s: ring head [%x] not parked\n",
> + engine->name,
> + intel_uncore_read_fw(uncore, RING_HEAD(base)));
> }
>
> static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>
More information about the Intel-gfx
mailing list