[Intel-gfx] [PATCH 07/29] drm/i915: Only reset the pinned kernel contexts on resume
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Apr 10 09:39:01 UTC 2019
On 08/04/2019 10:17, Chris Wilson wrote:
> On resume, we know that the only pinned contexts in danger of seeing
> corruption are the kernel context, and so we do not need to walk the
> list of all GEM contexts as we tracked them on each engine.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/intel_context_types.h | 1 +
> drivers/gpu/drm/i915/gt/intel_engine.h | 3 +-
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 18 +++++++
> drivers/gpu/drm/i915/gt/intel_lrc.c | 49 +++++++++----------
> drivers/gpu/drm/i915/gt/intel_lrc.h | 1 -
> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 17 +++----
> drivers/gpu/drm/i915/i915_drv.h | 1 -
> drivers/gpu/drm/i915/i915_gem.c | 9 ++--
> 8 files changed, 54 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 2109610e21e8..9ec4f787c908 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -25,6 +25,7 @@ struct intel_context_ops {
> int (*pin)(struct intel_context *ce);
> void (*unpin)(struct intel_context *ce);
>
> + void (*reset)(struct intel_context *ce);
> void (*destroy)(struct kref *kref);
> };
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index e58d6f04177b..0dea6c7fd438 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -268,8 +268,6 @@ static inline void intel_ring_put(struct intel_ring *ring)
> void intel_engine_stop(struct intel_engine_cs *engine);
> void intel_engine_cleanup(struct intel_engine_cs *engine);
>
> -void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
> -
> int __must_check intel_ring_cacheline_align(struct i915_request *rq);
>
> u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
> @@ -463,6 +461,7 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
> }
>
> void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
> +void intel_gt_resume(struct drm_i915_private *i915);
>
> bool intel_engine_is_idle(struct intel_engine_cs *engine);
> bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 68eebf292314..fdfce11e4944 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -758,6 +758,24 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
> return ret;
> }
>
> +void intel_gt_resume(struct drm_i915_private *i915)
> +{
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> +
> + /*
> + * After resume, we may need to poke into the pinned kernel
> + * contexts to paper over any damage caused by the sudden suspend.
> + * Only the kernel contexts should remain pinned over suspend,
> + * allowing us to fixup the user contexts on their first pin.
> + */
> + for_each_engine(engine, i915, id) {
> + struct intel_context *ce = engine->kernel_context;
> +
> + ce->ops->reset(ce);
> + }
> +}
> +
> /**
> * intel_engines_cleanup_common - cleans up the engine state created by
> * the common initiailizers.
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 85bdae1db76c..1565b11b15f6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1378,9 +1378,33 @@ static int execlists_context_pin(struct intel_context *ce)
> return __execlists_context_pin(ce, ce->engine);
> }
>
> +static void execlists_context_reset(struct intel_context *ce)
> +{
> + /*
> + * Because we emit WA_TAIL_DWORDS there may be a disparity
> + * between our bookkeeping in ce->ring->head and ce->ring->tail and
> + * that stored in context. As we only write new commands from
> + * ce->ring->tail onwards, everything before that is junk. If the GPU
> + * starts reading from its RING_HEAD from the context, it may try to
> + * execute that junk and die.
> + *
> + * The contexts that are still pinned on resume belong to the
> + * kernel, and are local to each engine. All other contexts will
> + * have their head/tail sanitized upon pinning before use, so they
> + * will never see garbage.
> + *
> + * So to avoid that we reset the context images upon resume. For
> + * simplicity, we just zero everything out.
> + */
> + intel_ring_reset(ce->ring, 0);
> + __execlists_update_reg_state(ce, ce->engine);
> +}
> +
> static const struct intel_context_ops execlists_context_ops = {
> .pin = execlists_context_pin,
> .unpin = execlists_context_unpin,
> +
> + .reset = execlists_context_reset,
> .destroy = execlists_context_destroy,
> };
>
> @@ -2762,31 +2786,6 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
> return ret;
> }
>
> -void intel_lr_context_resume(struct drm_i915_private *i915)
> -{
> - struct i915_gem_context *ctx;
> - struct intel_context *ce;
> -
> - /*
> - * Because we emit WA_TAIL_DWORDS there may be a disparity
> - * between our bookkeeping in ce->ring->head and ce->ring->tail and
> - * that stored in context. As we only write new commands from
> - * ce->ring->tail onwards, everything before that is junk. If the GPU
> - * starts reading from its RING_HEAD from the context, it may try to
> - * execute that junk and die.
> - *
> - * So to avoid that we reset the context images upon resume. For
> - * simplicity, we just zero everything out.
> - */
> - list_for_each_entry(ctx, &i915->contexts.list, link) {
> - list_for_each_entry(ce, &ctx->active_engines, active_link) {
> - GEM_BUG_ON(!ce->ring);
> - intel_ring_reset(ce->ring, 0);
> - __execlists_update_reg_state(ce, ce->engine);
> - }
> - }
> -}
> -
> void intel_execlists_show_requests(struct intel_engine_cs *engine,
> struct drm_printer *m,
> void (*show_request)(struct drm_printer *m,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index b3274e707423..a598f2d56de3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -100,7 +100,6 @@ struct drm_printer;
>
> struct drm_i915_private;
>
> -void intel_lr_context_resume(struct drm_i915_private *dev_priv);
> void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
>
> void intel_execlists_show_requests(struct intel_engine_cs *engine,
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index be83cb8167f3..bcc842b8bbe7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1507,9 +1507,16 @@ static int ring_context_pin(struct intel_context *ce)
> return err;
> }
>
> +static void ring_context_reset(struct intel_context *ce)
> +{
> + intel_ring_reset(ce->ring, 0);
> +}
> +
> static const struct intel_context_ops ring_context_ops = {
> .pin = ring_context_pin,
> .unpin = ring_context_unpin,
> +
> + .reset = ring_context_reset,
> .destroy = ring_context_destroy,
> };
>
> @@ -1580,16 +1587,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
> kfree(engine);
> }
>
> -void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
> -{
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> -
> - /* Restart from the beginning of the rings for convenience */
> - for_each_engine(engine, dev_priv, id)
> - intel_ring_reset(engine->buffer, 0);
> -}
> -
> static int load_pd_dir(struct i915_request *rq,
> const struct i915_hw_ppgtt *ppgtt)
> {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 312278723712..fad5306f07da 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1996,7 +1996,6 @@ struct drm_i915_private {
>
> /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
> struct {
> - void (*resume)(struct drm_i915_private *);
> void (*cleanup_engine)(struct intel_engine_cs *engine);
>
> struct i915_gt_timelines {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0aa28d622d2a..a8b0aa05288b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4514,7 +4514,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
> * guarantee that the context image is complete. So let's just reset
> * it and start again.
> */
> - i915->gt.resume(i915);
> + intel_gt_resume(i915);
>
> if (i915_gem_init_hw(i915))
> goto err_wedged;
> @@ -4854,13 +4854,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>
> dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
>
> - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
> - dev_priv->gt.resume = intel_lr_context_resume;
> + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
> dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
> - } else {
> - dev_priv->gt.resume = intel_legacy_submission_resume;
> + else
> dev_priv->gt.cleanup_engine = intel_engine_cleanup;
> - }
>
> i915_timelines_init(dev_priv);
>
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list