[Intel-gfx] [PATCH v8] drm/i915: Extend LRC pinning to cover GPU context writeback
Yu Dai
yu.dai at intel.com
Wed Dec 9 08:44:00 PST 2015
Reviewed-by: Alex Dai <yu.dai at intel.com>
On 12/07/2015 09:10 AM, Nick Hoath wrote:
> Use the first retired request on a new context to unpin
> the old context. This ensures that the hw context remains
> bound until it has been written back to by the GPU.
> Now that the context is pinned until later in the request/context
> lifecycle, it no longer needs to be pinned from context_queue to
> retire_requests.
> This fixes an issue with GuC submission where the GPU might not
> have finished writing back the context before it is unpinned,
> which results in a GPU hang.
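
An aside for readers following along: the lifecycle described above can
be modelled outside the driver. The sketch below is a minimal,
self-contained C model - hypothetical names, not the driver API - of
the pin/dirty/unpin handoff: submission pins a context and marks it
dirty, and the pin is only dropped once a request on a *different*
context retires, which is the first point at which the hw is known to
have written the old context back.

#include <assert.h>
#include <stdbool.h>

struct model_ctx { int pin_count; bool dirty; };

static struct model_ctx *last_ctx;      /* mirrors ring->lrc_last_context */

static void model_submit(struct model_ctx *ctx)
{
        if (!ctx->dirty) {
                ctx->pin_count++;       /* cf. intel_lr_context_pin() */
                ctx->dirty = true;      /* pinned until writeback is proven */
        }
}

static void model_retire(struct model_ctx *ctx)
{
        /* cf. intel_lr_context_complete_check(): a retired request on a
         * new context proves the previous one was saved out. */
        if (last_ctx && last_ctx != ctx && last_ctx->dirty) {
                last_ctx->pin_count--;
                last_ctx->dirty = false;
        }
        last_ctx = ctx;
}

int main(void)
{
        struct model_ctx a = { 0 }, b = { 0 };

        model_submit(&a);
        model_submit(&a);       /* dirty flag guards against a double pin */
        model_retire(&a);       /* same context: a must stay pinned */
        assert(a.pin_count == 1);

        model_submit(&b);
        model_retire(&b);       /* different context retired: a unpinned */
        assert(a.pin_count == 0 && b.pin_count == 1);
        return 0;
}
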
>
> v2: Moved the new pin to cover GuC submission (Alex Dai)
> Moved the new unpin to request_retire to fix coverage leak
> v3: Added switch to default context if freeing a still pinned
> context just in case the hw was actually still using it
> v4: Unwrapped context unpin to allow calling without a request
> v5: Only create a switch to idle context if the ring doesn't
> already have a request pending on it (Alex Dai)
> Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> Split out per engine cleanup from context_free as it
> was getting unwieldy
> Corrected locking (Dave Gordon)
> v6: Removed some bikeshedding (Mika Kuoppala)
> Added explanation of the GuC hang that this fixes (Daniel Vetter)
> v7: Removed extra per request pinning from ring reset code (Alex Dai)
> Added forced ring unpin/clean in error case in context free (Alex Dai)
> v8: Renamed the lrc-specific last_context to lrc_last_context, as
> there were some reset cases where the codepaths leaked (Mika Kuoppala)
> NULL'd last_context in the reset case - there was a pointer leak
> if someone did a reset followed by a context close.
> Signed-off-by: Nick Hoath <nicholas.hoath at intel.com>
> Issue: VIZ-4277
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: David Gordon <david.s.gordon at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Alex Dai <yu.dai at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gem.c | 7 +-
> drivers/gpu/drm/i915/intel_lrc.c | 138 ++++++++++++++++++++++++++------
> drivers/gpu/drm/i915/intel_lrc.h | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 5 files changed, 121 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9ab3e25..a59ca13 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -884,6 +884,7 @@ struct intel_context {
> struct {
> struct drm_i915_gem_object *state;
> struct intel_ringbuffer *ringbuf;
> + bool dirty;
> int pin_count;
> } engine[I915_NUM_RINGS];
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a6997a8..cd27ecc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1362,6 +1362,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> {
> trace_i915_gem_request_retire(request);
>
> + if (i915.enable_execlists)
> + intel_lr_context_complete_check(request);
> +
> /* We know the GPU must have read the request to have
> * sent us the seqno + interrupt, so use the position
> * of tail of the request to update the last known position
> @@ -2772,10 +2775,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
> struct drm_i915_gem_request,
> execlist_link);
> list_del(&submit_req->execlist_link);
> -
> - if (submit_req->ctx != ring->default_context)
> - intel_lr_context_unpin(submit_req);
> -
> i915_gem_request_unreference(submit_req);
> }
> spin_unlock_irq(&ring->execlist_lock);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4ebafab..f96fb51 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -571,9 +571,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> struct drm_i915_gem_request *cursor;
> int num_elements = 0;
>
> - if (request->ctx != ring->default_context)
> - intel_lr_context_pin(request);
> -
> i915_gem_request_reference(request);
>
> spin_lock_irq(&ring->execlist_lock);
> @@ -737,6 +734,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> if (intel_ring_stopped(ring))
> return;
>
> + if (request->ctx != ring->default_context) {
> + if (!request->ctx->engine[ring->id].dirty) {
> + intel_lr_context_pin(request);
> + request->ctx->engine[ring->id].dirty = true;
> + }
> + }
> +
> if (dev_priv->guc.execbuf_client)
> i915_guc_submit(dev_priv->guc.execbuf_client, request);
> else
> @@ -963,12 +967,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> spin_unlock_irq(&ring->execlist_lock);
>
> list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> - struct intel_context *ctx = req->ctx;
> - struct drm_i915_gem_object *ctx_obj =
> - ctx->engine[ring->id].state;
> -
> - if (ctx_obj && (ctx != ring->default_context))
> - intel_lr_context_unpin(req);
> list_del(&req->execlist_link);
> i915_gem_request_unreference(req);
> }
> @@ -1063,21 +1061,39 @@ reset_pin_count:
> return ret;
> }
>
> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> {
> - struct intel_engine_cs *ring = rq->ring;
> - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> - struct intel_ringbuffer *ringbuf = rq->ringbuf;
> -
> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> if (ctx_obj) {
> WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> - if (--rq->ctx->engine[ring->id].pin_count == 0) {
> + if (--ctx->engine[ring->id].pin_count == 0) {
> intel_unpin_ringbuffer_obj(ringbuf);
> i915_gem_object_ggtt_unpin(ctx_obj);
> }
> }
> }
>
> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +{
> + __intel_lr_context_unpin(rq->ring, rq->ctx);
> +}
> +
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> +{
> + struct intel_engine_cs *ring = req->ring;
> +
> + if (ring->lrc_last_context && ring->lrc_last_context != req->ctx &&
> + ring->lrc_last_context->engine[ring->id].dirty) {
> + __intel_lr_context_unpin(
> + ring,
> + ring->lrc_last_context);
> + ring->lrc_last_context->engine[ring->id].dirty = false;
> + }
> + ring->lrc_last_context = req->ctx;
> +}
> +
> static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> {
> int ret, i;
> @@ -2352,6 +2368,76 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> }
>
> /**
> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> + * @ctx: the LR context being freed.
> + * @ring: the engine being cleaned
> + * @ctx_obj: the hw context being unreferenced
> + * @ringbuf: the ringbuf being freed
> + *
> + * Take care of cleaning up the per-engine backing
> + * objects and the logical ringbuffer.
> + */
> +static void
> +intel_lr_context_clean_ring(struct intel_context *ctx,
> + struct intel_engine_cs *ring,
> + struct drm_i915_gem_object *ctx_obj,
> + struct intel_ringbuffer *ringbuf)
> +{
> + int ret;
> +
> + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +
> + if (ctx == ring->default_context) {
> + intel_unpin_ringbuffer_obj(ringbuf);
> + i915_gem_object_ggtt_unpin(ctx_obj);
> + }
> +
> + if (ctx->engine[ring->id].dirty) {
> + struct drm_i915_gem_request *req = NULL;
> +
> + /**
> + * If there is already a request pending on
> + * this ring, wait for that to complete,
> + * otherwise create a switch to idle request
> + */
> + if (list_empty(&ring->request_list)) {
> + int ret;
> +
> + ret = i915_gem_request_alloc(
> + ring,
> + ring->default_context,
> + &req);
> + if (!ret)
> + i915_add_request(req);
> + else
> + DRM_DEBUG("Failed to ensure context saved\n");
> + } else {
> + req = list_first_entry(
> + &ring->request_list,
> + typeof(*req), list);
> + }
> + if (req) {
> + ret = i915_wait_request(req);
> + if (ret != 0) {
> + /**
> + * If we get here, there's probably been a ring
> + * reset, so we just clean up the dirty flag &
> + * pin count.
> + */
> + ctx->engine[ring->id].dirty = false;
> + __intel_lr_context_unpin(
> + ring,
> + ctx);
> + }
> + }
> + }
> +
> + WARN_ON(ctx->engine[ring->id].pin_count);
> + intel_ringbuffer_free(ringbuf);
> + drm_gem_object_unreference(&ctx_obj->base);
> +}
> +
> +/**
> * intel_lr_context_free() - free the LRC specific bits of a context
> * @ctx: the LR context to free.
> *
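
Another illustrative aside, reduced from intel_lr_context_clean_ring()
above - hypothetical model code, not the driver API: a dirty context
may still be in use by the hw, so before freeing it the driver either
waits on an already-pending request or emits a switch to the
always-pinned default context; a failed wait (e.g. across a ring reset)
means the dirty/pin bookkeeping must be dropped by hand so the pin is
not leaked.

#include <stdbool.h>
#include <stdio.h>

struct model_ctx { int pin_count; bool dirty; };

/* Stand-in for i915_wait_request(); assume it fails across a reset. */
static bool model_wait(bool reset_happened)
{
        return !reset_happened;
}

static void model_clean(struct model_ctx *ctx, bool queue_empty,
                        bool reset_happened)
{
        if (ctx->dirty) {
                if (queue_empty) {
                        /* Nothing in flight: the real code allocates a
                         * request on the default context and adds it,
                         * forcing the hw to save this context out. */
                }
                /* In the real code a completed request retires and does
                 * the deferred unpin for us, while a failed wait means
                 * cleaning up the dirty flag and pin count by hand;
                 * either way the context ends up clean and unpinned. */
                (void)model_wait(reset_happened);
                ctx->dirty = false;
                ctx->pin_count--;
        }
        if (ctx->pin_count)     /* cf. the WARN_ON before the free */
                fprintf(stderr, "context still pinned at free\n");
}

int main(void)
{
        struct model_ctx ctx = { .pin_count = 1, .dirty = true };

        model_clean(&ctx, true, false); /* idle ring, no reset */
        return ctx.pin_count;           /* 0: safe to free */
}
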
> @@ -2363,7 +2449,7 @@ void intel_lr_context_free(struct intel_context *ctx)
> {
> int i;
>
> - for (i = 0; i < I915_NUM_RINGS; i++) {
> + for (i = 0; i < I915_NUM_RINGS; ++i) {
> struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>
> if (ctx_obj) {
> @@ -2371,13 +2457,10 @@ void intel_lr_context_free(struct intel_context *ctx)
> ctx->engine[i].ringbuf;
> struct intel_engine_cs *ring = ringbuf->ring;
>
> - if (ctx == ring->default_context) {
> - intel_unpin_ringbuffer_obj(ringbuf);
> - i915_gem_object_ggtt_unpin(ctx_obj);
> - }
> - WARN_ON(ctx->engine[ring->id].pin_count);
> - intel_ringbuffer_free(ringbuf);
> - drm_gem_object_unreference(&ctx_obj->base);
> + intel_lr_context_clean_ring(ctx,
> + ring,
> + ctx_obj,
> + ringbuf);
> }
> }
> }
> @@ -2539,5 +2622,14 @@ void intel_lr_context_reset(struct drm_device *dev,
>
> ringbuf->head = 0;
> ringbuf->tail = 0;
> +
> + if (ctx->engine[ring->id].dirty) {
> + __intel_lr_context_unpin(
> + ring,
> + ctx);
> + ctx->engine[ring->id].dirty = false;
> + if (ring->lrc_last_context == ctx)
> + ring->lrc_last_context = NULL;
> + }
> }
> }
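
One last aside on the v8 change just above - again a hypothetical
model, not driver code: lrc_last_context can outlive a context across
a reset, and NULLing it is what keeps a reset-then-close sequence from
leaving a dangling pointer for the next complete_check to chase.

#include <stdbool.h>
#include <stdlib.h>

struct model_ctx { int pin_count; bool dirty; };

static struct model_ctx *last_ctx;      /* mirrors ring->lrc_last_context */

static void model_reset(struct model_ctx *ctx)
{
        if (ctx->dirty) {
                ctx->pin_count--;       /* cf. __intel_lr_context_unpin() */
                ctx->dirty = false;
                if (last_ctx == ctx)
                        last_ctx = NULL;        /* the v8 fix */
        }
}

int main(void)
{
        struct model_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return 1;

        /* The context was submitted at some point: pinned and dirty. */
        ctx->pin_count = 1;
        ctx->dirty = true;
        last_ctx = ctx;

        model_reset(ctx);       /* GPU reset while ctx was current */
        free(ctx);              /* userspace then closes the context */

        /* Without the NULLing, last_ctx would now dangle and the next
         * intel_lr_context_complete_check() could touch freed memory. */
        return last_ctx == NULL ? 0 : 1;
}
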
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 0b821b9..fd1b6b4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -91,6 +91,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> struct intel_context *ctx);
> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> struct intel_engine_cs *ring);
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>
> /* Execlists */
> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..7c913e7 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -275,6 +275,7 @@ struct intel_engine_cs {
> u32 flush_domains);
> int (*emit_bb_start)(struct drm_i915_gem_request *req,
> u64 offset, unsigned dispatch_flags);
> + struct intel_context *lrc_last_context;
>
> /**
> * List of objects currently involved in rendering from the