[Intel-gfx] [PATCH] drm/i915: Change context lifecycle
Nick Hoath
nicholas.hoath at intel.com
Thu Nov 26 01:19:44 PST 2015
On 26/11/2015 08:48, Daniel Vetter wrote:
> On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
>> Nick Hoath <nicholas.hoath at intel.com> writes:
>>
>>> Use the first retired request on a new context to unpin
>>> the old context. This ensures that the hw context remains
>>> bound until it has been written back to by the GPU.
>>> Now that the context is pinned until later in the request/context
>>> lifecycle, it no longer needs to be pinned from context_queue to
>>> retire_requests.
>>>
>>> v2: Moved the new pin to cover GuC submission (Alex Dai)
>>> Moved the new unpin to request_retire to fix coverage leak
>>> v3: Added switch to default context if freeing a still pinned
>>> context just in case the hw was actually still using it
>>> v4: Unwrapped context unpin to allow calling without a request
>>> v5: Only create a switch to idle context if the ring doesn't
>>> already have a request pending on it (Alex Dai)
>>> Rename unsaved to dirty to avoid double negatives (Dave Gordon)
>>> Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
>>> Split out per engine cleanup from context_free as it
>>> was getting unwieldy
>>> Corrected locking (Dave Gordon)
>>>
>>> Signed-off-by: Nick Hoath <nicholas.hoath at intel.com>
>>> Issue: VIZ-4277
>>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>>> Cc: David Gordon <david.s.gordon at intel.com>
>>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Alex Dai <yu.dai at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h | 1 +
>>> drivers/gpu/drm/i915/i915_gem.c | 3 +
>>> drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
>>> drivers/gpu/drm/i915/intel_lrc.h | 1 +
>>> 4 files changed, 105 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index d5cf30b..e82717a 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -889,6 +889,7 @@ struct intel_context {
>>> struct {
>>> struct drm_i915_gem_object *state;
>>> struct intel_ringbuffer *ringbuf;
>>> + bool dirty;
>>> int pin_count;
>>> } engine[I915_NUM_RINGS];
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index e955499..3829bc1 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>>> {
>>> trace_i915_gem_request_retire(request);
>>>
>>> + if (i915.enable_execlists)
>>> + intel_lr_context_complete_check(request);
>>> +
>>> /* We know the GPU must have read the request to have
>>> * sent us the seqno + interrupt, so use the position
>>> * of tail of the request to update the last known position
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 06180dc..03d5bca 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>>> struct drm_i915_gem_request *cursor;
>>> int num_elements = 0;
>>>
>>> - if (request->ctx != ring->default_context)
>>> - intel_lr_context_pin(request);
>>> -
>>> i915_gem_request_reference(request);
>>>
>>> spin_lock_irq(&ring->execlist_lock);
>>> @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>> if (intel_ring_stopped(ring))
>>> return;
>>>
>>> + if (request->ctx != ring->default_context) {
>>> + if (!request->ctx->engine[ring->id].dirty) {
>>> + intel_lr_context_pin(request);
>>> + request->ctx->engine[ring->id].dirty = true;
>>> + }
>>> + }
>>> +
>>> if (dev_priv->guc.execbuf_client)
>>> i915_guc_submit(dev_priv->guc.execbuf_client, request);
>>> else
>>> @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>>> spin_unlock_irq(&ring->execlist_lock);
>>>
>>> list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
>>> - struct intel_context *ctx = req->ctx;
>>> - struct drm_i915_gem_object *ctx_obj =
>>> - ctx->engine[ring->id].state;
>>> -
>>> - if (ctx_obj && (ctx != ring->default_context))
>>> - intel_lr_context_unpin(req);
>>> list_del(&req->execlist_link);
>>> i915_gem_request_unreference(req);
>>> }
>>> @@ -1058,21 +1056,39 @@ reset_pin_count:
>>> return ret;
>>> }
>>>
>>> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
>>> + struct intel_context *ctx)
>>> {
>>> - struct intel_engine_cs *ring = rq->ring;
>>> - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
>>> - struct intel_ringbuffer *ringbuf = rq->ringbuf;
>>> -
>>> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>> if (ctx_obj) {
>>> WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> - if (--rq->ctx->engine[ring->id].pin_count == 0) {
>>> + if (--ctx->engine[ring->id].pin_count == 0) {
>>> intel_unpin_ringbuffer_obj(ringbuf);
>>> i915_gem_object_ggtt_unpin(ctx_obj);
>>> }
>>> }
>>> }
>>>
>>> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +{
>>> + __intel_lr_context_unpin(rq->ring, rq->ctx);
>>> +}
>>> +
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
>>> +{
>>> + struct intel_engine_cs *ring = req->ring;
>>> +
>>> + if (ring->last_context && ring->last_context != req->ctx &&
>>> + ring->last_context->engine[ring->id].dirty) {
>>> + __intel_lr_context_unpin(
>>> + ring,
>>> + ring->last_context);
>>> + ring->last_context->engine[ring->id].dirty = false;
>>> + }
>>> + ring->last_context = req->ctx;
>>
>> What ensures that the last_context stays alive?
>
> The other part that's missing compared to the legacy context cycling logic
> is move_to_active. Without that the shrinker can't eat into retired
> contexts, which renders this exercise fairly moot imo.
>
> The overall idea is that since i915 does dynamic memory management, and
> that infects everything in the code that deals with gpu objects, we need
> to make sure that everyone is using the same logic to cycle through
> active objects. Otherwise the shrinker will be an unmaintainable hydra
> with per-feature special cases.
> -Daniel
This is the first part of the VIZ-4277 fix changeset that I'm working
on. It has been 'fasttracked' as it is needed to fix a lockup with GuC
submission. If you like, I can update the commit message to explain that.
>
>>
>>> +}
>>> +
>>> static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>>> {
>>> int ret, i;
>>> @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>>> }
>>>
>>> /**
>>> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
>>> + * @ctx: the LR context being freed.
>>> + * @ring: the engine being cleaned
>>> + * @ctx_obj: the hw context being unreferenced
>>> + * @ringbuf: the ringbuf being freed
>>> + *
>>> + * Take care of cleaning up the per-engine backing
>>> + * objects and the logical ringbuffer.
>>> + */
>>> +static void
>>> +intel_lr_context_clean_ring(struct intel_context *ctx,
>>> + struct intel_engine_cs *ring,
>>> + struct drm_i915_gem_object *ctx_obj,
>>> + struct intel_ringbuffer *ringbuf)
>>> +{
>>> + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> +
>>> + if (ctx == ring->default_context) {
>>> + intel_unpin_ringbuffer_obj(ringbuf);
>>> + i915_gem_object_ggtt_unpin(ctx_obj);
>>> + }
>>> +
>>> + if (ctx->engine[ring->id].dirty) {
>>> + struct drm_i915_gem_request *req = NULL;
>>> +
>>> + /**
>>> + * If there is already a request pending on
>>> + * this ring, wait for that to complete,
>>> + * otherwise create a switch to idle request
>>> + */
>>> + if (list_empty(&ring->request_list)) {
>>> + int ret;
>>> +
>>> + ret = i915_gem_request_alloc(
>>> + ring,
>>> + ring->default_context,
>>> + &req);
>>> + if (!ret)
>>> + i915_add_request(req);
>>> + else
>>> + DRM_DEBUG("Failed to ensure context saved");
>>> + } else {
>>> + req = list_first_entry(
>>> + &ring->request_list,
>>> + typeof(*req), list);
>>> + }
>>> + if (req)
>>> + i915_wait_request(req);
>>> + }
>>> +
>>> + WARN_ON(ctx->engine[ring->id].pin_count);
>>> + intel_ringbuffer_free(ringbuf);
>>> + drm_gem_object_unreference(&ctx_obj->base);
>>> +}
>>> +
>>> +/**
>>> * intel_lr_context_free() - free the LRC specific bits of a context
>>> * @ctx: the LR context to free.
>>> *
>>> @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
>>> {
>>> int i;
>>>
>>> - for (i = 0; i < I915_NUM_RINGS; i++) {
>>> + for (i = 0; i < I915_NUM_RINGS; ++i) {
>>> struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>>>
>>> - if (ctx_obj) {
>>> + if (ctx->engine[i].state) {
>>
>> ++i and the above are superflouous?
>>
>> -Mika
>>
>>> struct intel_ringbuffer *ringbuf =
>>> ctx->engine[i].ringbuf;
>>> struct intel_engine_cs *ring = ringbuf->ring;
>>>
>>> - if (ctx == ring->default_context) {
>>> - intel_unpin_ringbuffer_obj(ringbuf);
>>> - i915_gem_object_ggtt_unpin(ctx_obj);
>>> - }
>>> - WARN_ON(ctx->engine[ring->id].pin_count);
>>> - intel_ringbuffer_free(ringbuf);
>>> - drm_gem_object_unreference(&ctx_obj->base);
>>> + intel_lr_context_clean_ring(ctx,
>>> + ring,
>>> + ctx_obj,
>>> + ringbuf);
>>> }
>>> }
>>> }
>>> @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>>>
>>> ringbuf->head = 0;
>>> ringbuf->tail = 0;
>>> +
>>> + if (ctx->engine[ring->id].dirty) {
>>> + __intel_lr_context_unpin(
>>> + ring,
>>> + ctx);
>>> + ctx->engine[ring->id].dirty = false;
>>> + }
>>> }
>>> }
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 4e60d54..cbc42b8 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>>> struct intel_context *ctx);
>>> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>> struct intel_engine_cs *ring);
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>>>
>>> /* Execlists */
>>> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> Intel-gfx mailing list
>>> Intel-gfx at lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
More information about the Intel-gfx
mailing list