[Intel-gfx] [PATCH] drm/i915: Change context lifecycle

Wed Nov 25 04:52:04 PST 2015

On 25/11/2015 01:11, Dai, Yu wrote:
>
>
> On 11/24/2015 08:23 AM, Nick Hoath wrote:
>> Use the first retired request on a new context to unpin
>> the old context. This ensures that the hw context remains
>> bound until it has been saved.
>> Now that the context is pinned until later in the request/context
>> lifecycle, it no longer needs to be pinned from context_queue to
>> retire_requests.
>> This is to solve a hang with GuC submission, and a theoretical
>> issue with execlist submission.
>>
>> v2: Moved the new pin to cover GuC submission (Alex Dai)
>>       Moved the new unpin to request_retire to fix coverage leak
>> v3: Added switch to default context if freeing a still pinned
>>       context just in case the hw was actually still using it
>> v4: Unwrapped context unpin to allow calling without a request
>>
>> Signed-off-by: Nick Hoath <nicholas.hoath at intel.com>
>> Issue: VIZ-4277
>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>> Cc: David Gordon <david.s.gordon at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Alex Dai <yu.dai at intel.com>
>> ---
>>    drivers/gpu/drm/i915/i915_drv.h  |  1 +
>>    drivers/gpu/drm/i915/i915_gem.c  |  9 ++++-
>>    drivers/gpu/drm/i915/intel_lrc.c | 73 ++++++++++++++++++++++++++++++----------
>>    drivers/gpu/drm/i915/intel_lrc.h |  1 +
>>    4 files changed, 65 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index d5cf30b..4d2f44c 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -889,6 +889,7 @@ struct intel_context {
>>    	struct {
>>    		struct drm_i915_gem_object *state;
>>    		struct intel_ringbuffer *ringbuf;
>> +		bool unsaved;
>>    		int pin_count;
>>    	} engine[I915_NUM_RINGS];
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index e955499..6fee473 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1354,6 +1354,14 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>>    {
>>    	trace_i915_gem_request_retire(request);
>>
>> +	if (i915.enable_execlists) {
>> +		unsigned long flags;
>> +
>> +		spin_lock_irqsave(&request->ring->execlist_lock, flags);
>> +		intel_lr_context_complete_check(request);
>> +		spin_unlock_irqrestore(&request->ring->execlist_lock, flags);
>> +	}
>> +
>>    	/* We know the GPU must have read the request to have
>>    	 * sent us the seqno + interrupt, so use the position
>>    	 * of tail of the request to update the last known position
>> @@ -1384,7 +1392,6 @@ __i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
>>    	do {
>>    		tmp = list_first_entry(&engine->request_list,
>>    				       typeof(*tmp), list);
>> -
>>    		i915_gem_request_retire(tmp);
>>    	} while (tmp != req);
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 06180dc..a527c21 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>>    	struct drm_i915_gem_request *cursor;
>>    	int num_elements = 0;
>>
>> -	if (request->ctx != ring->default_context)
>> -		intel_lr_context_pin(request);
>> -
>>    	i915_gem_request_reference(request);
>>
>>    	spin_lock_irq(&ring->execlist_lock);
>> @@ -728,10 +725,16 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>    	intel_logical_ring_advance(request->ringbuf);
>>
>>    	request->tail = request->ringbuf->tail;
>> -
>>    	if (intel_ring_stopped(ring))
>>    		return;
>>
>> +	if (request->ctx != ring->default_context) {
>> +		if (!request->ctx->engine[ring->id].unsaved) {
>> +			intel_lr_context_pin(request);
>> +			request->ctx->engine[ring->id].unsaved = true;
>> +		}
>> +	}
>> +
>>    	if (dev_priv->guc.execbuf_client)
>>    		i915_guc_submit(dev_priv->guc.execbuf_client, request);
>>    	else
>> @@ -958,12 +961,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>>    	spin_unlock_irq(&ring->execlist_lock);
>>
>>    	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
>> -		struct intel_context *ctx = req->ctx;
>> -		struct drm_i915_gem_object *ctx_obj =
>> -				ctx->engine[ring->id].state;
>> -
>> -		if (ctx_obj && (ctx != ring->default_context))
>> -			intel_lr_context_unpin(req);
>>    		list_del(&req->execlist_link);
>>    		i915_gem_request_unreference(req);
>>    	}
>> @@ -1058,21 +1055,41 @@ reset_pin_count:
>>    	return ret;
>>    }
>>
>> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>> +static void intel_lr_context_unpin_no_req(struct intel_engine_cs *ring,
>> +		struct intel_context *ctx)
>>    {
>> -	struct intel_engine_cs *ring = rq->ring;
>> -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
>> -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
>> -
>> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>    	if (ctx_obj) {
>>    		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>> -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
>> +		if (--ctx->engine[ring->id].pin_count == 0) {
>>    			intel_unpin_ringbuffer_obj(ringbuf);
>>    			i915_gem_object_ggtt_unpin(ctx_obj);
>>    		}
>>    	}
>>    }
>>
>> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>> +{
>> +	intel_lr_context_unpin_no_req(rq->ring, rq->ctx);
>> +}
>> +
>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
>> +{
>> +	struct intel_engine_cs *ring = req->ring;
>> +
>> +	assert_spin_locked(&ring->execlist_lock);
>> +
>> +	if (ring->last_context && ring->last_context != req->ctx &&
>> +			ring->last_context->engine[ring->id].unsaved) {
>
> The context pin is done for every submission, but the unpin is done
> conditionally. I think engine.pin_count will get out of sync.
The context pin is only done when the unsaved flag is NOT set. The 
pin_count does not go out of sync.
>
>> +		intel_lr_context_unpin_no_req(
>> +				ring,
>> +				ring->last_context);
>> +		ring->last_context->engine[ring->id].unsaved = false;
>> +	}
>> +	ring->last_context = req->ctx;
>> +}
>> +
>>    static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>>    {
>>    	int ret, i;
>> @@ -2378,7 +2395,7 @@ void intel_lr_context_free(struct intel_context *ctx)
>>    {
>>    	int i;
>>
>> -	for (i = 0; i < I915_NUM_RINGS; i++) {
>> +	for (i = 0; i < I915_NUM_RINGS; ++i) {
>>    		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>>
>>    		if (ctx_obj) {
>> @@ -2390,7 +2407,20 @@ void intel_lr_context_free(struct intel_context *ctx)
>>    				intel_unpin_ringbuffer_obj(ringbuf);
>>    				i915_gem_object_ggtt_unpin(ctx_obj);
>>    			}
>> -			WARN_ON(ctx->engine[ring->id].pin_count);
>> +
>> +			if (ctx->engine[ring->id].unsaved) {
>> +				int ret;
>> +				struct drm_i915_gem_request *req;
>> +
>> +				ret = i915_gem_request_alloc(ring,
>> +					ring->default_context, &req);
>> +				if (!ret) {
>> +					i915_add_request(req);
>> +					i915_wait_request(req);
>> +				}
>
> This will wait for all requests to be finished. Waiting for the head
> is good enough. If there is no request in the queue, then add one
> from the default context.
>
I'll make this change & upload a new patch.
> Alex
>> +			}
>> +
>> +			WARN_ON(ctx->engine[i].pin_count);
>>    			intel_ringbuffer_free(ringbuf);
>>    			drm_gem_object_unreference(&ctx_obj->base);
>>    		}
>> @@ -2554,5 +2584,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>>
>>    		ringbuf->head = 0;
>>    		ringbuf->tail = 0;
>> +
>> +		if (ctx->engine[ring->id].unsaved) {
>> +			intel_lr_context_unpin_no_req(
>> +					ring,
>> +					ctx);
>> +			ctx->engine[ring->id].unsaved = false;
>> +		}
>>    	}
>>    }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>> index 4e60d54..cbc42b8 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>>    			struct intel_context *ctx);
>>    uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>    				     struct intel_engine_cs *ring);
>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>>
>>    /* Execlists */
>>    int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
>