[Intel-gfx] [PATCH] drm/i915: Change context lifecycle
Nick Hoath
nicholas.hoath at intel.com
Thu Nov 26 01:19:44 PST 2015
On 26/11/2015 08:48, Daniel Vetter wrote:
> On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
>> Nick Hoath <nicholas.hoath at intel.com> writes:
>>
>>> Use the first retired request on a new context to unpin
>>> the old context. This ensures that the hw context remains
>>> bound until it has been written back to by the GPU.
>>> Now that the context is pinned until later in the request/context
>>> lifecycle, it no longer needs to be pinned from context_queue to
>>> retire_requests.
>>>
>>> v2: Moved the new pin to cover GuC submission (Alex Dai)
>>> Moved the new unpin to request_retire to fix coverage leak
>>> v3: Added switch to default context if freeing a still pinned
>>> context just in case the hw was actually still using it
>>> v4: Unwrapped context unpin to allow calling without a request
>>> v5: Only create a switch to idle context if the ring doesn't
>>> already have a request pending on it (Alex Dai)
>>> Rename unsaved to dirty to avoid double negatives (Dave Gordon)
>>> Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
>>> Split out per engine cleanup from context_free as it
>>> was getting unwieldy
>>> Corrected locking (Dave Gordon)
>>>
>>> Signed-off-by: Nick Hoath <nicholas.hoath at intel.com>
>>> Issue: VIZ-4277
>>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>>> Cc: David Gordon <david.s.gordon at intel.com>
>>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Alex Dai <yu.dai at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h | 1 +
>>> drivers/gpu/drm/i915/i915_gem.c | 3 +
>>> drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
>>> drivers/gpu/drm/i915/intel_lrc.h | 1 +
>>> 4 files changed, 105 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index d5cf30b..e82717a 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -889,6 +889,7 @@ struct intel_context {
>>> struct {
>>> struct drm_i915_gem_object *state;
>>> struct intel_ringbuffer *ringbuf;
>>> + bool dirty;
>>> int pin_count;
>>> } engine[I915_NUM_RINGS];
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index e955499..3829bc1 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>>> {
>>> trace_i915_gem_request_retire(request);
>>>
>>> + if (i915.enable_execlists)
>>> + intel_lr_context_complete_check(request);
>>> +
>>> /* We know the GPU must have read the request to have
>>> * sent us the seqno + interrupt, so use the position
>>> * of tail of the request to update the last known position
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 06180dc..03d5bca 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>>> struct drm_i915_gem_request *cursor;
>>> int num_elements = 0;
>>>
>>> - if (request->ctx != ring->default_context)
>>> - intel_lr_context_pin(request);
>>> -
>>> i915_gem_request_reference(request);
>>>
>>> spin_lock_irq(&ring->execlist_lock);
>>> @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>> if (intel_ring_stopped(ring))
>>> return;
>>>
>>> + if (request->ctx != ring->default_context) {
>>> + if (!request->ctx->engine[ring->id].dirty) {
>>> + intel_lr_context_pin(request);
>>> + request->ctx->engine[ring->id].dirty = true;
>>> + }
>>> + }
>>> +
>>> if (dev_priv->guc.execbuf_client)
>>> i915_guc_submit(dev_priv->guc.execbuf_client, request);
>>> else
>>> @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>>> spin_unlock_irq(&ring->execlist_lock);
>>>
>>> list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
>>> - struct intel_context *ctx = req->ctx;
>>> - struct drm_i915_gem_object *ctx_obj =
>>> - ctx->engine[ring->id].state;
>>> -
>>> - if (ctx_obj && (ctx != ring->default_context))
>>> - intel_lr_context_unpin(req);
>>> list_del(&req->execlist_link);
>>> i915_gem_request_unreference(req);
>>> }
>>> @@ -1058,21 +1056,39 @@ reset_pin_count:
>>> return ret;
>>> }
>>>
>>> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
>>> + struct intel_context *ctx)
>>> {
>>> - struct intel_engine_cs *ring = rq->ring;
>>> - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
>>> - struct intel_ringbuffer *ringbuf = rq->ringbuf;
>>> -
>>> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>> if (ctx_obj) {
>>> WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> - if (--rq->ctx->engine[ring->id].pin_count == 0) {
>>> + if (--ctx->engine[ring->id].pin_count == 0) {
>>> intel_unpin_ringbuffer_obj(ringbuf);
>>> i915_gem_object_ggtt_unpin(ctx_obj);
>>> }
>>> }
>>> }
>>>
>>> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +{
>>> + __intel_lr_context_unpin(rq->ring, rq->ctx);
>>> +}
>>> +
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
>>> +{
>>> + struct intel_engine_cs *ring = req->ring;
>>> +
>>> + if (ring->last_context && ring->last_context != req->ctx &&
>>> + ring->last_context->engine[ring->id].dirty) {
>>> + __intel_lr_context_unpin(
>>> + ring,
>>> + ring->last_context);
>>> + ring->last_context->engine[ring->id].dirty = false;
>>> + }
>>> + ring->last_context = req->ctx;
>>
>> What ensures that the last_context stays alive?
>
> The other part that's missing compared to the legacy context cycling logic
> is move_to_active. Without that the shrinker can't eat into retired
> contexts, which renders this exercise fairly moot imo.
>
> The overall idea is that since i915 does dynamic memory management, and
> that infects everything in the code that deals with gpu objects, we need
> to make sure that everyone is using the same logic to cycle through
> active objects. Otherwise the shrinker will be an unmaintainable hydra
> with per-feature special cases.
> -Daniel
This is the first part of the VIZ-4277 fix changeset that I'm working
on. It has been 'fasttracked' as it is needed to fix a lockup with GuC
submission. If you like, I can update the commit message to explain that.
>
>>
>>> +}
>>> +
>>> static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>>> {
>>> int ret, i;
>>> @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>>> }
>>>
>>> /**
>>> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
>>> + * @ctx: the LR context being freed.
>>> + * @ring: the engine being cleaned
>>> + * @ctx_obj: the hw context being unreferenced
>>> + * @ringbuf: the ringbuf being freed
>>> + *
>>> + * Take care of cleaning up the per-engine backing
>>> + * objects and the logical ringbuffer.
>>> + */
>>> +static void
>>> +intel_lr_context_clean_ring(struct intel_context *ctx,
>>> + struct intel_engine_cs *ring,
>>> + struct drm_i915_gem_object *ctx_obj,
>>> + struct intel_ringbuffer *ringbuf)
>>> +{
>>> + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> +
>>> + if (ctx == ring->default_context) {
>>> + intel_unpin_ringbuffer_obj(ringbuf);
>>> + i915_gem_object_ggtt_unpin(ctx_obj);
>>> + }
>>> +
>>> + if (ctx->engine[ring->id].dirty) {
>>> + struct drm_i915_gem_request *req = NULL;
>>> +
>>> + /**
>>> + * If there is already a request pending on
>>> + * this ring, wait for that to complete,
>>> + * otherwise create a switch to idle request
>>> + */
>>> + if (list_empty(&ring->request_list)) {
>>> + int ret;
>>> +
>>> + ret = i915_gem_request_alloc(
>>> + ring,
>>> + ring->default_context,
>>> + &req);
>>> + if (!ret)
>>> + i915_add_request(req);
>>> + else
>>> + DRM_DEBUG("Failed to ensure context saved");
>>> + } else {
>>> + req = list_first_entry(
>>> + &ring->request_list,
>>> + typeof(*req), list);
>>> + }
>>> + if (req)
>>> + i915_wait_request(req);
>>> + }
>>> +
>>> + WARN_ON(ctx->engine[ring->id].pin_count);
>>> + intel_ringbuffer_free(ringbuf);
>>> + drm_gem_object_unreference(&ctx_obj->base);
>>> +}
>>> +
>>> +/**
>>> * intel_lr_context_free() - free the LRC specific bits of a context
>>> * @ctx: the LR context to free.
>>> *
>>> @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
>>> {
>>> int i;
>>>
>>> - for (i = 0; i < I915_NUM_RINGS; i++) {
>>> + for (i = 0; i < I915_NUM_RINGS; ++i) {
>>> struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>>>
>>> - if (ctx_obj) {
>>> + if (ctx->engine[i].state) {
>>
>> ++i and the above are superflouous?
>>
>> -Mika
>>
>>> struct intel_ringbuffer *ringbuf =
>>> ctx->engine[i].ringbuf;
>>> struct intel_engine_cs *ring = ringbuf->ring;
>>>
>>> - if (ctx == ring->default_context) {
>>> - intel_unpin_ringbuffer_obj(ringbuf);
>>> - i915_gem_object_ggtt_unpin(ctx_obj);
>>> - }
>>> - WARN_ON(ctx->engine[ring->id].pin_count);
>>> - intel_ringbuffer_free(ringbuf);
>>> - drm_gem_object_unreference(&ctx_obj->base);
>>> + intel_lr_context_clean_ring(ctx,
>>> + ring,
>>> + ctx_obj,
>>> + ringbuf);
>>> }
>>> }
>>> }
>>> @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>>>
>>> ringbuf->head = 0;
>>> ringbuf->tail = 0;
>>> +
>>> + if (ctx->engine[ring->id].dirty) {
>>> + __intel_lr_context_unpin(
>>> + ring,
>>> + ctx);
>>> + ctx->engine[ring->id].dirty = false;
>>> + }
>>> }
>>> }
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 4e60d54..cbc42b8 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>>> struct intel_context *ctx);
>>> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>> struct intel_engine_cs *ring);
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>>>
>>> /* Execlists */
>>> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> Intel-gfx mailing list
>>> Intel-gfx at lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
More information about the Intel-gfx
mailing list