[Intel-gfx] [PATCH 08/16] drm/i915: Unify active context tracking between legacy/execlists/guc
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Dec 9 11:48:15 UTC 2016
On 07/12/2016 13:58, Chris Wilson wrote:
> The requests conversion introduced a nasty bug where we could generate a
> new request in the middle of constructing a request. The new request
> would be executed (and waited upon) before the current one, creating a
> minor havoc in the seqno accounting. (Prior to deferred seqno
> assignment, this would mean that the seqno would be out of order, and
> the current request would be deemed complete even before it was
> submitted.)
How exactly can it generate and submit a request while creating a
request? It would be good to describe that here.
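Is it the path hinted at by the comment you are removing from
do_rcs_switch()? A rough sketch of my guess (the nesting below is my
assumption, not verified):

	i915_gem_request_alloc(engine, ctx)
	  -> context pin (engine->context_pin / the old legacy pin)
	    -> i915_vma_pin()                  /* needs GGTT space */
	      -> eviction ("evict_everything" per the removed comment)
	        -> i915_gem_switch_to_kernel_context()
	          -> i915_gem_request_alloc()  /* nested request */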
> We also employed two different mechanisms to track the active context
> until it was switched out. The legacy method allowed for waiting upon an
> active context (it could forcibly evict any vma, including context's),
> but the execlists method took a step backwards by pinning the vma for
> the entire active lifespan of the context (the only way to evict was to
> idle the entire GPU, not individual contexts). However, to circumvent
Because of the pin in request creation?
> the tricky issue of locking (i.e. we cannot take struct_mutex at the
> time of i915_gem_request_submit(), where we would want to move the
> previous context onto the active tracker and unpin it), we take the
> execlists approach and keep the contexts pinned until retirement.
> The benefit of the execlists approach, more important for execlists than
> legacy, was the reduction in work in pinning the context for each
> request - as the context was kept pinned until idle, it could short
> circuit the pinning for all active contexts.
>
> We introduce new engine vfuncs to pin and unpin the context
> respectively. The context is pinned at the start of the request, and
> only unpinned when the following request is retired (this ensures that
> the context is idle and coherent in main memory before we unpin it). We
> move the engine->last_context tracking into the retirement itself
> (rather than during request submission) in order to allow the submission
> to be reordered or unwound without undue difficulty.
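So, if I follow, the lifecycle for consecutive requests N and N+1 on the
same engine is roughly this (my reading of the patch, please correct me):

	request_alloc(N):    engine->context_pin(engine, ctx_N);
	request_retire(N):   engine->context_unpin(engine, engine->last_context);
	                     /* drops the pin taken for request N-1 */
	                     engine->last_context = ctx_N;  /* ctx_N stays pinned... */
	request_retire(N+1): engine->context_unpin(engine, ctx_N);  /* ...until here */
	                     engine->last_context = ctx_N_plus_1;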
>
> And finally an ulterior motive for unifying context handling was to
> prepare for mock requests.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 --
> drivers/gpu/drm/i915/i915_gem_context.c | 102 +++--------------------------
> drivers/gpu/drm/i915/i915_gem_request.c | 38 +++++++----
> drivers/gpu/drm/i915/i915_gem_request.h | 11 ----
> drivers/gpu/drm/i915/i915_guc_submission.c | 11 ----
> drivers/gpu/drm/i915/i915_perf.c | 18 ++---
> drivers/gpu/drm/i915/intel_engine_cs.c | 21 +++++-
> drivers/gpu/drm/i915/intel_lrc.c | 62 ++++++------------
> drivers/gpu/drm/i915/intel_lrc.h | 5 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 57 +++++++++-------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++
> 11 files changed, 122 insertions(+), 211 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8daa4fb13b52..7c228622716a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2250,7 +2250,6 @@ struct drm_i915_private {
> struct i915_perf_stream *exclusive_stream;
>
> u32 specific_ctx_id;
> - struct i915_vma *pinned_rcs_vma;
>
> struct hrtimer poll_check_timer;
> wait_queue_head_t poll_wq;
> @@ -3290,9 +3289,6 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
> int i915_switch_context(struct drm_i915_gem_request *req);
> int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
> -struct i915_vma *
> -i915_gem_context_pin_legacy(struct i915_gem_context *ctx,
> - unsigned int flags);
> void i915_gem_context_free(struct kref *ctx_ref);
> struct i915_gem_context *
> i915_gem_context_create_gvt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a57c22659a3c..95812c26767c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -416,21 +416,6 @@ i915_gem_context_create_gvt(struct drm_device *dev)
> return ctx;
> }
>
> -static void i915_gem_context_unpin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> -{
> - if (i915.enable_execlists) {
> - intel_lr_context_unpin(ctx, engine);
> - } else {
> - struct intel_context *ce = &ctx->engine[engine->id];
> -
> - if (ce->state)
> - i915_vma_unpin(ce->state);
> -
> - i915_gem_context_put(ctx);
> - }
> -}
> -
> int i915_gem_context_init(struct drm_i915_private *dev_priv)
> {
> struct i915_gem_context *ctx;
> @@ -490,10 +475,11 @@ void i915_gem_context_lost(struct drm_i915_private *dev_priv)
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
> for_each_engine(engine, dev_priv, id) {
> - if (engine->last_context) {
> - i915_gem_context_unpin(engine->last_context, engine);
> - engine->last_context = NULL;
> - }
> + if (!engine->last_context)
> + continue;
> +
> + engine->context_unpin(engine, engine->last_context);
> + engine->last_context = NULL;
> }
>
> /* Force the GPU state to be restored on enabling */
> @@ -761,57 +747,20 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
> return false;
> }
>
> -struct i915_vma *
> -i915_gem_context_pin_legacy(struct i915_gem_context *ctx,
> - unsigned int flags)
> -{
> - struct i915_vma *vma = ctx->engine[RCS].state;
> - int ret;
> -
> - /* Clear this page out of any CPU caches for coherent swap-in/out.
> - * We only want to do this on the first bind so that we do not stall
> - * on an active context (which by nature is already on the GPU).
> - */
> - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
> - if (ret)
> - return ERR_PTR(ret);
> - }
> -
> - ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
> - if (ret)
> - return ERR_PTR(ret);
> -
> - return vma;
> -}
> -
> static int do_rcs_switch(struct drm_i915_gem_request *req)
> {
> struct i915_gem_context *to = req->ctx;
> struct intel_engine_cs *engine = req->engine;
> struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
> - struct i915_vma *vma;
> - struct i915_gem_context *from;
> + struct i915_gem_context *from = engine->last_context;
> u32 hw_flags;
> int ret, i;
>
> + GEM_BUG_ON(engine->id != RCS);
> +
> if (skip_rcs_switch(ppgtt, engine, to))
> return 0;
>
> - /* Trying to pin first makes error handling easier. */
> - vma = i915_gem_context_pin_legacy(to, 0);
> - if (IS_ERR(vma))
> - return PTR_ERR(vma);
> -
> - /*
> - * Pin can switch back to the default context if we end up calling into
> - * evict_everything - as a last ditch gtt defrag effort that also
> - * switches to the default context. Hence we need to reload from here.
> - *
> - * XXX: Doing so is painfully broken!
> - */
> - from = engine->last_context;
> -
> if (needs_pd_load_pre(ppgtt, engine, to)) {
> /* Older GENs and non render rings still want the load first,
> * "PP_DCLV followed by PP_DIR_BASE register through Load
> @@ -820,7 +769,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> trace_switch_mm(engine, to);
> ret = ppgtt->switch_mm(ppgtt, req);
> if (ret)
> - goto err;
> + return ret;
> }
>
> if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
> @@ -837,29 +786,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
> ret = mi_set_context(req, hw_flags);
> if (ret)
> - goto err;
> - }
> -
> - /* The backing object for the context is done after switching to the
> - * *next* context. Therefore we cannot retire the previous context until
> - * the next context has already started running. In fact, the below code
> - * is a bit suboptimal because the retiring can occur simply after the
> - * MI_SET_CONTEXT instead of when the next seqno has completed.
> - */
> - if (from != NULL) {
> - /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
> - * whole damn pipeline, we don't need to explicitly mark the
> - * object dirty. The only exception is that the context must be
> - * correct in case the object gets swapped out. Ideally we'd be
> - * able to defer doing this until we know the object would be
> - * swapped, but there is no way to do that yet.
> - */
> - i915_vma_move_to_active(from->engine[RCS].state, req, 0);
> - /* state is kept alive until the next request */
> - i915_vma_unpin(from->engine[RCS].state);
> - i915_gem_context_put(from);
> + return ret;
> }
> - engine->last_context = i915_gem_context_get(to);
>
> /* GEN8 does *not* require an explicit reload if the PDPs have been
> * setup, and we do not wish to move them.
> @@ -900,10 +828,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> }
>
> return 0;
> -
> -err:
> - i915_vma_unpin(vma);
> - return ret;
> }
>
> /**
> @@ -943,12 +867,6 @@ int i915_switch_context(struct drm_i915_gem_request *req)
> ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> }
>
> - if (to != engine->last_context) {
> - if (engine->last_context)
> - i915_gem_context_put(engine->last_context);
> - engine->last_context = i915_gem_context_get(to);
> - }
> -
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index fcf22b0e2967..06e9a607d934 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -206,6 +206,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
>
> static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> {
> + struct intel_engine_cs *engine = request->engine;
> struct i915_gem_active *active, *next;
>
> lockdep_assert_held(&request->i915->drm.struct_mutex);
> @@ -216,9 +217,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>
> trace_i915_gem_request_retire(request);
>
> - spin_lock_irq(&request->engine->timeline->lock);
> + spin_lock_irq(&engine->timeline->lock);
> list_del_init(&request->link);
> - spin_unlock_irq(&request->engine->timeline->lock);
> + spin_unlock_irq(&engine->timeline->lock);
>
> /* We know the GPU must have read the request to have
> * sent us the seqno + interrupt, so use the position
> @@ -266,17 +267,20 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>
> i915_gem_request_remove_from_client(request);
>
> - if (request->previous_context) {
> - if (i915.enable_execlists)
> - intel_lr_context_unpin(request->previous_context,
> - request->engine);
> - }
> -
> /* Retirement decays the ban score as it is a sign of ctx progress */
> if (request->ctx->ban_score > 0)
> request->ctx->ban_score--;
>
> - i915_gem_context_put(request->ctx);
> + /* The backing object for the context is done after switching to the
> + * *next* context. Therefore we cannot retire the previous context until
> + * the next context has already started running. However, since we
> + * cannot take the required locks at i915_gem_request_submit() we
> + * defer the unpinning of the active context to now, retirement of
> + * the subsequent request.
> + */
> + if (engine->last_context)
> + engine->context_unpin(engine, engine->last_context);
> + engine->last_context = request->ctx;
Would it be worth renaming this member to last_retired_context for clarity?
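Something like this, just the rename with the same logic as in your hunk
above:

	if (engine->last_retired_context)
		engine->context_unpin(engine, engine->last_retired_context);
	engine->last_retired_context = request->ctx;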
>
> dma_fence_signal(&request->fence);
>
> @@ -524,10 +528,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> if (ret)
> return ERR_PTR(ret);
>
> - ret = reserve_global_seqno(dev_priv);
> + /* Pinning the contexts may generate requests in order to acquire
> + * GGTT space, so do this first before we reserve a seqno for
> + * ourselves.
> + */
> + ret = engine->context_pin(engine, ctx);
> if (ret)
> return ERR_PTR(ret);
>
> + ret = reserve_global_seqno(dev_priv);
> + if (ret)
> + goto err_unpin;
> +
> /* Move the oldest request to the slab-cache (if not in use!) */
> req = list_first_entry_or_null(&engine->timeline->requests,
> typeof(*req), link);
> @@ -593,11 +605,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> INIT_LIST_HEAD(&req->active_list);
> req->i915 = dev_priv;
> req->engine = engine;
> - req->ctx = i915_gem_context_get(ctx);
> + req->ctx = ctx;
>
> /* No zalloc, must clear what we need by hand */
> req->global_seqno = 0;
> - req->previous_context = NULL;
> req->file_priv = NULL;
> req->batch = NULL;
>
> @@ -633,10 +644,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
> GEM_BUG_ON(!list_empty(&req->priotree.waiters_list));
>
> - i915_gem_context_put(ctx);
> kmem_cache_free(dev_priv->requests, req);
> err_unreserve:
> dev_priv->gt.active_requests--;
> +err_unpin:
> + engine->context_unpin(engine, ctx);
> return ERR_PTR(ret);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index e2b077df2da0..8569b35a332a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -170,17 +170,6 @@ struct drm_i915_gem_request {
> /** Preallocate space in the ring for the emitting the request */
> u32 reserved_space;
>
> - /**
> - * Context related to the previous request.
> - * As the contexts are accessed by the hardware until the switch is
> - * completed to a new context, the hardware may still be writing
> - * to the context object after the breadcrumb is visible. We must
> - * not unpin/unbind/prune that object whilst still active and so
> - * we keep the previous context pinned until the following (this)
> - * request is retired.
> - */
> - struct i915_gem_context *previous_context;
> -
> /** Batch buffer related to this request if any (used for
> * error state dump only).
> */
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 7fa4e74c1dd3..3f144216e188 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -533,17 +533,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
> static void i915_guc_submit(struct drm_i915_gem_request *rq)
> {
> - struct intel_engine_cs *engine = rq->engine;
> -
> - /* We keep the previous context alive until we retire the following
> - * request. This ensures that any the context object is still pinned
> - * for any residual writes the HW makes into it on the context switch
> - * into the next object following the breadcrumb. Otherwise, we may
> - * retire the context too early.
> - */
> - rq->previous_context = engine->last_context;
> - engine->last_context = rq->ctx;
> -
> i915_gem_request_submit(rq);
> __i915_guc_submit(rq);
> }
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 5669f0862458..cfe4152212b9 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -641,7 +641,7 @@ static int i915_oa_read(struct i915_perf_stream *stream,
> static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
> - struct i915_vma *vma;
> + struct intel_engine_cs *engine = dev_priv->engine[RCS];
> int ret;
>
> ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> @@ -653,19 +653,16 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
> *
> * NB: implied RCS engine...
> */
> - vma = i915_gem_context_pin_legacy(stream->ctx, 0);
> - if (IS_ERR(vma)) {
> - ret = PTR_ERR(vma);
> + ret = engine->context_pin(engine, stream->ctx);
> + if (ret)
> goto unlock;
> - }
> -
> - dev_priv->perf.oa.pinned_rcs_vma = vma;
>
> /* Explicitly track the ID (instead of calling i915_ggtt_offset()
> * on the fly) considering the difference with gen8+ and
> * execlists
> */
> - dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
> + dev_priv->perf.oa.specific_ctx_id =
> + i915_ggtt_offset(stream->ctx->engine[engine->id].state);
>
> unlock:
> mutex_unlock(&dev_priv->drm.struct_mutex);
> @@ -676,13 +673,12 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
> static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
> {
> struct drm_i915_private *dev_priv = stream->dev_priv;
> + struct intel_engine_cs *engine = dev_priv->engine[RCS];
>
> mutex_lock(&dev_priv->drm.struct_mutex);
>
> - i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
> - dev_priv->perf.oa.pinned_rcs_vma = NULL;
> -
> dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
> + engine->context_unpin(engine, stream->ctx);
>
> mutex_unlock(&dev_priv->drm.struct_mutex);
> }
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index e8afe1185831..97bbbc3d6aa8 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -304,15 +304,30 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
> {
> int ret;
>
> - ret = intel_engine_init_breadcrumbs(engine);
> + /* We may need to do things with the shrinker which
> + * require us to immediately switch back to the default
> + * context. This can cause a problem as pinning the
> + * default context also requires GTT space which may not
> + * be available. To avoid this we always pin the default
> + * context.
> + */
> + ret = engine->context_pin(engine, engine->i915->kernel_context);
> if (ret)
> return ret;
>
> + ret = intel_engine_init_breadcrumbs(engine);
> + if (ret)
> + goto err_unpin;
> +
> ret = i915_gem_render_state_init(engine);
> if (ret)
> - return ret;
> + goto err_unpin;
>
> return 0;
> +
> +err_unpin:
> + engine->context_unpin(engine, engine->i915->kernel_context);
> + return ret;
> }
>
> /**
> @@ -330,6 +345,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> intel_engine_fini_breadcrumbs(engine);
> intel_engine_cleanup_cmd_parser(engine);
> i915_gem_batch_pool_fini(&engine->batch_pool);
> +
> + engine->context_unpin(engine, engine->i915->kernel_context);
> }
>
> u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5cabe4e9d22f..58cea1f9ad27 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -512,15 +512,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> RB_CLEAR_NODE(&cursor->priotree.node);
> cursor->priotree.priority = INT_MAX;
>
> - /* We keep the previous context alive until we retire the
> - * following request. This ensures that any the context object
> - * is still pinned for any residual writes the HW makes into it
> - * on the context switch into the next object following the
> - * breadcrumb. Otherwise, we may retire the context too early.
> - */
> - cursor->previous_context = engine->last_context;
> - engine->last_context = cursor->ctx;
> -
> __i915_gem_request_submit(cursor);
> last = cursor;
> submit = true;
> @@ -772,8 +763,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> /* XXX Do we need to preempt to make room for us and our deps? */
> }
>
> -static int intel_lr_context_pin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> +static int execlists_context_pin(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx)
> {
> struct intel_context *ce = &ctx->engine[engine->id];
> void *vaddr;
> @@ -784,6 +775,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
> if (ce->pin_count++)
> return 0;
>
> + if (!ce->state) {
> + ret = execlists_context_deferred_alloc(ctx, engine);
> + if (ret)
> + goto err;
> + }
> +
> ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
> PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
> if (ret)
> @@ -825,8 +822,8 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
> return ret;
> }
>
> -void intel_lr_context_unpin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> +static void execlists_context_unpin(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx)
> {
> struct intel_context *ce = &ctx->engine[engine->id];
>
> @@ -850,24 +847,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> struct intel_context *ce = &request->ctx->engine[engine->id];
> int ret;
>
> + GEM_BUG_ON(!ce->pin_count);
> +
> /* Flush enough space to reduce the likelihood of waiting after
> * we start building the request - in which case we will just
> * have to repeat work.
> */
> request->reserved_space += EXECLISTS_REQUEST_SIZE;
>
> - if (!ce->state) {
> - ret = execlists_context_deferred_alloc(request->ctx, engine);
> - if (ret)
> - return ret;
> - }
> -
> + GEM_BUG_ON(!ce->ring);
> request->ring = ce->ring;
>
> - ret = intel_lr_context_pin(request->ctx, engine);
> - if (ret)
> - return ret;
> -
> if (i915.enable_guc_submission) {
> /*
> * Check that the GuC has space for the request before
> @@ -876,7 +866,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> */
> ret = i915_guc_wq_reserve(request);
> if (ret)
> - goto err_unpin;
> + goto err;
> }
>
> ret = intel_ring_begin(request, 0);
> @@ -904,8 +894,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> err_unreserve:
> if (i915.enable_guc_submission)
> i915_guc_wq_unreserve(request);
> -err_unpin:
> - intel_lr_context_unpin(request->ctx, engine);
> +err:
> return ret;
> }
>
> @@ -1789,13 +1778,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
> if (engine->cleanup)
> engine->cleanup(engine);
>
> - intel_engine_cleanup_common(engine);
> -
> if (engine->status_page.vma) {
> i915_gem_object_unpin_map(engine->status_page.vma->obj);
> engine->status_page.vma = NULL;
> }
> - intel_lr_context_unpin(dev_priv->kernel_context, engine);
> +
> + intel_engine_cleanup_common(engine);
>
> lrc_destroy_wa_ctx_obj(engine);
> engine->i915 = NULL;
> @@ -1820,6 +1808,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
> /* Default vfuncs which can be overriden by each engine. */
> engine->init_hw = gen8_init_common_ring;
> engine->reset_hw = reset_common_ring;
> +
> + engine->context_pin = execlists_context_pin;
> + engine->context_unpin = execlists_context_unpin;
> +
> engine->emit_flush = gen8_emit_flush;
> engine->emit_breadcrumb = gen8_emit_breadcrumb;
> engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
> @@ -1902,18 +1894,6 @@ logical_ring_init(struct intel_engine_cs *engine)
> if (ret)
> goto error;
>
> - ret = execlists_context_deferred_alloc(dctx, engine);
> - if (ret)
> - goto error;
> -
> - /* As this is the default context, always pin it */
> - ret = intel_lr_context_pin(dctx, engine);
> - if (ret) {
> - DRM_ERROR("Failed to pin context for %s: %d\n",
> - engine->name, ret);
> - goto error;
> - }
> -
> /* And setup the hardware status page. */
> ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
> if (ret) {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 7c6403243394..b5630331086a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -79,13 +79,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv);
> #define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1)
> #define LRC_STATE_PN (LRC_PPHWSP_PN + 1)
>
> +struct drm_i915_private;
> struct i915_gem_context;
>
> uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
> -void intel_lr_context_unpin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine);
> -
> -struct drm_i915_private;
>
> void intel_lr_context_resume(struct drm_i915_private *dev_priv);
> uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 0b7d13b6e228..a57eb5dec991 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1939,8 +1939,26 @@ intel_ring_free(struct intel_ring *ring)
> kfree(ring);
> }
>
> -static int intel_ring_context_pin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> +static int context_pin(struct i915_gem_context *ctx, unsigned int flags)
> +{
> + struct i915_vma *vma = ctx->engine[RCS].state;
> + int ret;
> +
> + /* Clear this page out of any CPU caches for coherent swap-in/out.
> + * We only want to do this on the first bind so that we do not stall
> + * on an active context (which by nature is already on the GPU).
> + */
> + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
> + if (ret)
> + return ret;
> + }
> +
> + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
> +}
> +
> +static int intel_ring_context_pin(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx)
> {
> struct intel_context *ce = &ctx->engine[engine->id];
> int ret;
> @@ -1951,13 +1969,15 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
> return 0;
>
> if (ce->state) {
> - struct i915_vma *vma;
> + unsigned int flags;
> +
> + flags = 0;
> + if (ctx == ctx->i915->kernel_context)
> + flags = PIN_HIGH;
>
> - vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH);
> - if (IS_ERR(vma)) {
> - ret = PTR_ERR(vma);
> + ret = context_pin(ctx, flags);
> + if (ret)
> goto error;
> - }
> }
>
> /* The kernel context is only used as a placeholder for flushing the
> @@ -1978,8 +1998,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
> return ret;
> }
>
> -static void intel_ring_context_unpin(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> +static void intel_ring_context_unpin(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx)
> {
> struct intel_context *ce = &ctx->engine[engine->id];
>
> @@ -2008,17 +2028,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> if (ret)
> goto error;
>
> - /* We may need to do things with the shrinker which
> - * require us to immediately switch back to the default
> - * context. This can cause a problem as pinning the
> - * default context also requires GTT space which may not
> - * be available. To avoid this we always pin the default
> - * context.
> - */
> - ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
> - if (ret)
> - goto error;
> -
> ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
> if (IS_ERR(ring)) {
> ret = PTR_ERR(ring);
> @@ -2077,8 +2086,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
>
> intel_engine_cleanup_common(engine);
>
> - intel_ring_context_unpin(dev_priv->kernel_context, engine);
> -
> engine->i915 = NULL;
> dev_priv->engine[engine->id] = NULL;
> kfree(engine);
> @@ -2099,12 +2106,15 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
> {
> int ret;
>
> + GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
> +
> /* Flush enough space to reduce the likelihood of waiting after
> * we start building the request - in which case we will just
> * have to repeat work.
> */
> request->reserved_space += LEGACY_REQUEST_SIZE;
>
> + GEM_BUG_ON(!request->engine->buffer);
> request->ring = request->engine->buffer;
>
> ret = intel_ring_begin(request, 0);
> @@ -2584,6 +2594,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
> engine->init_hw = init_ring_common;
> engine->reset_hw = reset_ring_common;
>
> + engine->context_pin = intel_ring_context_pin;
> + engine->context_unpin = intel_ring_context_unpin;
> +
> engine->emit_breadcrumb = i9xx_emit_breadcrumb;
> engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> if (i915.semaphores) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d8b066fd5dcf..0b69a50ab833 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -268,6 +268,10 @@ struct intel_engine_cs {
> void (*reset_hw)(struct intel_engine_cs *engine,
> struct drm_i915_gem_request *req);
>
> + int (*context_pin)(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx);
> + void (*context_unpin)(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx);
> int (*init_context)(struct drm_i915_gem_request *req);
>
> int (*emit_flush)(struct drm_i915_gem_request *request,
>
Well well well, this is nice! I really like the unification (and
simplification); it only worries me a bit that I did not spot any
problems. :) But the code is so much easier to follow and maintain now,
especially for newcomers. I know how long it took me to get to grips
with execlists, and the legacy path was completely different.
I would consider renaming engine->last_context to make it clear it is no
longer the last submitted context, which was its established meaning
until this patch.
With that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko