[Intel-gfx] [RFC 2/5] drm/i915: Unify execlist and legacy request life-cycles
Yu Dai
yu.dai at intel.com
Thu Jul 9 09:54:59 PDT 2015
On 07/09/2015 03:57 AM, Nick Hoath wrote:
> There is a desire to simplify the i915 driver by reducing the number of
> different code paths introduced by the LRC / execlists support. As the
> execlists request is now part of the gem request it is possible and
> desirable to unify the request life-cycles for execlist and legacy
> requests.
>
> Added a context complete flag to a request which gets set during the
> context switch interrupt.
>
> Added a function i915_gem_request_retireable(). A request is considered
> retireable if its seqno has passed (i.e. the request has completed) and
> either it was never submitted to the ELSP or its context has completed.
> This ensures that context save is carried out before the last request
> for a context is considered retireable. retire_requests_ring() now uses
> i915_gem_request_retireable() rather than i915_gem_request_completed()
> when deciding which requests to retire. Requests that were not waiting
> for a context switch interrupt (either as a result of being merged into
> a following request or by being a legacy request) are considered
> retireable as soon as their seqno has passed.
>
> Removed the extra request reference held for the execlist request.
>
> Removed intel_execlists_retire_requests() and all references to
> intel_engine_cs.execlist_retired_req_list.
>
> Moved context unpinning into retire_requests_ring() for now. Further work
> is pending for the context pinning - this patch should allow us to use the
> active list to track context and ring buffer objects later.
Just a heads up on a potential performance drop in certain workloads.
Since retire_requests_ring() is called before each submission, CPU-bound
workloads will see contexts pinned and unpinned very often. The
ioremap_wc()/iounmap() of the ring buffer on each pin/unpin cycle
consumes extra CPU time. I found this issue during the GuC
implementation, because GuC submission does not use the execlist
request queue but the legacy one. On SKL, there is about a 3~5%
performance drop in workloads such as SynMark2 oglbatch5/6/7.
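
For reference, the expensive part is in the pin path. Here is a
simplified sketch of intel_lr_context_pin() (not the verbatim driver
code; error unwinding and locking asserts trimmed):

  static int intel_lr_context_pin(struct intel_engine_cs *ring,
                                  struct intel_context *ctx)
  {
          struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
          struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
          int ret = 0;

          if (ctx->engine[ring->id].pin_count++ == 0) {
                  /* Pin the context image into the GGTT. */
                  ret = i915_gem_obj_ggtt_pin(ctx_obj,
                                              GEN8_LR_CONTEXT_ALIGN, 0);
                  if (ret)
                          return ret;
                  /* Map the ring buffer through the mappable aperture.
                   * This is where ioremap_wc() happens; the matching
                   * last unpin does iounmap(). When every retire cycle
                   * unpins the context, a CPU-bound workload pays this
                   * map/unmap cost once per submission rather than once
                   * per context lifetime. */
                  ret = intel_pin_and_map_ringbuffer_obj(ring->dev,
                                                         ringbuf);
          }

          return ret;
  }

Keeping the context (and with it the ring buffer mapping) pinned across
submissions, e.g. by tracking it on the active list as the commit
message suggests, would avoid that churn.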
Thanks,
Alex
> Changed gen8_cs_irq_handler() so that notify_ring() is called when
> contexts complete as well as when a user interrupt occurs, ensuring
> that notification happens once a request is complete and its context
> save has finished.
>
> v2: Rebase over the read-read optimisation changes
>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> Signed-off-by: Nick Hoath <nicholas.hoath at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 ++++
> drivers/gpu/drm/i915/i915_gem.c | 49 +++++++++++++++++++++++----------
> drivers/gpu/drm/i915/i915_irq.c | 6 ++--
> drivers/gpu/drm/i915/intel_lrc.c | 44 ++++++-----------------------
> drivers/gpu/drm/i915/intel_lrc.h | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> 6 files changed, 53 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1dbd957..ef02378 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2213,6 +2213,12 @@ struct drm_i915_gem_request {
> /** Execlists no. of times this request has been sent to the ELSP */
> int elsp_submitted;
>
> + /**
> + * Execlists: whether this request's context has completed after
> + * submission to the ELSP
> + */
> + bool ctx_complete;
> +
> };
>
> int i915_gem_request_alloc(struct intel_engine_cs *ring,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 49016e0..3681a33 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2368,6 +2368,12 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> list_move_tail(&vma->mm_list, &vma->vm->active_list);
> }
>
> +static bool i915_gem_request_retireable(struct drm_i915_gem_request *req)
> +{
> + return (i915_gem_request_completed(req, true) &&
> + (!req->elsp_submitted || req->ctx_complete));
> +}
> +
> static void
> i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> {
> @@ -2853,10 +2859,28 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
> struct drm_i915_gem_request,
> list);
>
> - if (!i915_gem_request_completed(request, true))
> + if (!i915_gem_request_retireable(request))
> break;
>
> i915_gem_request_retire(request);
> +
> + if (i915.enable_execlists) {
> + struct intel_context *ctx = request->ctx;
> + struct drm_i915_private *dev_priv =
> + ring->dev->dev_private;
> + unsigned long flags;
> + struct drm_i915_gem_object *ctx_obj =
> + ctx->engine[ring->id].state;
> +
> + spin_lock_irqsave(&ring->execlist_lock, flags);
> +
> + if (ctx_obj && (ctx != ring->default_context))
> + intel_lr_context_unpin(ring, ctx);
> +
> + intel_runtime_pm_put(dev_priv);
> + spin_unlock_irqrestore(&ring->execlist_lock, flags);
> + }
> +
> }
>
> /* Move any buffers on the active list that are no longer referenced
> @@ -2872,12 +2896,14 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>
> if (!list_empty(&obj->last_read_req[ring->id]->list))
> break;
> + if (!i915_gem_request_retireable(obj->last_read_req[ring->id]))
> + break;
>
> i915_gem_object_retire__read(obj, ring->id);
> }
>
> if (unlikely(ring->trace_irq_req &&
> - i915_gem_request_completed(ring->trace_irq_req, true))) {
> + i915_gem_request_retireable(ring->trace_irq_req))) {
> ring->irq_put(ring);
> i915_gem_request_assign(&ring->trace_irq_req, NULL);
> }
> @@ -2896,15 +2922,6 @@ i915_gem_retire_requests(struct drm_device *dev)
> for_each_ring(ring, dev_priv, i) {
> i915_gem_retire_requests_ring(ring);
> idle &= list_empty(&ring->request_list);
> - if (i915.enable_execlists) {
> - unsigned long flags;
> -
> - spin_lock_irqsave(&ring->execlist_lock, flags);
> - idle &= list_empty(&ring->execlist_queue);
> - spin_unlock_irqrestore(&ring->execlist_lock, flags);
> -
> - intel_execlists_retire_requests(ring);
> - }
> }
>
> if (idle)
> @@ -2980,12 +2997,14 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
> if (req == NULL)
> continue;
>
> - if (list_empty(&req->list))
> - goto retire;
> + if (list_empty(&req->list)) {
> + if (i915_gem_request_retireable(req))
> + i915_gem_object_retire__read(obj, i);
> + continue;
> + }
>
> - if (i915_gem_request_completed(req, true)) {
> + if (i915_gem_request_retireable(req)) {
> __i915_gem_request_retire__upto(req);
> -retire:
> i915_gem_object_retire__read(obj, i);
> }
> }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3ac30b8..dfa2379 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1158,9 +1158,11 @@ static void snb_gt_irq_handler(struct drm_device *dev,
>
> static void gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir)
> {
> + bool need_notify = false;
> +
> if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> - intel_lrc_irq_handler(ring);
> - if (iir & GT_RENDER_USER_INTERRUPT)
> + need_notify = intel_lrc_irq_handler(ring);
> + if ((iir & GT_RENDER_USER_INTERRUPT) || need_notify)
> notify_ring(ring);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9b928ab..8373900 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -411,9 +411,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
> /* Same ctx: ignore first request, as second request
> * will update tail past first request's workload */
> cursor->elsp_submitted = req0->elsp_submitted;
> + req0->elsp_submitted = 0;
> list_del(&req0->execlist_link);
> - list_add_tail(&req0->execlist_link,
> - &ring->execlist_retired_req_list);
> req0 = cursor;
> } else {
> req1 = cursor;
> @@ -450,6 +449,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
> req0->elsp_submitted++;
> if (req1)
> req1->elsp_submitted++;
> +
> }
>
> static bool execlists_check_remove_request(struct intel_engine_cs *ring,
> @@ -469,11 +469,9 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
> if (intel_execlists_ctx_id(ctx_obj) == request_id) {
> WARN(head_req->elsp_submitted == 0,
> "Never submitted head request\n");
> -
> if (--head_req->elsp_submitted <= 0) {
> + head_req->ctx_complete = 1;
> list_del(&head_req->execlist_link);
> - list_add_tail(&head_req->execlist_link,
> - &ring->execlist_retired_req_list);
> return true;
> }
> }
> @@ -488,8 +486,9 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
> *
> * Check the unread Context Status Buffers and manage the submission of new
> * contexts to the ELSP accordingly.
> + * @return whether a context completed
> */
> -void intel_lrc_irq_handler(struct intel_engine_cs *ring)
> +bool intel_lrc_irq_handler(struct intel_engine_cs *ring)
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> u32 status_pointer;
> @@ -540,6 +539,8 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
>
> I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
> ((u32)ring->next_context_status_buffer & 0x07) << 8);
> +
> + return (submit_contexts != 0);
> }
>
> static int execlists_context_queue(struct drm_i915_gem_request *request)
> @@ -551,7 +552,7 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> if (request->ctx != ring->default_context)
> intel_lr_context_pin(ring, request->ctx);
>
> - i915_gem_request_reference(request);
> + i915_gem_context_reference(request->ctx);
>
> request->tail = request->ringbuf->tail;
>
> @@ -572,8 +573,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> WARN(tail_req->elsp_submitted != 0,
> "More than 2 already-submitted reqs queued\n");
> list_del(&tail_req->execlist_link);
> - list_add_tail(&tail_req->execlist_link,
> - &ring->execlist_retired_req_list);
> }
> }
>
> @@ -938,32 +937,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
> return 0;
> }
>
> -void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> -{
> - struct drm_i915_gem_request *req, *tmp;
> - struct list_head retired_list;
> -
> - WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> - if (list_empty(&ring->execlist_retired_req_list))
> - return;
> -
> - INIT_LIST_HEAD(&retired_list);
> - spin_lock_irq(&ring->execlist_lock);
> - list_replace_init(&ring->execlist_retired_req_list, &retired_list);
> - spin_unlock_irq(&ring->execlist_lock);
> -
> - list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> - struct intel_context *ctx = req->ctx;
> - struct drm_i915_gem_object *ctx_obj =
> - ctx->engine[ring->id].state;
> -
> - if (ctx_obj && (ctx != ring->default_context))
> - intel_lr_context_unpin(ring, ctx);
> - list_del(&req->execlist_link);
> - i915_gem_request_unreference(req);
> - }
> -}
> -
> void intel_logical_ring_stop(struct intel_engine_cs *ring)
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> @@ -1706,7 +1679,6 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
> init_waitqueue_head(&ring->irq_queue);
>
> INIT_LIST_HEAD(&ring->execlist_queue);
> - INIT_LIST_HEAD(&ring->execlist_retired_req_list);
> spin_lock_init(&ring->execlist_lock);
>
> ret = i915_cmd_parser_init_ring(ring);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index f59940a..9d2e98a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -82,7 +82,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
> struct list_head *vmas);
> u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>
> -void intel_lrc_irq_handler(struct intel_engine_cs *ring);
> +bool intel_lrc_irq_handler(struct intel_engine_cs *ring);
> void intel_execlists_retire_requests(struct intel_engine_cs *ring);
>
> #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 304cac4..73b0bd8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -263,7 +263,6 @@ struct intel_engine_cs {
> /* Execlists */
> spinlock_t execlist_lock;
> struct list_head execlist_queue;
> - struct list_head execlist_retired_req_list;
> u8 next_context_status_buffer;
> u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */
> int (*emit_request)(struct drm_i915_gem_request *request);