[Intel-gfx] [PATCH 02/12] drm/i915: Defer transfer onto execution timeline to actual hw submission
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Nov 3 10:34:26 UTC 2016
On 02/11/2016 17:50, Chris Wilson wrote:
> Defer the transfer from the client's timeline onto the execution
> timeline from the point of readiness to the point of actual submission.
> For example, in execlists, a request is finally submitted to hardware
> when the hardware is ready, and only put onto the hardware queue when
> the request is ready. By deferring the transfer, we ensure that the
> timeline is maintained in retirement order if we decide to queue the
> requests onto the hardware in a different order than fifo.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 36 ++++++++++++++++++------------
> drivers/gpu/drm/i915/i915_gem_request.h | 2 ++
> drivers/gpu/drm/i915/i915_guc_submission.c | 2 ++
> drivers/gpu/drm/i915/intel_lrc.c | 5 +++++
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
> 5 files changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 1ae5a2f8953f..05544dec5de9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -307,25 +307,16 @@ static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> return atomic_inc_return(&tl->next_seqno);
> }
>
> -static int __i915_sw_fence_call
> -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> {
> - struct drm_i915_gem_request *request =
> - container_of(fence, typeof(*request), submit);
> struct intel_engine_cs *engine = request->engine;
> struct intel_timeline *timeline;
> - unsigned long flags;
> u32 seqno;
>
> - if (state != FENCE_COMPLETE)
> - return NOTIFY_DONE;
> -
> /* Transfer from per-context onto the global per-engine timeline */
> timeline = engine->timeline;
> GEM_BUG_ON(timeline == request->timeline);
> -
> - /* Will be called from irq-context when using foreign DMA fences */
> - spin_lock_irqsave(&timeline->lock, flags);
> + assert_spin_locked(&timeline->lock);
>
> seqno = timeline_get_seqno(timeline->common);
> GEM_BUG_ON(!seqno);
> @@ -345,15 +336,32 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> GEM_BUG_ON(!request->global_seqno);
> engine->emit_breadcrumb(request,
> request->ring->vaddr + request->postfix);
> - engine->submit_request(request);
>
> - spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
> + /* nested from submit_notify */
> + spin_lock_nested(&request->timeline->lock, 2);
Is this because lockdep does not differentiate between
request->timeline->lock and engine->timeline->lock?
Could we pass different lock classes to i915_gem_timeline_init and
remove this deeply nested annotation? That would be better, I think.
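
Untested sketch of what I mean - the extra lock_class_key parameter to
i915_gem_timeline_init() and the per-caller keys are hypothetical, just
to illustrate the idea:

static struct lock_class_key ctx_timeline_lock_class;    /* hypothetical */
static struct lock_class_key engine_timeline_lock_class; /* hypothetical */

int i915_gem_timeline_init(struct drm_i915_private *i915,
			   struct i915_gem_timeline *timeline,
			   const char *name,
			   struct lock_class_key *lockclass) /* new param */
{
	unsigned int i;

	/* ... existing init unchanged ... */

	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
		struct intel_timeline *tl = &timeline->engine[i];

		spin_lock_init(&tl->lock);
		/*
		 * Distinct class per caller, so lockdep can tell the
		 * engine timelines and the per-context timelines apart
		 * without nesting annotations at the locking sites.
		 */
		lockdep_set_class(&tl->lock, lockclass);

		/* ... rest of the per-engine init unchanged ... */
	}

	return 0;
}

Then __i915_gem_request_submit() could presumably take
request->timeline->lock with a plain spin_lock() instead of the magic
subclass 2. The engine->timeline->lock nesting in submit_notify() (the
cross-engine case) might still need an annotation if all the engine
timelines share one class, but the subclass numbers would stop being
arbitrary.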
> list_move_tail(&request->link, &timeline->requests);
> spin_unlock(&request->timeline->lock);
>
> i915_sw_fence_commit(&request->execute);
> +}
>
> - spin_unlock_irqrestore(&timeline->lock, flags);
> +static int __i915_sw_fence_call
> +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> + if (state == FENCE_COMPLETE) {
> + struct drm_i915_gem_request *request =
> + container_of(fence, typeof(*request), submit);
> + struct intel_engine_cs *engine = request->engine;
> + unsigned long flags;
> +
> + /* Will be called from irq-context when using foreign fences,
> + * and may be called from the execution callback of another
> + * engine.
> + */
> + spin_lock_irqsave_nested(&engine->timeline->lock, flags, 1);
> + engine->submit_request(request);
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> + }
>
> return NOTIFY_DONE;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index ed13f37fea0f..725d04128eaf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -228,6 +228,8 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
> #define i915_add_request_no_flush(req) \
> __i915_add_request(req, false)
>
> +void __i915_gem_request_submit(struct drm_i915_gem_request *request);
> +
> struct intel_rps_client;
> #define NO_WAITBOOST ERR_PTR(-1)
> #define IS_RPS_CLIENT(p) (!IS_ERR(p))
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 857ef914cae7..ddb574d5ceda 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -640,6 +640,8 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
> struct i915_guc_client *client = guc->execbuf_client;
> int b_ret;
>
> + __i915_gem_request_submit(rq);
> +
> spin_lock(&client->wq_lock);
> guc_wq_item_append(client, rq);
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fa3012c342cc..b9aba16851ac 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -434,6 +434,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> {
> struct drm_i915_gem_request *cursor, *last;
> struct execlist_port *port = engine->execlist_port;
> + unsigned long flags;
> bool submit = false;
>
> last = port->request;
> @@ -469,6 +470,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> * and context switches) submission.
> */
>
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> spin_lock(&engine->execlist_lock);
> list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
> /* Can we combine this request with the current port? It has to
> @@ -501,6 +503,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> i915_gem_request_assign(&port->request, last);
> port++;
> }
> +
> + __i915_gem_request_submit(cursor);
> last = cursor;
> submit = true;
> }
> @@ -512,6 +516,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> i915_gem_request_assign(&port->request, last);
> }
> spin_unlock(&engine->execlist_lock);
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
>
> if (submit)
> execlists_submit_ports(engine);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 700e93d80616..f91ee24e2763 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1294,6 +1294,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
> {
> struct drm_i915_private *dev_priv = request->i915;
>
> + __i915_gem_request_submit(request);
> +
> I915_WRITE_TAIL(request->engine, request->tail);
> }
>
>
Otherwise looking good to me.
Regards,
Tvrtko