[Intel-gfx] [PATCH v4] drm/i915/scheduler: emulate a scheduler for guc
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jan 12 07:02:56 UTC 2017
On 11/01/2017 21:24, Chris Wilson wrote:
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using
> the pattern we already have in __i915_gem_request_submit();
> v4: Replaying requests after a hang also now needs the timeline
> spinlock, to disable the interrupts at least
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_guc_submission.c | 95 +++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/i915_irq.c | 4 +-
> drivers/gpu/drm/i915/intel_lrc.c | 5 +-
> 3 files changed, 92 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 913d87358972..2f0a853f820a 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> u32 freespace;
> int ret;
>
> - spin_lock(&client->wq_lock);
> + spin_lock_irq(&client->wq_lock);
> freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
> freespace -= client->wq_rsvd;
> if (likely(freespace >= wqi_size)) {
> @@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> client->no_wq_space++;
> ret = -EAGAIN;
> }
> - spin_unlock(&client->wq_lock);
> + spin_unlock_irq(&client->wq_lock);
>
> return ret;
> }
> @@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
>
> GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
>
> - spin_lock(&client->wq_lock);
> + spin_lock_irq(&client->wq_lock);
> client->wq_rsvd -= wqi_size;
> - spin_unlock(&client->wq_lock);
> + spin_unlock_irq(&client->wq_lock);
> }
>
> /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
> static void i915_guc_submit(struct drm_i915_gem_request *rq)
> {
> - i915_gem_request_submit(rq);
> + __i915_gem_request_submit(rq);
> __i915_guc_submit(rq);
> }
>
> +static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> +{
> + if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> + &rq->fence.flags))
> + return;
You didn't like the idea of duplicating the tracepoint from
dma_fence_enable_sw_signalling here?
> +
> + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
> +
> + spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> + intel_engine_enable_signaling(rq);
> + spin_unlock(&rq->lock);
> +}
> +
> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> + struct execlist_port *port = engine->execlist_port;
> + struct drm_i915_gem_request *last = port[0].request;
> + unsigned long flags;
> + struct rb_node *rb;
> + bool submit = false;
> +
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> + rb = engine->execlist_first;
> + while (rb) {
> + struct drm_i915_gem_request *cursor =
> + rb_entry(rb, typeof(*cursor), priotree.node);
> +
> + if (last && cursor->ctx != last->ctx) {
> + if (port != engine->execlist_port)
> + break;
> +
> + i915_gem_request_assign(&port->request, last);
> + nested_enable_signaling(last);
> + port++;
> + }
> +
> + rb = rb_next(rb);
> + rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> + RB_CLEAR_NODE(&cursor->priotree.node);
> + cursor->priotree.priority = INT_MAX;
> +
> + i915_guc_submit(cursor);
> + last = cursor;
> + submit = true;
> + }
> + if (submit) {
> + i915_gem_request_assign(&port->request, last);
> + nested_enable_signaling(last);
> + engine->execlist_first = rb;
> + }
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> + return submit;
> +}
> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> + struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> + struct execlist_port *port = engine->execlist_port;
> + struct drm_i915_gem_request *rq;
> + bool submit;
> +
> + do {
> + rq = port[0].request;
> + while (rq && i915_gem_request_completed(rq)) {
> + i915_gem_request_put(rq);
> + rq = port[1].request;
> + port[0].request = rq;
> + port[1].request = NULL;
> + }
> +
> + submit = false;
> + if (!port[1].request)
> + submit = i915_guc_dequeue(engine);
> + } while (submit);
> +}
> +
> /*
> * Everything below here is concerned with setup & teardown, and is
> * therefore not part of the somewhat time-critical batch-submission
> @@ -1427,15 +1504,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
> /* Take over from manual control of ELSP (execlists) */
> for_each_engine(engine, dev_priv, id) {
> struct drm_i915_gem_request *rq;
> + unsigned long flags;
>
> - engine->submit_request = i915_guc_submit;
> - engine->schedule = NULL;
> + tasklet_init(&engine->irq_tasklet,
> + i915_guc_irq_handler,
> + (unsigned long)engine);
>
> /* Replay the current set of previously submitted requests */
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> list_for_each_entry(rq, &engine->timeline->requests, link) {
> client->wq_rsvd += sizeof(struct guc_wq_item);
Strictly speaking the guc client wq lock as well so maybe just put a
comment here saying it is not needed or maybe just take it.
> __i915_guc_submit(rq);
> }
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 75fb1f66cc0c..624267bebf56 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
> static __always_inline void
> gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
> {
> - if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> + if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> + tasklet_schedule(&engine->irq_tasklet);
> notify_ring(engine);
> + }
> if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
> tasklet_schedule(&engine->irq_tasklet);
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d9de455da131..33fe7e5c9364 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
> DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>
> /* After a GPU reset, we may have requests to replay */
> - if (!execlists_elsp_idle(engine)) {
> + if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
> engine->execlist_port[0].count = 0;
> engine->execlist_port[1].count = 0;
> execlists_submit_ports(engine);
> @@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
> request->ring->last_retired_head = -1;
> intel_ring_update_space(request->ring);
>
> - if (i915.enable_guc_submission)
> - return;
> -
> /* Catch up with any missed context-switch interrupts */
> I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
> if (request->ctx != port[0].request->ctx) {
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list