[Intel-gfx] [PATCH 30/43] drm/i915/bdw: Two-stage execlist submit process
Daniel, Thomas
thomas.daniel at intel.com
Fri Aug 15 10:51:22 CEST 2014
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter at ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Thursday, August 14, 2014 9:10 PM
> To: Daniel, Thomas
> Cc: intel-gfx at lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 30/43] drm/i915/bdw: Two-stage execlist
> submit process
>
> On Thu, Jul 24, 2014 at 05:04:38PM +0100, Thomas Daniel wrote:
> > From: Michel Thierry <michel.thierry at intel.com>
> >
> > Context switch (and execlist submission) should happen only when other
> > contexts are not active; otherwise, pre-emption occurs.
> >
> > To ensure this, we place context switch requests in a queue, and those
> > requests are later consumed when the right context switch interrupt is
> > received (still TODO).
> >
> > v2: Use a spinlock, do not remove the requests on unqueue (wait for
> > context switch completion).
> >
> > Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> >
> > v3: Several rebases and code changes. Use unique ID.
> >
> > v4:
> > - Move the queue/lock init to the late ring initialization.
> > - Damien's kmalloc review comments: check return, use sizeof(*req), do
> > not cast.
> >
> > v5:
> > - Do not reuse drm_i915_gem_request. Instead, create our own.
> > - New namespace.
> >
> > Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v1)
> > Signed-off-by: Oscar Mateo <oscar.mateo at intel.com> (v2-v5)
> > ---
> >  drivers/gpu/drm/i915/intel_lrc.c        | 63 ++++++++++++++++++++++++++++++-
> >  drivers/gpu/drm/i915/intel_lrc.h        |  8 ++++
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +
> >  3 files changed, 71 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 5b6f416..9e91169 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -217,6 +217,63 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
> >  	return 0;
> >  }
> >
> > +static void execlists_context_unqueue(struct intel_engine_cs *ring)
> > +{
> > +	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
> > +	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> > +
> > +	if (list_empty(&ring->execlist_queue))
> > +		return;
> > +
> > +	/* Try to read in pairs */
> > +	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
> > +				 execlist_link) {
>
> Ok, because of checkpatch I've looked at this. Imo open-coding this would
> be much easier to read, i.e.
>
> if (!list_empty)
> 	grab&remove first item;
> if (!list_empty)
> 	grab&remove 2nd item;
>
> Care to follow up with a patch for that?
>
> Thanks, Daniel
This needs to be kept as a loop: if there are two consecutive requests
for the same context they are squashed, with the second request's tail
superseding the first. Also, the non-squashed requests are not removed
here; unfortunately the remove only lands in the next patch.
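
To illustrate the squash, here is a minimal standalone userspace sketch of
the same policy (illustration only, not the kernel code; the ctx ids and
tail values are made up):

	#include <stdio.h>

	struct req { int ctx; unsigned int tail; };

	int main(void)
	{
		/* Pending queue: two consecutive requests for ctx 1, one for ctx 2 */
		struct req queue[] = { { 1, 0x10 }, { 1, 0x20 }, { 2, 0x30 } };
		struct req *req0 = NULL, *req1 = NULL;
		int i;

		for (i = 0; i < 3; i++) {
			struct req *cursor = &queue[i];

			if (!req0) {
				req0 = cursor;
			} else if (req0->ctx == cursor->ctx) {
				/* Same ctx: the second tail covers the first workload */
				req0 = cursor;
			} else {
				req1 = cursor;
				break;
			}
		}

		/* Prints ctx 1 with tail 0x20 (squashed) and ctx 2 with tail 0x30 */
		printf("ELSP[0]: ctx %d, tail 0x%x\n", req0->ctx, req0->tail);
		if (req1)
			printf("ELSP[1]: ctx %d, tail 0x%x\n", req1->ctx, req1->tail);
		return 0;
	}

With a plain "grab the first two items" as suggested, the two ctx 1
requests would land on both ELSP ports instead of being squashed into a
single submission with the later tail.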
>
> > +		if (!req0)
> > +			req0 = cursor;
> > +		else if (req0->ctx == cursor->ctx) {
This is the squash:
> > +			/* Same ctx: ignore first request, as second request
> > +			 * will update tail past first request's workload */
> > +			list_del(&req0->execlist_link);
> > +			i915_gem_context_unreference(req0->ctx);
> > +			kfree(req0);
> > +			req0 = cursor;
> > +		} else {
> > +			req1 = cursor;
> > +			break;
> > +		}
> > +	}
> > +
> > +	BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
> > +					req1 ? req1->ctx : NULL,
> > +					req1 ? req1->tail : 0));
> > +}
> > +
> > +static int execlists_context_queue(struct intel_engine_cs *ring,
> > +				   struct intel_context *to,
> > +				   u32 tail)
> > +{
> > +	struct intel_ctx_submit_request *req = NULL;
> > +	unsigned long flags;
> > +	bool was_empty;
> > +
> > +	req = kzalloc(sizeof(*req), GFP_KERNEL);
> > +	if (req == NULL)
> > +		return -ENOMEM;
> > +	req->ctx = to;
> > +	i915_gem_context_reference(req->ctx);
> > +	req->ring = ring;
> > +	req->tail = tail;
> > +
> > +	spin_lock_irqsave(&ring->execlist_lock, flags);
> > +
> > +	was_empty = list_empty(&ring->execlist_queue);
> > +	list_add_tail(&req->execlist_link, &ring->execlist_queue);
> > +	if (was_empty)
> > +		execlists_context_unqueue(ring);
> > +
> > +	spin_unlock_irqrestore(&ring->execlist_lock, flags);
> > +
> > +	return 0;
> > +}
> > +
> >  static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
> >  {
> >  	struct intel_engine_cs *ring = ringbuf->ring;
> > @@ -405,8 +462,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> >  	if (intel_ring_stopped(ring))
> >  		return;
> >
> > -	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
> > -	execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
> > +	execlists_context_queue(ring, ctx, ringbuf->tail);
> >  }
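
For reference, the submission flow after this patch is roughly (a sketch
of the call chain, not code from the patch):

	intel_logical_ring_advance_and_submit()
	    -> execlists_context_queue()		/* enqueue under execlist_lock */
	        -> execlists_context_unqueue()		/* only if the queue was empty */
	            -> execlists_submit_context()	/* writes the ELSP */

Requests queued behind a busy ELSP stay on execlist_queue until the
context switch interrupt handler (still TODO at this point in the series)
consumes them.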
> >
> >  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> > @@ -850,6 +906,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring
> >  	INIT_LIST_HEAD(&ring->request_list);
> >  	init_waitqueue_head(&ring->irq_queue);
> >
> > +	INIT_LIST_HEAD(&ring->execlist_queue);
> > +	spin_lock_init(&ring->execlist_lock);
> > +
> >  	ret = intel_lr_context_deferred_create(dctx, ring);
> >  	if (ret)
> >  		return ret;
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> > index b59965b..14492a9 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/intel_lrc.h
> > @@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
> >  			       u64 exec_start, u32 flags);
> >  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
> >
> > +struct intel_ctx_submit_request {
> > +	struct intel_context *ctx;
> > +	struct intel_engine_cs *ring;
> > +	u32 tail;
> > +
> > +	struct list_head execlist_link;
> > +};
> > +
> > #endif /* _INTEL_LRC_H_ */
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index c885d5c..6358823 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -223,6 +223,8 @@ struct intel_engine_cs {
> >  	} semaphore;
> >
> >  	/* Execlists */
> > +	spinlock_t execlist_lock;
> > +	struct list_head execlist_queue;
> >  	u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */
> >  	int (*emit_request)(struct intel_ringbuffer *ringbuf);
> >  	int (*emit_flush)(struct intel_ringbuffer *ringbuf,
> > --
> > 1.7.9.5
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch