[Intel-gfx] [PATCH] drm/i915/guc: Move wait for GuC out of spinlock/unlock

Tue Nov 24 10:36:54 PST 2015

On 11/24/2015 10:08 AM, Daniel Vetter wrote:
> On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> > On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> > >
> > > On 11/24/2015 05:26 AM, Imre Deak wrote:
> > > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai at intel.com wrote:
> > > > > > From: Alex Dai <yu.dai at intel.com>
> > > > > >
> > > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> > > > > > /
> > > > > > unlock. Otherwise, lockup happens because jiffies won't be
> > > > > > updated
> > > > > > dur to irq is disabled.
> > > > > >
> > > > > > Issue is found in igt/gem_close_race.
> > > > > >
> > > > > > Signed-off-by: Alex Dai <yu.dai at intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> > > > > > +++++++++++++++++-
> > > > > > ---------
> > > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > index 0a6b007..1418397 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > > > > > i915_guc_client *gc)
> > > > > >  	union guc_doorbell_qw *db;
> > > > > >  	void *base;
> > > > > >  	int attempt = 2, ret = -EAGAIN;
> > > > > > +	unsigned long flags;
> > > > > >
> > > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > > client_obj, 0));
> > > > >
> > > > > We don't need kmap_atomic anymore here now, since it's outside of
> > > > > the
> > > > > spinlock.
> > > > >
> > > > > >  	desc = base + gc->proc_desc_offset;
> > > > > >
> > > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> > > > >
> > > > > Please don't use the super-generic _irqsave. It's expensive and
> > > > > results in
> > > > > fragile code when someone accidentally reuses something in an
> > > > > interrupt
> > > > > handler that was never meant to run in that context.
> > > > >
> > > > > Instead please use the most specific funtion:
> > > > > - spin_lock if you know you are in irq context.
> > > > > - sipn_lock_irq if you know you are not.
> > > >
> > > > Right, and simply spin_lock() if the lock is not taken in IRQ
> > > > context
> > > > ever.
> > >
> > > This is not in IRQ context. So I will use spin_lock_irq instead.
> >
> > You can just use spin_lock(). spin_lock_irq() makes only sense if you
> > take the lock in IRQ context too, which is not the case.
>
> Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
>
How about using mutex_lock_interruptible(&dev->struct_mutex) instead in 
debugfs, which is to replace host2guc lock.

spinlock during ring the door bell is still needed.

Alex
> >
> > > > > - spin_lock_irqsave should be a big warning sign that your code
> > > > > has
> > > > >   layering issues.
> > > > >
> > > > > Please audit the entire guc code for the above two issues.
> > > >
> > > > Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
> > > > debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither
> > > > of
> > > > them are called from IRQ context AFAICS, in which case a simple
> > > > spin_lock() would do.
> > > >
> > > > --Imre
> > > >
> > > > > > +
> > > > > >  	/* Update the tail so it is visible to GuC */
> > > > > >  	desc->tail = gc->wq_tail;
> > > > > >
> > > > > > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > > > > > i915_guc_client *gc)
> > > > > >  			db_exc.cookie = 1;
> > > > > >  	}
> > > > > >
> > > > > > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > > +
> > > > > >  	kunmap_atomic(base);
> > > > > > +
> > > > > >  	return ret;
> > > > > >  }
> > > > > >
> > > > > > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > > > > > i915_guc_client *gc, u32 *offset)
> > > > > >  	struct guc_process_desc *desc;
> > > > > >  	void *base;
> > > > > >  	u32 size = sizeof(struct guc_wq_item);
> > > > > > -	int ret = 0, timeout_counter = 200;
> > > > > > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > > > > > +	unsigned long flags;
> > > > > >
> > > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > > client_obj, 0));
> > > > > >  	desc = base + gc->proc_desc_offset;
> > > > > >
> > > > > >  	while (timeout_counter-- > 0) {
> > > > > > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > > > > > desc->head,
> > > > > > -				gc->wq_size) >= size, 1);
> > > > > > +		spin_lock_irqsave(&gc->wq_lock, flags);
> > > > > >
> > > > > > -		if (!ret) {
> > > > > > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > > > > > > wq_size) >= size) {
> > > > > >  			*offset = gc->wq_tail;
> > > > > >
> > > > > >  			/* advance the tail for next workqueue
> > > > > > item */
> > > > > > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > > > > > i915_guc_client *gc, u32 *offset)
> > > > > >
> > > > > >  			/* this will break the loop */
> > > > > >  			timeout_counter = 0;
> > > > > > +			ret = 0;
> > > > > >  		}
> > > > > > +
> > > > > > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > > +
> > > > > > +		if (timeout_counter)
> > > > > > +			usleep_range(1000, 2000);
> > > > >
> > > > > Do we really not have a interrupt/signal from the guc when it has
> > > > > cleared
> > > > > up some space?
> > > > >
> > >
> > > This is not implemented in fw although I think it could be done
> > > through
> > > the guc to host interrupt. I am worry about that if we implement
> > > this,
> > > it will end up with driver handles too many interrupts (maybe same
> > > amount of context switch). However, ideally we don't want to handle
> > > interrupts at all.
> > > > > >  	};
> > > > > >
> > > > > >  	kunmap_atomic(base);
> > > > > > @@ -597,19 +609,17 @@ int i915_guc_submit(struct
> > > > > > i915_guc_client
> > > > > > *client,
> > > > > >  {
> > > > > >  	struct intel_guc *guc = client->guc;
> > > > > >  	enum intel_ring_id ring_id = rq->ring->id;
> > > > > > -	unsigned long flags;
> > > > > >  	int q_ret, b_ret;
> > > > > >
> > > > > >  	/* Need this because of the deferred pin ctx and ring
> > > > > > */
> > > > > >  	/* Shall we move this right after ring is pinned? */
> > > > > >  	lr_context_update(rq);
> > > > > >
> > > > > > -	spin_lock_irqsave(&client->wq_lock, flags);
> > > > > > -
> > > > > >  	q_ret = guc_add_workqueue_item(client, rq);
> > > > > >  	if (q_ret == 0)
> > > > > >  		b_ret = guc_ring_doorbell(client);
> > > > > >
> > > > > > +	spin_lock(&guc->host2guc_lock);
> > > > >
> > > > > So at first I thought there's a race now, but then I looked at
> > > > > what
> > > > > host2guc and wq_lock protect. It seems like the only thing they
> > > > > do is
> > > > > protect against debugfs, all the real protection against
> > > > > inconsistent
> > > > > state is done through dev->struct_mutex.
> > > > >
> > > > > Can't we just rip out all this spinlock business from the guc
> > > > > code?
> > > > > It would be easier than fixing up the races in here.
> > > >
> > > >
> > >
> > > Yes, host2guc lock can be done through dev->struct_mutex. But
> > > definitely
> > > we don't want to interrupt the process when driver program guc work
> > > queue and ring the door bell.
> > > > > -Daniel
> > > > >
> > > > > >  	client->submissions[ring_id] += 1;
> > > > > >  	if (q_ret) {
> > > > > >  		client->q_fail += 1;
> > > > > > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > > > > > *client,
> > > > > >  	} else {
> > > > > >  		client->retcode = 0;
> > > > > >  	}
> > > > > > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > > > > > -
> > > > > > -	spin_lock(&guc->host2guc_lock);
> > > > > >  	guc->submissions[ring_id] += 1;
> > > > > >  	guc->last_seqno[ring_id] = rq->seqno;
> > > > > >  	spin_unlock(&guc->host2guc_lock);
> > > > > > --
> > > > > > 2.5.0
> > > > > >
> > > > >
> > >
>