[Intel-gfx] [PATCH 3/3] drm/i915: Group the irq breadcrumb variables into the same cacheline

Wed Jul 6 09:18:32 UTC 2016

On 06/07/16 08:45, Chris Wilson wrote:
> As we inspect both the tasklet (to check for an active bottom-half) and
> set the irq-posted flag at the same time (both in the interrupt handler
> and then in the bottom-halt), group those two together into the same
> cacheline. (Not having total control over placement of the struct means
> we can't guarantee the cacheline boundary, we need to align the kmalloc
> and then each struct, but the grouping should help.)

Any actual difference or just tidy?

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  8 ++++----
>   drivers/gpu/drm/i915/i915_drv.h          |  6 +++---
>   drivers/gpu/drm/i915/i915_irq.c          | 12 ++++++------
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 28 ++++++++++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h  | 18 ++++++++++--------
>   5 files changed, 37 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index a59e0caeda64..8f7aadb16418 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -793,8 +793,8 @@ static void i915_ring_seqno_info(struct seq_file *m,
>
>   	seq_printf(m, "Current sequence (%s): %x\n",
>   		   engine->name, intel_engine_get_seqno(engine));
> -	seq_printf(m, "Current user interrupts (%s): %x\n",
> -		   engine->name, READ_ONCE(engine->user_interrupts));
> +	seq_printf(m, "Current user interrupts (%s): %lx\n",
> +		   engine->name, READ_ONCE(engine->breadcrumbs.irq_count));
>
>   	spin_lock(&b->lock);
>   	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> @@ -1442,9 +1442,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   			   engine->last_submitted_seqno);
>   		seq_printf(m, "\twaiters? %d\n",
>   			   intel_engine_has_waiter(engine));
> -		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
> +		seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
>   			   engine->hangcheck.user_interrupts,
> -			   READ_ONCE(engine->user_interrupts));
> +			   READ_ONCE(engine->breadcrumbs.irq_count));
>   		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>   			   (long long)engine->hangcheck.acthd,
>   			   (long long)acthd[id]);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 11e9769411e9..12229f3d27b2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3998,8 +3998,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   	 * is woken.
>   	 */
>   	if (engine->irq_seqno_barrier &&
> -	    READ_ONCE(engine->breadcrumbs.tasklet) == current &&
> -	    cmpxchg_relaxed(&engine->irq_posted, 1, 0)) {
> +	    READ_ONCE(engine->breadcrumbs.irq_tasklet) == current &&
> +	    cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
>   		struct task_struct *tsk;
>
>   		/* The ordering of irq_posted versus applying the barrier
> @@ -4023,7 +4023,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   		 * irq_posted == false but we are still running).
>   		 */
>   		rcu_read_lock();
> -		tsk = READ_ONCE(engine->breadcrumbs.tasklet);
> +		tsk = READ_ONCE(engine->breadcrumbs.irq_tasklet);
>   		if (tsk && tsk != current)
>   			/* Note that if the bottom-half is changed as we
>   			 * are sending the wake-up, the new bottom-half will
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b77d808b71cd..355ae9e5ff44 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -977,10 +977,10 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>
>   static void notify_ring(struct intel_engine_cs *engine)
>   {
> -	smp_store_mb(engine->irq_posted, true);
> +	smp_store_mb(engine->breadcrumbs.irq_posted, true);
>   	if (intel_engine_wakeup(engine)) {
>   		trace_i915_gem_request_notify(engine);
> -		engine->user_interrupts++;
> +		engine->breadcrumbs.irq_count++;
>   	}
>   }
>
> @@ -3054,12 +3054,12 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
>   	return HANGCHECK_HUNG;
>   }
>
> -static unsigned kick_waiters(struct intel_engine_cs *engine)
> +static unsigned long kick_waiters(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *i915 = engine->i915;
> -	unsigned user_interrupts = READ_ONCE(engine->user_interrupts);
> +	unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_count);
>
> -	if (engine->hangcheck.user_interrupts == user_interrupts &&
> +	if (engine->hangcheck.user_interrupts == irq_count &&
>   	    !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
>   		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
>   			DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> @@ -3068,7 +3068,7 @@ static unsigned kick_waiters(struct intel_engine_cs *engine)
>   		intel_engine_enable_fake_irq(engine);
>   	}
>
> -	return user_interrupts;
> +	return irq_count;
>   }
>   /*
>    * This is called when the chip hasn't reported back with completed
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 6fcbb52e50fb..f2edd956772a 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -49,7 +49,7 @@ static void irq_enable(struct intel_engine_cs *engine)
>   	 * we still need to force the barrier before reading the seqno,
>   	 * just in case.
>   	 */
> -	engine->irq_posted = true;
> +	engine->breadcrumbs.irq_posted = true;
>
>   	spin_lock_irq(&engine->i915->irq_lock);
>   	engine->irq_enable(engine);
> @@ -62,7 +62,7 @@ static void irq_disable(struct intel_engine_cs *engine)
>   	engine->irq_disable(engine);
>   	spin_unlock_irq(&engine->i915->irq_lock);
>
> -	engine->irq_posted = false;
> +	engine->breadcrumbs.irq_posted = false;
>   }
>
>   static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> @@ -195,7 +195,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>   	}
>   	rb_link_node(&wait->node, parent, p);
>   	rb_insert_color(&wait->node, &b->waiters);
> -	GEM_BUG_ON(!first && !b->tasklet);
> +	GEM_BUG_ON(!first && !b->irq_tasklet);
>
>   	if (completed) {
>   		struct rb_node *next = rb_next(completed);
> @@ -204,7 +204,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>   		if (next && next != &wait->node) {
>   			GEM_BUG_ON(first);
>   			b->first_wait = to_wait(next);
> -			smp_store_mb(b->tasklet, b->first_wait->tsk);
> +			smp_store_mb(b->irq_tasklet, b->first_wait->tsk);
>   			/* As there is a delay between reading the current
>   			 * seqno, processing the completed tasks and selecting
>   			 * the next waiter, we may have missed the interrupt
> @@ -216,7 +216,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>   			 * in case the seqno passed.
>   			 */
>   			__intel_breadcrumbs_enable_irq(b);
> -			if (READ_ONCE(engine->irq_posted))
> +			if (READ_ONCE(b->irq_posted))
>   				wake_up_process(to_wait(next)->tsk);
>   		}
>
> @@ -230,18 +230,18 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>   	if (first) {
>   		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
>   		b->first_wait = wait;
> -		smp_store_mb(b->tasklet, wait->tsk);
> +		smp_store_mb(b->irq_tasklet, wait->tsk);
>   		/* After assigning ourselves as the new bottom-half, we must
>   		 * perform a cursory check to prevent a missed interrupt.
>   		 * Either we miss the interrupt whilst programming the hardware,
>   		 * or if there was a previous waiter (for a later seqno) they
>   		 * may be woken instead of us (due to the inherent race
> -		 * in the unlocked read of b->tasklet in the irq handler) and
> -		 * so we miss the wake up.
> +		 * in the unlocked read of b->irq_tasklet in the irq handler)
> +		 * and so we miss the wake up.
>   		 */
>   		__intel_breadcrumbs_enable_irq(b);
>   	}
> -	GEM_BUG_ON(!b->tasklet);
> +	GEM_BUG_ON(!b->irq_tasklet);
>   	GEM_BUG_ON(!b->first_wait);
>   	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
>
> @@ -301,7 +301,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
>   		const int priority = wakeup_priority(b, wait->tsk);
>   		struct rb_node *next;
>
> -		GEM_BUG_ON(b->tasklet != wait->tsk);
> +		GEM_BUG_ON(b->irq_tasklet != wait->tsk);
>
>   		/* We are the current bottom-half. Find the next candidate,
>   		 * the first waiter in the queue on the remaining oldest
> @@ -344,13 +344,13 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
>   			 * exception rather than a seqno completion.
>   			 */
>   			b->first_wait = to_wait(next);
> -			smp_store_mb(b->tasklet, b->first_wait->tsk);
> +			smp_store_mb(b->irq_tasklet, b->first_wait->tsk);
>   			if (b->first_wait->seqno != wait->seqno)
>   				__intel_breadcrumbs_enable_irq(b);
> -			wake_up_process(b->tasklet);
> +			wake_up_process(b->irq_tasklet);
>   		} else {
>   			b->first_wait = NULL;
> -			WRITE_ONCE(b->tasklet, NULL);
> +			WRITE_ONCE(b->irq_tasklet, NULL);
>   			__intel_breadcrumbs_disable_irq(b);
>   		}
>   	} else {
> @@ -364,7 +364,7 @@ out_unlock:
>   	GEM_BUG_ON(b->first_wait == wait);
>   	GEM_BUG_ON(rb_first(&b->waiters) !=
>   		   (b->first_wait ? &b->first_wait->node : NULL));
> -	GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
> +	GEM_BUG_ON(!b->irq_tasklet ^ RB_EMPTY_ROOT(&b->waiters));
>   	spin_unlock(&b->lock);
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 121294c602c3..cadf9f3e67d6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -74,8 +74,8 @@ enum intel_ring_hangcheck_action {
>
>   struct intel_ring_hangcheck {
>   	u64 acthd;
> +	unsigned long user_interrupts;
>   	u32 seqno;
> -	unsigned user_interrupts;
>   	int score;
>   	enum intel_ring_hangcheck_action action;
>   	int deadlock;
> @@ -167,16 +167,20 @@ struct intel_engine_cs {
>   	 * the overhead of waking that client is much preferred.
>   	 */
>   	struct intel_breadcrumbs {
> +		struct task_struct *irq_tasklet; /* bh for user interrupts */

Tasklet was kind of passable, irq_tasklet is imho worse. I think anyone 
who see that name would thing this handles interrupts of some sort. :)

How about first_wait_task ?

I know it may feel like pointless bike-shedding and maybe it is so I am 
leaving it with you.

> +		unsigned long irq_count;
> +		bool irq_posted;
> +
>   		spinlock_t lock; /* protects the lists of requests */
>   		struct rb_root waiters; /* sorted by retirement, priority */
>   		struct rb_root signals; /* sorted by retirement */
>   		struct intel_wait *first_wait; /* oldest waiter by retirement */
> -		struct task_struct *tasklet; /* bh for user interrupts */
>   		struct task_struct *signaler; /* used for fence signalling */
>   		struct drm_i915_gem_request *first_signal;
>   		struct timer_list fake_irq; /* used after a missed interrupt */
> -		bool irq_enabled;
> -		bool rpm_wakelock;
> +
> +		bool irq_enabled : 1;
> +		bool rpm_wakelock : 1;

Is there much point in having bitfields? To me two plain bools would be 
just fine and smaller code.

>   	} breadcrumbs;
>
>   	/*
> @@ -189,7 +193,6 @@ struct intel_engine_cs {
>   	struct intel_hw_status_page status_page;
>   	struct i915_ctx_workarounds wa_ctx;
>
> -	bool		irq_posted;
>   	u32             irq_keep_mask; /* always keep these interrupts */
>   	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
>   	void		(*irq_enable)(struct intel_engine_cs *ring);
> @@ -319,7 +322,6 @@ struct intel_engine_cs {
>   	 * inspecting request list.
>   	 */
>   	u32 last_submitted_seqno;
> -	unsigned user_interrupts;
>
>   	bool gpu_caches_dirty;
>
> @@ -543,13 +545,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
>
>   static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
>   {
> -	return READ_ONCE(engine->breadcrumbs.tasklet);
> +	return READ_ONCE(engine->breadcrumbs.irq_tasklet);
>   }
>
>   static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
>   {
>   	bool wakeup = false;
> -	struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.tasklet);
> +	struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_tasklet);
>   	/* Note that for this not to dangerously chase a dangling pointer,
>   	 * the caller is responsible for ensure that the task remain valid for
>   	 * wake_up_process() i.e. that the RCU grace period cannot expire.
>

Either way,

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko