[Intel-gfx] [PATCH 1/2] drm/i915: Detach hangcheck from request lists

Tue May 19 04:03:44 PDT 2015

On 08/05/2015 14:39, Mika Kuoppala wrote:
> Hangcheck tries to peek into request list to see
> if the ring was busy or not. But that leads to race
> against the list addition in request submission.
> And hangcheck saw a ring being idle, when in fact work was
> just being submitted.
>
> There is strong desire to keep hangcheck without
> locks of any kind as it is our last line of defense
> against failures that surpass our imagination.
>
> Rework the hangcheck logic so that we find out
> the ring busyness by inspecting hw engine state
> and omit the request->list inspection.
>
> v2: start is u32, push for 80 cols (Chris)
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=89931
> References: https://bugs.freedesktop.org/show_bug.cgi?id=89644
> References: https://bugs.freedesktop.org/show_bug.cgi?id=88984
> References: https://bugs.freedesktop.org/show_bug.cgi?id=88981
> References: https://bugs.freedesktop.org/show_bug.cgi?id=87730
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |  10 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c   |   4 +-
>   drivers/gpu/drm/i915/i915_irq.c         | 214 ++++++++++++++++++++------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
>   4 files changed, 149 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index adbbdda..8c647bf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1295,6 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring;
>   	u64 acthd[I915_NUM_RINGS];
> +	u32 start[I915_NUM_RINGS];
>   	u32 seqno[I915_NUM_RINGS];
>   	int i;
>
> @@ -1308,6 +1309,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   	for_each_ring(ring, dev_priv, i) {
>   		seqno[i] = ring->get_seqno(ring, false);
>   		acthd[i] = intel_ring_get_active_head(ring);
> +		start[i] = I915_READ_START(ring);
>   	}
>
>   	intel_runtime_pm_put(dev_priv);
> @@ -1328,8 +1330,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   			   (long long)acthd[i]);
>   		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
>   			   (long long)ring->hangcheck.max_acthd);
> +		seq_printf(m, "\tSTART = 0x%08x [current 0x%08x]\n",
> +			   ring->hangcheck.start,
> +			   start[i]);
> +
>   		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
> -		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
> +		seq_printf(m, "\taction = %s [%d]\n",
> +			   i915_hangcheck_action_to_str(ring->hangcheck.action),
> +			   ring->hangcheck.action);
>   	}
>

Based on feedback I have seen in the past it seems like we want to keep 
unrelated changes to separate patches. So in this case maybe we should 
move the debugfs changes into its own patch rather keep it together with 
the hangcheck changes?

>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a3e330d..9c0db19 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -220,7 +220,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   	}
>   }
>
> -static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
> +const char *i915_hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
>   {
>   	switch (a) {
>   	case HANGCHECK_IDLE:
> @@ -301,7 +301,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>   	err_printf(m, "  ring->head: 0x%08x\n", ring->cpu_ring_head);
>   	err_printf(m, "  ring->tail: 0x%08x\n", ring->cpu_ring_tail);
>   	err_printf(m, "  hangcheck: %s [%d]\n",
> -		   hangcheck_action_to_str(ring->hangcheck_action),
> +		   i915_hangcheck_action_to_str(ring->hangcheck_action),
>   		   ring->hangcheck_score);

See above. Maybe you could put this change together with the debugfs 
changes in a separate patch seeing as it's more about presenting the 
hangcheck action in a more informative way rather than anything related 
to the hangcheck logic itself.

>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 9da955e..a3244bd 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2685,20 +2685,6 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe)
>   	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
>   }
>
> -static struct drm_i915_gem_request *
> -ring_last_request(struct intel_engine_cs *ring)
> -{
> -	return list_entry(ring->request_list.prev,
> -			  struct drm_i915_gem_request, list);
> -}
> -
> -static bool
> -ring_idle(struct intel_engine_cs *ring)
> -{
> -	return (list_empty(&ring->request_list) ||
> -		i915_gem_request_completed(ring_last_request(ring), false));
> -}
> -
>   static bool
>   ipehr_is_semaphore_wait(struct drm_device *dev, u32 ipehr)
>   {
> @@ -2882,6 +2868,109 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
>   	return HANGCHECK_HUNG;
>   }
>
> +static bool engine_busy(struct intel_engine_cs *ring,
> +			const u64 acthd, const u32 start)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	u32 tail, head;
> +
> +	if (ring->hangcheck.acthd != acthd)
> +		return true;
> +
> +	if (ring->hangcheck.start != start)
> +		return true;
> +
> +	head = I915_READ_HEAD(ring) & HEAD_ADDR;
> +	tail = I915_READ_TAIL(ring) & TAIL_ADDR;
> +
> +	if (head != tail)
> +		return true;
> +
> +	/* Stop rings mechanism keeps head==tail even if
> +	 *  there is work to be done.
> +	 */

1. The preferred style for multi-line comments is:

/* (empty line)
  * (first line)
  * (second line)
  */

2. I don't quite understand the comment. I know there is such a thing as 
stop_rings, which we use for simulating hangs, but how does that factor 
in here? If stop_rings affects the state of MMIO tail register vs. 
ringbuffer tail then a small remark explaining that would be helpful.

> +	if (ring->buffer &&
> +	    ring->buffer->tail != tail &&
> +	    waitqueue_active(&ring->irq_queue))
> +		return true;
> +

1. For some reason going from one waitqueue_active() check in 
i915_hangcheck_elapsed() to two separate calls in two separate functions 
does not sit perfectly well with me. Maybe it's not that important but 
would it make sense to take the body of check_for_missed_irq() and 
integrate it in engine_idle(), call waitqueue_active() once and use the 
result twice: first in the check in the block above and then in the 
missing irq check that follows immediately?

2. In the block above where we check waitqueue_active() together with 
tail: Are there no cases when we might be interested in the waitqueue 
being active and MMIO tail == ringbuffer tail? Are there no cases where 
some client might hang on indefinitely even when the tail has caught up 
that we might want to do something about? Do we know for certain that 
when tail has caught up the state of waitqueue_active() is irrelevant? I 
think I'm missing something here - please enlighten me. (and maybe 
consider adding a comment explaining this in no uncertain terms)

3. Doing the ring->buffer check ensures that it won't run in execlist 
mode since ring->buffer is only set in ringbuffer submission mode. 
That's a pity since it's useful to check waitqueue_active().

> +	return false;
> +}
> +
> +#define engine_idle(ring, acthd, start) (!engine_busy((ring), (acthd), (start)))

engine_idle() has one caller. Why not just skip this #define and add a ! 
in front of the calling statement instead of adding one #define for one 
function for its sole caller? It's not like you're making this rewrite 
any smoother by retaining parts of the semantics of the old 
i915_hangcheck_elapsed() implementation (by letting the check be on 
idleness rather than busyness) since you're renaming it from ring_idle() 
to engine_idle() and adding more parameters. Is it just a question of 
personal taste or is there anything else to justify this?

> +
> +static bool check_for_missed_irq(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	bool irq_missing;
> +
> +	if (!waitqueue_active(&ring->irq_queue))
> +		return false;
> +
> +	irq_missing = !test_and_set_bit(ring->id,
> +					&dev_priv->gpu_error.missed_irq_rings);
> +
> +	if (irq_missing) {
> +		const bool irq_test =
> +			dev_priv->gpu_error.test_irq_rings &
> +			intel_ring_flag(ring);
> +
> +		if (irq_test)
> +			DRM_INFO("Fake missed irq on %s\n", ring->name);
> +		else
> +			DRM_ERROR("Missed irq on %s\n", ring->name);
> +	}
> +
> +	return true;
> +}
> +
> +static bool hangcheck_handle_stuck_ring(struct intel_engine_cs *ring, u64 acthd)
> +{
> +#define BUSY 1
> +#define KICK 5
> +#define HUNG 20
> +
> +	struct intel_ring_hangcheck *hc = &ring->hangcheck;
> +	bool there_is_hope = true;
> +
> +	/* We always increment the hangcheck score
> +	 * if the ring is busy and still processing
> +	 * the same request, so that no single request
> +	 * can run indefinitely (such as a chain of
> +	 * batches). If we detect a loop in acthd,
> +	 * we increment the busyness twice as fast.
> +	 * If this ring is in a legitimate wait for another
> +	 * ring, we omit incrementing the score. In that case
> +	 * the waiting ring is a victim and we want to be sure we
> +	 * catch the right culprit. Then every time we do kick
> +	 * the ring, add a small increment to the
> +	 * score so that we can catch a batch that is
> +	 * being repeatedly kicked and so responsible
> +	 * for stalling the machine.
> +	 */

See preferred multi-line comment format.

> +	hc->action = ring_stuck(ring, acthd);
> +
> +	switch (hc->action) {
> +	case HANGCHECK_IDLE:
> +	case HANGCHECK_WAIT:
> +	case HANGCHECK_ACTIVE:
> +		hc->score += BUSY;
> +		break;
> +	case HANGCHECK_ACTIVE_LOOP:
> +		hc->score += 2 * BUSY;
> +		break;
> +	case HANGCHECK_KICK:
> +		hc->score += KICK;
> +		break;
> +	case HANGCHECK_HUNG:
> +		hc->score += HUNG;
> +		there_is_hope = false;
> +		break;
> +	}
> +
> +	return there_is_hope;
> +}
> +
>   /*
>    * This is called when the chip hasn't reported back with completed
>    * batchbuffers in a long time. We keep track per ring seqno progress and
> @@ -2900,15 +2989,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	int i;
>   	int busy_count = 0, rings_hung = 0;
>   	bool stuck[I915_NUM_RINGS] = { 0 };
> -#define BUSY 1
> -#define KICK 5
> -#define HUNG 20
>
>   	if (!i915.enable_hangcheck)
>   		return;
>
>   	for_each_ring(ring, dev_priv, i) {
> +		struct intel_ring_hangcheck *hc = &ring->hangcheck;
>   		u64 acthd;
> +		u32 start;
>   		u32 seqno;
>   		bool busy = true;
>
> @@ -2916,76 +3004,44 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
>   		seqno = ring->get_seqno(ring, false);
>   		acthd = intel_ring_get_active_head(ring);
> +		start = I915_READ_START(ring);
>
> -		if (ring->hangcheck.seqno == seqno) {
> -			if (ring_idle(ring)) {
> -				ring->hangcheck.action = HANGCHECK_IDLE;
> -
> -				if (waitqueue_active(&ring->irq_queue)) {
> -					/* Issue a wake-up to catch stuck h/w. */
> -					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
> -						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
> -							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> -								  ring->name);
> -						else
> -							DRM_INFO("Fake missed irq on %s\n",
> -								 ring->name);
> -						wake_up_all(&ring->irq_queue);
> -					}
> -					/* Safeguard against driver failure */
> -					ring->hangcheck.score += BUSY;
> -				} else
> -					busy = false;
> -			} else {
> -				/* We always increment the hangcheck score
> -				 * if the ring is busy and still processing
> -				 * the same request, so that no single request
> -				 * can run indefinitely (such as a chain of
> -				 * batches). The only time we do not increment
> -				 * the hangcheck score on this ring, if this
> -				 * ring is in a legitimate wait for another
> -				 * ring. In that case the waiting ring is a
> -				 * victim and we want to be sure we catch the
> -				 * right culprit. Then every time we do kick
> -				 * the ring, add a small increment to the
> -				 * score so that we can catch a batch that is
> -				 * being repeatedly kicked and so responsible
> -				 * for stalling the machine.
> -				 */
> -				ring->hangcheck.action = ring_stuck(ring,
> -								    acthd);
> -
> -				switch (ring->hangcheck.action) {
> -				case HANGCHECK_IDLE:
> -				case HANGCHECK_WAIT:
> -				case HANGCHECK_ACTIVE:
> -					break;
> -				case HANGCHECK_ACTIVE_LOOP:
> -					ring->hangcheck.score += BUSY;
> -					break;
> -				case HANGCHECK_KICK:
> -					ring->hangcheck.score += KICK;
> -					break;
> -				case HANGCHECK_HUNG:
> -					ring->hangcheck.score += HUNG;
> -					stuck[i] = true;
> -					break;
> -				}
> -			}
> -		} else {
> -			ring->hangcheck.action = HANGCHECK_ACTIVE;
> +		if (hc->seqno != seqno) {
> +			hc->action = HANGCHECK_ACTIVE;
>
>   			/* Gradually reduce the count so that we catch DoS
>   			 * attempts across multiple batches.
>   			 */
> -			if (ring->hangcheck.score > 0)
> -				ring->hangcheck.score--;
> +			if (hc->score > 0)
> +				hc->score--;
>
> -			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +			hc->max_acthd = 0;
> +			goto engine_check_done;
>   		}
>
> -		ring->hangcheck.seqno = seqno;
> -		ring->hangcheck.acthd = acthd;
> +		if (engine_idle(ring, acthd, start)) {
> +			busy = check_for_missed_irq(ring);
> +			if (busy) {
> +				/* Start waking up irrespective of
> +				   our missed_irq bookkeeping */

See preferred multi-line comment format.

> +				if (hc->score >= KICK)
> +					wake_up_all(&ring->irq_queue);
> +
> +				hc->score += KICK;
> +				hc->action = HANGCHECK_ACTIVE;
> +			} else {
> +				hc->score = 0;
> +				hc->action = HANGCHECK_IDLE;
> +			}
> +			goto engine_check_done;
> +		}
> +
> +		hangcheck_handle_stuck_ring(ring, acthd);
> +
> +engine_check_done:
> +		hc->seqno = seqno;
> +		hc->acthd = acthd;
> +		hc->start = start;
>   		busy_count += busy;
>   	}
>

You've rearranged quite a few things, especially the general flow of the 
if-statements and replaced several branches with goto engine_check_done. 
Maybe it would make things even more clearer if you would add 1-2 
intermediate patches where you introduce these structural changes first 
before you start changing logic, such as:

1. Replacing request checks with MMIO register checks to determine 
engine idleness.

2. Setting score 0 and action IDLE upon engine idle and 
check_for_missed_irq non-busy.

I'm sure I could do a deeper analysis of all of the changes here but I 
think it might be a good idea to start by splitting things up a bit. Or 
am I being overzealous here?

Thanks,
Tomas

> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 39f6dfc..83edcef 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -83,11 +83,14 @@ enum intel_ring_hangcheck_action {
>   	HANGCHECK_HUNG,
>   };
>
> +const char *i915_hangcheck_action_to_str(enum intel_ring_hangcheck_action a);
> +
>   #define HANGCHECK_SCORE_RING_HUNG 31
>
>   struct intel_ring_hangcheck {
>   	u64 acthd;
>   	u64 max_acthd;
> +	u32 start;
>   	u32 seqno;
>   	int score;
>   	enum intel_ring_hangcheck_action action;
>