[Intel-gfx] [PATCH 08/15] drm/i915: Slaughter the thundering i915_wait_request herd

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Nov 30 04:05:50 PST 2015


Hi,

On 29/11/15 08:48, Chris Wilson wrote:
> One particularly stressful scenario consists of many independent tasks
> all competing for GPU time and waiting upon the results (e.g. realtime
> transcoding of many, many streams). One bottleneck in particular is that
> each client waits on its own results, but every client is woken up after
> every batchbuffer - hence the thunder of hooves as then every client must
> do its heavyweight dance to read a coherent seqno to see if it is the
> lucky one. Alternatively, we can have one kthread responsible for waking
> after an interrupt, checking the seqno and only waking up the waiting
> clients who are complete. The disadvantage is that in the uncontended
> scenario (i.e. only one waiter) we incur an extra context switch in the
> wakeup path - though that should be mitigated somewhat by the busy-wait
> we do first before sleeping.
>
> v2: Convert from a kworker per engine into a dedicated kthread for the
> bottom-half.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin at intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile            |   1 +
>   drivers/gpu/drm/i915/i915_dma.c          |   9 +-
>   drivers/gpu/drm/i915/i915_drv.h          |  35 ++++-
>   drivers/gpu/drm/i915/i915_gem.c          |  99 +++---------
>   drivers/gpu/drm/i915/i915_gpu_error.c    |   8 +-
>   drivers/gpu/drm/i915/i915_irq.c          |  17 +--
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 253 +++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   3 +-
>   10 files changed, 333 insertions(+), 97 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 0851de07bd13..d3b9d3618719 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
>   	  i915_gem_userptr.o \
>   	  i915_gpu_error.o \
>   	  i915_trace_points.o \
> +	  intel_breadcrumbs.o \
>   	  intel_lrc.o \
>   	  intel_mocs.o \
>   	  intel_ringbuffer.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index a81c76603544..93c6762d87b4 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -388,12 +388,16 @@ static int i915_load_modeset_init(struct drm_device *dev)
>   	if (ret)
>   		goto cleanup_vga_client;
>
> +	ret = intel_breadcrumbs_init(dev_priv);
> +	if (ret)
> +		goto cleanup_vga_switcheroo;
> +
>   	/* Initialise stolen first so that we may reserve preallocated
>   	 * objects for the BIOS to KMS transition.
>   	 */
>   	ret = i915_gem_init_stolen(dev);
>   	if (ret)
> -		goto cleanup_vga_switcheroo;
> +		goto cleanup_breadcrumbs;
>
>   	intel_power_domains_init_hw(dev_priv, false);
>
> @@ -454,6 +458,8 @@ cleanup_irq:
>   	drm_irq_uninstall(dev);
>   cleanup_gem_stolen:
>   	i915_gem_cleanup_stolen(dev);
> +cleanup_breadcrumbs:
> +	intel_breadcrumbs_fini(dev_priv);
>   cleanup_vga_switcheroo:
>   	vga_switcheroo_unregister_client(dev->pdev);
>   cleanup_vga_client:
> @@ -1193,6 +1199,7 @@ int i915_driver_unload(struct drm_device *dev)
>   	mutex_unlock(&dev->struct_mutex);
>   	intel_fbc_cleanup_cfb(dev_priv);
>   	i915_gem_cleanup_stolen(dev);
> +	intel_breadcrumbs_fini(dev_priv);
>
>   	intel_csr_ucode_fini(dev_priv);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e1980212ba37..782d08ab6264 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -564,6 +564,7 @@ struct drm_i915_error_state {
>   			long jiffies;
>   			u32 seqno;
>   			u32 tail;
> +			u32 waiters;
>   		} *requests;
>
>   		struct {
> @@ -1383,7 +1384,7 @@ struct i915_gpu_error {
>   #define I915_STOP_RING_ALLOW_WARN      (1 << 30)
>
>   	/* For missed irq/seqno simulation. */
> -	unsigned int test_irq_rings;
> +	unsigned long test_irq_rings;
>   };
>
>   enum modeset_restore {
> @@ -1695,6 +1696,27 @@ struct drm_i915_private {
>
>   	struct intel_uncore uncore;
>
> +	/* Rather than have every client wait upon all user interrupts,
> +	 * with the herd waking after every interrupt and each doing the
> +	 * heavyweight seqno dance, we delegate the task to a bottom-half
> +	 * for the user interrupt (one bh handling all engines). Now,
> +	 * just one task is woken up and does the coherent seqno read and
> +	 * then wakes up all the waiters registered with the bottom-half
> +	 * that are complete. This avoid the thundering herd problem
> +	 * with lots of concurrent waiters, at the expense of an extra
> +	 * context switch between the interrupt and the client. That should
> +	 * be mitigated somewhat by the busyspin in the client before we go to
> +	 * sleep waiting for an interrupt from the bottom-half.
> +	 */
> +	struct intel_breadcrumbs {
> +		spinlock_t lock; /* protects the per-engine request list */

s/list/tree/

> +		struct task_struct *task; /* bh for all user interrupts */
> +		struct intel_breadcrumbs_engine {
> +			struct rb_root requests; /* sorted by retirement */
> +			struct drm_i915_gem_request *first;

/* First request in the tree to be completed next by the hw. */

?

> +		} engine[I915_NUM_RINGS];
> +	} breadcrumbs;
> +
>   	struct i915_virtual_gpu vgpu;
>
>   	struct intel_guc guc;
> @@ -2228,6 +2250,11 @@ struct drm_i915_gem_request {
>   	/** global list entry for this request */
>   	struct list_head list;
>
> +	/** List of clients waiting for completion of this request */
> +	wait_queue_head_t wait;
> +	struct rb_node irq_node;
> +	unsigned irq_count;

To me irq prefix does not fit here that well.

req_tree_node?

waiter_count, waiters?

> +
>   	struct drm_i915_file_private *file_priv;
>   	/** file_priv list entry for this request */
>   	struct list_head client_list;
> @@ -2800,6 +2827,12 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
>   }
>
>
> +/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
> +int intel_breadcrumbs_init(struct drm_i915_private *i915);
> +bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request);
> +void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request);
> +void intel_breadcrumbs_fini(struct drm_i915_private *i915);
> +
>   /* i915_gem.c */
>   int i915_gem_create_ioctl(struct drm_device *dev, void *data,
>   			  struct drm_file *file_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 65d101b47d8e..969592fb0969 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1126,17 +1126,6 @@ i915_gem_check_wedge(unsigned reset_counter,
>   	return 0;
>   }
>
> -static void fake_irq(unsigned long data)
> -{
> -	wake_up_process((struct task_struct *)data);
> -}
> -
> -static bool missed_irq(struct drm_i915_private *dev_priv,
> -		       struct intel_engine_cs *ring)
> -{
> -	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
> -}
> -
>   static unsigned long local_clock_us(unsigned *cpu)
>   {
>   	unsigned long t;
> @@ -1184,9 +1173,6 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
>   	 * takes to sleep on a request, on the order of a microsecond.
>   	 */
>
> -	if (req->ring->irq_refcount)
> -		return -EBUSY;
> -
>   	/* Only spin if we know the GPU is processing this request */
>   	if (!i915_gem_request_started(req, false))
>   		return -EAGAIN;
> @@ -1232,26 +1218,19 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   			s64 *timeout,
>   			struct intel_rps_client *rps)
>   {
> -	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	const bool irq_test_in_progress =
> -		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
>   	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
>   	DEFINE_WAIT(wait);
> -	unsigned long timeout_expire;
> +	unsigned long timeout_remain;
>   	s64 before, now;
>   	int ret;
>
> -	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
> -
>   	if (list_empty(&req->list))
>   		return 0;
>
>   	if (i915_gem_request_completed(req, true))
>   		return 0;
>
> -	timeout_expire = 0;
> +	timeout_remain = 0;
>   	if (timeout) {
>   		if (WARN_ON(*timeout < 0))
>   			return -EINVAL;
> @@ -1259,43 +1238,28 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   		if (*timeout == 0)
>   			return -ETIME;
>
> -		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
> +		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
>   	}
>
> -	if (INTEL_INFO(dev_priv)->gen >= 6)
> -		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
>
>   	/* Record current time in case interrupted by signal, or wedged */
>   	trace_i915_gem_request_wait_begin(req);
>   	before = ktime_get_raw_ns();
>
> -	/* Optimistic spin for the next jiffie before touching IRQs */
> -	ret = __i915_spin_request(req, state);
> -	if (ret == 0)
> -		goto out;
> +	if (INTEL_INFO(req->i915)->gen >= 6)
> +		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>
> -	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
> -		ret = -ENODEV;
> -		goto out;
> +	/* Optimistic spin for the next jiffie before touching IRQs */
> +	if (intel_breadcrumbs_add_waiter(req)) {
> +		ret = __i915_spin_request(req, state);
> +		if (ret == 0)
> +			goto out;
>   	}
>
>   	for (;;) {
> -		struct timer_list timer;
> -
> -		prepare_to_wait(&ring->irq_queue, &wait, state);
> +		prepare_to_wait(&req->wait, &wait, state);
>
> -		/* We need to check whether any gpu reset happened in between
> -		 * the caller grabbing the seqno and now ... */
> -		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
> -			/* As we do not requeue the request over a GPU reset,
> -			 * if one does occur we know that the request is
> -			 * effectively complete.
> -			 */
> -			ret = 0;
> -			break;
> -		}
> -
> -		if (i915_gem_request_completed(req, false)) {
> +		if (i915_gem_request_completed(req, true) ||

Why it is OK to change the lazy coherency mode here?

> +		    req->reset_counter != i915_reset_counter(&req->i915->gpu_error)) {

It looks like it would be valuable to preserve the comment about no 
re-queuing etc on GPU reset.

>   			ret = 0;
>   			break;
>   		}
> @@ -1305,36 +1269,19 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   			break;
>   		}
>
> -		if (timeout && time_after_eq(jiffies, timeout_expire)) {
> -			ret = -ETIME;
> -			break;
> -		}
> -
> -		i915_queue_hangcheck(dev_priv);
> -
> -		timer.function = NULL;
> -		if (timeout || missed_irq(dev_priv, ring)) {
> -			unsigned long expire;
> -
> -			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
> -			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
> -			mod_timer(&timer, expire);
> -		}
> -
> -		io_schedule();
> -
> -		if (timer.function) {
> -			del_singleshot_timer_sync(&timer);
> -			destroy_timer_on_stack(&timer);
> -		}
> +		if (timeout) {
> +			timeout_remain = io_schedule_timeout(timeout_remain);
> +			if (timeout_remain == 0) {
> +				ret = -ETIME;
> +				break;
> +			}
> +		} else
> +			io_schedule();

Could set timeout_remain to that MAX value when timeout == NULL and have 
a single call site to io_schedule_timeout and less indentation. It 
doesn't matter hugely, maybe it would be a bit easier to read.

>   	}
> -	if (!irq_test_in_progress)
> -		ring->irq_put(ring);
> -
> -	finish_wait(&ring->irq_queue, &wait);
> -
> +	finish_wait(&req->wait, &wait);
>   out:
>   	now = ktime_get_raw_ns();
> +	intel_breadcrumbs_remove_waiter(req);
>   	trace_i915_gem_request_wait_end(req);
>
>   	if (timeout) {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 06ca4082735b..d1df405b905c 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -453,10 +453,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>   				   dev_priv->ring[i].name,
>   				   error->ring[i].num_requests);
>   			for (j = 0; j < error->ring[i].num_requests; j++) {
> -				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
> +				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x, waiters? %d\n",
>   					   error->ring[i].requests[j].seqno,
>   					   error->ring[i].requests[j].jiffies,
> -					   error->ring[i].requests[j].tail);
> +					   error->ring[i].requests[j].tail,
> +					   error->ring[i].requests[j].waiters);
>   			}
>   		}
>
> @@ -900,7 +901,7 @@ static void i915_record_ring_state(struct drm_device *dev,
>   		ering->instdone = I915_READ(GEN2_INSTDONE);
>   	}
>
> -	ering->waiting = waitqueue_active(&ring->irq_queue);
> +	ering->waiting = READ_ONCE(dev_priv->breadcrumbs.engine[ring->id].first);

What does the READ_ONCE do here?

>   	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
>   	ering->seqno = ring->get_seqno(ring, false);
>   	ering->acthd = intel_ring_get_active_head(ring);
> @@ -1105,6 +1106,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>   			erq->seqno = request->seqno;
>   			erq->jiffies = request->emitted_jiffies;
>   			erq->tail = request->postfix;
> +			erq->waiters = request->irq_count;
>   		}
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 28c0329f8281..0c2390476bdb 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -996,12 +996,11 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
>
>   static void notify_ring(struct intel_engine_cs *ring)
>   {
> -	if (!intel_ring_initialized(ring))
> +	if (ring->i915 == NULL)
>   		return;
>
>   	trace_i915_gem_request_notify(ring);
> -
> -	wake_up_all(&ring->irq_queue);
> +	wake_up_process(ring->i915->breadcrumbs.task);

For a later extension:

How would you view, some time in the future, adding a bool return to 
ring->put_irq() which would say to the thread that it failed to disable 
interrupts?

In this case thread would exit and would need to be restarted when the 
next waiter queues up. Possibly extending the idle->put_irq->exit 
durations as well then.

>   }
>
>   static void vlv_c0_read(struct drm_i915_private *dev_priv,
> @@ -2399,9 +2398,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
>   static void i915_error_wake_up(struct drm_i915_private *dev_priv,
>   			       bool reset_completed)
>   {
> -	struct intel_engine_cs *ring;
> -	int i;
> -
>   	/*
>   	 * Notify all waiters for GPU completion events that reset state has
>   	 * been changed, and that they need to restart their wait after
> @@ -2410,8 +2406,7 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
>   	 */
>
>   	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
> -	for_each_ring(ring, dev_priv, i)
> -		wake_up_all(&ring->irq_queue);
> +	wake_up_process(dev_priv->breadcrumbs.task);
>
>   	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
>   	wake_up_all(&dev_priv->pending_flip_queue);
> @@ -2986,16 +2981,16 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   			if (ring_idle(ring, seqno)) {
>   				ring->hangcheck.action = HANGCHECK_IDLE;
>
> -				if (waitqueue_active(&ring->irq_queue)) {
> +				if (READ_ONCE(dev_priv->breadcrumbs.engine[ring->id].first)) {

READ_ONCE is again strange, but other than that I don't know the 
hangcheck code well enough to really evaluate it.

Perhaps this also means this line needs a comment describing what 
condition/state the check is actually approximating.

>   					/* Issue a wake-up to catch stuck h/w. */
>   					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
> -						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
> +						if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
>   							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
>   								  ring->name);
>   						else
>   							DRM_INFO("Fake missed irq on %s\n",
>   								 ring->name);
> -						wake_up_all(&ring->irq_queue);
> +						wake_up_process(dev_priv->breadcrumbs.task);
>   					}
>   					/* Safeguard against driver failure */
>   					ring->hangcheck.score += BUSY;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> new file mode 100644
> index 000000000000..0a18405c8689
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -0,0 +1,253 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/kthread.h>
> +
> +#include "i915_drv.h"
> +
> +static bool __irq_enable(struct intel_engine_cs *engine)
> +{
> +	if (test_bit(engine->id, &engine->i915->gpu_error.test_irq_rings))
> +		return false;
> +
> +	if (!intel_irqs_enabled(engine->i915))
> +		return false;
> +
> +	return engine->irq_get(engine);
> +}
> +
> +static struct drm_i915_gem_request *to_request(struct rb_node *rb)
> +{
> +	if (rb == NULL)
> +		return NULL;
> +
> +	return rb_entry(rb, struct drm_i915_gem_request, irq_node);
> +}
> +
> +/*
> + * intel_breadcrumbs_irq() acts as the bottom-half for the user interrupt,
> + * which we use as the breadcrumb after every request. In order to scale
> + * to many concurrent waiters, we delegate the task of reading the coherent
> + * seqno to this bottom-half. (Otherwise every waiter is woken after each
> + * interrupt and they all attempt to perform the heavyweight coherent
> + * seqno check.) Individual clients register with the bottom-half when
> + * waiting on a request, and are then woken when the bottom-half notices
> + * their seqno is complete. This incurs an extra context switch from the
> + * interrupt to the client in the uncontended case.
> + */
> +static int intel_breadcrumbs_irq(void *data)
> +{
> +	struct drm_i915_private *i915 = data;
> +	struct intel_breadcrumbs *b = &i915->breadcrumbs;
> +	unsigned irq_get = 0;
> +	unsigned irq_enabled = 0;
> +	int i;
> +
> +	while (!kthread_should_stop()) {
> +		unsigned reset_counter = i915_reset_counter(&i915->gpu_error);
> +		unsigned long timeout_jiffies;
> +		bool idling = false;
> +
> +		/* On every tick, walk the seqno-ordered list of requests
> +		 * and for each retired request wakeup its clients. If we
> +		 * find an unfinished request, go back to sleep.
> +		 */

s/tick/loop/ ?

And if we don't find an unfinished request we still go back to sleep. :)

> +		set_current_state(TASK_INTERRUPTIBLE);
> +
> +		/* Note carefully that we do not hold a reference for the
> +		 * requests on this list. Therefore we only inspect them
> +		 * whilst holding the spinlock to ensure that they are not
> +		 * freed in the meantime, and the client must remove the
> +		 * request from the list if it is interrupted (before it
> +		 * itself releases its reference).
> +		 */
> +		spin_lock(&b->lock);

This lock is more per-engine than global in its nature, unless you think 
it is more efficient to do fewer lock atomics vs potential overlap of 
waiter activity per engine?

Would need a new lock for req->irq_count, per request. So 
req->lock/req->irq_count and be->lock for the tree operations.

Thread would only need to deal with the later. Feels like it would work, 
just not sure if it is worth it.

> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			struct intel_engine_cs *engine = &i915->ring[i];
> +			struct intel_breadcrumbs_engine *be = &b->engine[i];
> +			struct drm_i915_gem_request *request = be->first;
> +
> +			if (request == NULL) {
> +				if ((irq_get & (1 << i))) {
> +					if (irq_enabled & (1 << i)) {
> +						engine->irq_put(engine);
> +						irq_enabled &= ~ (1 << i);
> +					}
> +					intel_runtime_pm_put(i915);
> +					irq_get &= ~(1 << i);
> +				}
> +				continue;
> +			}
> +
> +			if ((irq_get & (1 << i)) == 0) {
> +				intel_runtime_pm_get(i915);
> +				irq_enabled |= __irq_enable(engine) << i;
> +				irq_get |= 1 << i;
> +			}

irq_get and irq_enabled are creating a little bit of a headache for me. 
That probably means a comment would be in order to explain what 
they are for and how they work.

Like this I don't see the need for irq_get to persist across loops?

irq_get looks like "try to get these", irq_enabled is "these ones I 
got". Apart from the weird usage of it to balance the runtime pm gets 
and puts.

A bit confusing as I said.

> +			do {
> +				struct rb_node *next;
> +
> +				if (request->reset_counter == reset_counter &&
> +				    !i915_gem_request_completed(request, false))
> +					break;
> +
> +				next = rb_next(&request->irq_node);
> +				rb_erase(&request->irq_node, &be->requests);
> +				RB_CLEAR_NODE(&request->irq_node);
> +
> +				wake_up_all(&request->wait);
> +
> +				request = to_request(next);
> +			} while (request);
> +			be->first = request;
> +			idling |= request == NULL;
> +
> +			/* Make sure the hangcheck timer is armed in case
> +			 * the GPU hangs and we are never woken up.
> +			 */
> +			if (request)
> +				i915_queue_hangcheck(i915);
> +		}
> +		spin_unlock(&b->lock);
> +
> +		/* If we don't have interrupts available (either we have
> +		 * detected the hardware is missing interrupts or the
> +		 * interrupt delivery was disabled by the user), wake up
> +		 * every jiffie to check for request completion.
> +		 *
> +		 * If we have processed all requests for one engine, we
> +		 * also wish to wake up in a jiffie to turn off the
> +		 * breadcrumb interrupt for that engine. We delay
> +		 * switching off the interrupt in order to allow another
> +		 * waiter to start without incurring additional latency
> +		 * enabling the interrupt.
> +		 */
> +		timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
> +		if (idling || i915->gpu_error.missed_irq_rings & irq_enabled)
> +			timeout_jiffies = 1;
> +
> +		/* Unlike the individual clients, we do not want this
> +		 * background thread to contribute to the system load,
> +		 * i.e. we do not want to use io_schedule() here.
> +		 */
> +		schedule_timeout(timeout_jiffies);
> +	}
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		if ((irq_get & (1 << i))) {
> +			if (irq_enabled & (1 << i)) {
> +				struct intel_engine_cs *engine = &i915->ring[i];
> +				engine->irq_put(engine);
> +			}
> +			intel_runtime_pm_put(i915);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
> +	bool first = false;
> +
> +	spin_lock(&b->lock);
> +	if (request->irq_count++ == 0) {
> +		struct intel_breadcrumbs_engine *be =
> +			&b->engine[request->ring->id];
> +		struct rb_node **p, *parent;
> +
> +		if (be->first == NULL)
> +			wake_up_process(b->task);

With a global b->lock it makes no difference but in the logical sense 
this would usually go last, after the request has been inserted into the 
tree and waiter initialized.

> +
> +		init_waitqueue_head(&request->wait);
> +
> +		first = true;
> +		parent = NULL;
> +		p = &be->requests.rb_node;
> +		while (*p) {
> +			struct drm_i915_gem_request *__req;
> +
> +			parent = *p;
> +			__req = rb_entry(parent, typeof(*__req), irq_node);
> +
> +			if (i915_seqno_passed(request->seqno, __req->seqno)) {
> +				p = &parent->rb_right;
> +				first = false;
> +			} else
> +				p = &parent->rb_left;
> +		}
> +		if (first)
> +			be->first = request;
> +
> +		rb_link_node(&request->irq_node, parent, p);
> +		rb_insert_color(&request->irq_node, &be->requests);
> +	}
> +	spin_unlock(&b->lock);
> +
> +	return first;
> +}
> +
> +void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
> +
> +	spin_lock(&b->lock);
> +	if (--request->irq_count == 0 && !RB_EMPTY_NODE(&request->irq_node)) {
> +		struct intel_breadcrumbs_engine *be =
> +			&b->engine[request->ring->id];
> +		if (be->first == request)
> +			be->first = to_request(rb_next(&request->irq_node));
> +		rb_erase(&request->irq_node, &be->requests);
> +	}
> +	spin_unlock(&b->lock);
> +}
> +
> +int intel_breadcrumbs_init(struct drm_i915_private *i915)
> +{
> +	struct intel_breadcrumbs *b = &i915->breadcrumbs;
> +	struct task_struct *task;
> +
> +	spin_lock_init(&b->lock);
> +
> +	task = kthread_run(intel_breadcrumbs_irq, i915, "irq/i915");
> +	if (IS_ERR(task))
> +		return PTR_ERR(task);
> +
> +	b->task = task;
> +
> +	return 0;
> +}
> +
> +void intel_breadcrumbs_fini(struct drm_i915_private *i915)
> +{
> +	struct intel_breadcrumbs *b = &i915->breadcrumbs;
> +
> +	if (b->task == NULL)
> +		return;
> +
> +	kthread_stop(b->task);
> +	b->task = NULL;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b40ffc3607e7..604c081b0a0f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1922,10 +1922,10 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>   	ring->buffer = NULL;
>
>   	ring->dev = dev;
> +	ring->i915 = to_i915(dev);
>   	INIT_LIST_HEAD(&ring->active_list);
>   	INIT_LIST_HEAD(&ring->request_list);
>   	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> -	init_waitqueue_head(&ring->irq_queue);
>
>   	INIT_LIST_HEAD(&ring->buffers);
>   	INIT_LIST_HEAD(&ring->execlist_queue);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 511efe556d73..5a5a7195dd54 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2157,6 +2157,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>   	WARN_ON(ring->buffer);
>
>   	ring->dev = dev;
> +	ring->i915 = to_i915(dev);
>   	INIT_LIST_HEAD(&ring->active_list);
>   	INIT_LIST_HEAD(&ring->request_list);
>   	INIT_LIST_HEAD(&ring->execlist_queue);
> @@ -2164,8 +2165,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>   	i915_gem_batch_pool_init(dev, &ring->batch_pool);
>   	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
>
> -	init_waitqueue_head(&ring->irq_queue);
> -
>   	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
>   	if (IS_ERR(ringbuf))
>   		return PTR_ERR(ringbuf);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb206151d..49b5bded2767 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -157,6 +157,7 @@ struct  intel_engine_cs {
>   #define LAST_USER_RING (VECS + 1)
>   	u32		mmio_base;
>   	struct		drm_device *dev;
> +	struct drm_i915_private *i915;
>   	struct intel_ringbuffer *buffer;
>   	struct list_head buffers;
>
> @@ -303,8 +304,6 @@ struct  intel_engine_cs {
>
>   	bool gpu_caches_dirty;
>
> -	wait_queue_head_t irq_queue;
> -
>   	struct intel_context *default_context;
>   	struct intel_context *last_context;
>
>

Regards,

Tvrtko


More information about the Intel-gfx mailing list