[Intel-gfx] [PATCH v5 19/35] drm/i915: Added scheduler flush calls to ring throttle and idle functions

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Mon Mar 7 11:31:34 UTC 2016


On Thu, 2016-02-18 at 14:27 +0000, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> When requesting that all GPU work is completed, it is now necessary to
> get the scheduler involved in order to flush out work that is queued
> but not yet submitted.
> 
> v2: Updated to add support for flushing the scheduler queue by time
> stamp rather than just doing a blanket flush.
> 
> v3: Moved submit_max_priority() to this patch from an earlier patch
> as it is no longer required in the other.
> 
> v4: Corrected the format of a comment to keep the style checker happy.
> Downgraded a BUG_ON to a WARN_ON as the latter is preferred.
> 
> v5: Shuffled functions around to remove forward prototypes, removed
> similarly offensive white space and added documentation. Re-worked the
> mutex locking around the submit function. [Joonas Lahtinen]
> 
> Used lighter weight spinlocks.
> 
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c       |  24 ++++-
>  drivers/gpu/drm/i915/i915_scheduler.c | 178 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_scheduler.h |   3 +
>  3 files changed, 204 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a47a495..d946f53 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3786,6 +3786,10 @@ int i915_gpu_idle(struct drm_device *dev)
>  
>  	/* Flush everything onto the inactive list. */
>  	for_each_ring(ring, dev_priv, i) {
> +		ret = i915_scheduler_flush(ring, true);
> +		if (ret < 0)
> +			return ret;
> +
>  		if (!i915.enable_execlists) {
>  			struct drm_i915_gem_request *req;
>  
> @@ -4519,7 +4523,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
>  	struct drm_i915_gem_request *request, *target = NULL;
>  	unsigned reset_counter;
> -	int ret;
> +	int i, ret;
> +	struct intel_engine_cs *ring;
>  
>  	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
>  	if (ret)
> @@ -4529,6 +4534,23 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	if (ret)
>  		return ret;
>  
> +	for_each_ring(ring, dev_priv, i) {
> +		/*
> +		 * Flush out scheduler entries that are getting 'stale'. Note
> +		 * that the following recent_enough test will only check
> +		 * against the time at which the request was submitted to the
> +		 * hardware (i.e. when it left the scheduler) not the time it
> +		 * was submitted to the driver.
> +		 *
> +		 * Also, there is not much point worrying about busy return
> +		 * codes from the scheduler flush call. Even if more work
> +		 * cannot be submitted right now for whatever reason, we
> +		 * still want to throttle against stale work that has already
> +		 * been submitted.
> +		 */
> +		i915_scheduler_flush_stamp(ring, recent_enough, false);
> +	}
> +
>  	spin_lock(&file_priv->mm.lock);
>  	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
>  		if (time_after_eq(request->emitted_jiffies, recent_enough))
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index edab63d..8130a9c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -304,6 +304,10 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
>   * attempting to acquire a mutex while holding a spin lock is a Bad Idea.
>   * And releasing the one before acquiring the other leads to other code
>   * being run and interfering.
> + *
> + * Hence any caller that does not already have the mutex lock for other
> + * reasons should call i915_scheduler_submit_unlocked() instead in order to
> + * obtain the lock first.
>   */
>  static int i915_scheduler_submit(struct intel_engine_cs *ring)
>  {
> @@ -428,6 +432,22 @@ error:
>  	return ret;
>  }
>  
> +static int i915_scheduler_submit_unlocked(struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ring->dev;
> +	int ret;
> +
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_scheduler_submit(ring);
> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return ret;
> +}
> +

This kind of code should really be factored out to the upper level functions.
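
That is, rather than adding a *_unlocked() wrapper inside the scheduler,
the one call site that does not already hold struct_mutex could do the
locking itself. A rough, untested sketch:

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        ret = i915_scheduler_submit(ring);

        mutex_unlock(&dev->struct_mutex);

That would keep i915_scheduler_submit() itself lock-agnostic.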

>  static void i915_generate_dependencies(struct i915_scheduler *scheduler,
>  				       struct i915_scheduler_queue_entry *node,
>  				       uint32_t ring)
> @@ -917,6 +937,164 @@ void i915_scheduler_work_handler(struct work_struct *work)
>  		i915_scheduler_process_work(ring);
>  }
>  
> +static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring,
> +					      bool is_locked)
> +{
> +	struct i915_scheduler_queue_entry *node;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct i915_scheduler *scheduler = dev_priv->scheduler;
> +	int ret, count = 0;
> +	bool found;
> +
> +	do {
> +		found = false;
> +		spin_lock_irq(&scheduler->lock);
> +		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +			if (!I915_SQS_IS_QUEUED(node))
> +				continue;
> +
> +			if (node->priority < scheduler->priority_level_max)
> +				continue;
> +
> +			found = true;
> +			break;
> +		}
> +		spin_unlock_irq(&scheduler->lock);
> +
> +		if (!found)
> +			break;
> +
> +		if (is_locked)
> +			ret = i915_scheduler_submit(ring);
> +		else
> +			ret = i915_scheduler_submit_unlocked(ring);

The widespread use of the 'is_locked' notation really bothers me. I
think the difference in code paths should be handled at the upper
level instead of being passed down; for example, this will cause
repeated lock/unlock cycles inside the loop.
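
For instance, if the only caller without struct_mutex took the mutex
once up front, the loop could always call plain i915_scheduler_submit().
A rough sketch (with a hypothetical signature that drops the is_locked
flag):

        /* Caller without struct_mutex: lock once for the whole flush. */
        ret = i915_mutex_lock_interruptible(ring->dev);
        if (ret)
                return ret;

        ret = i915_scheduler_submit_max_priority(ring);

        mutex_unlock(&ring->dev->struct_mutex);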

> +		if (ret < 0)
> +			return ret;
> +
> +		count += ret;
> +	} while (found);
> +
> +	return count;
> +}
> +
> +/**
> + * i915_scheduler_flush_stamp - force requests of a given age through the
> + * scheduler.
> + * @ring: Ring to be flushed
> + * @target: Jiffy based time stamp to flush up to
> + * @is_locked: Is the driver mutex lock held?
> + * DRM has a throttle by age of request facility. This requires waiting for
> + * outstanding work over a given age. This function helps that by forcing
> + * queued batch buffers over said age through the system.
> + * Returns the number of entries flushed or -EAGAIN if the scheduler is
> + * busy (e.g. waiting for a pre-emption event) while the mutex lock is
> + * held, which would prevent its asynchronous processing from completing.
> + */
> +int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
> +			       unsigned long target,
> +			       bool is_locked)
> +{
> +	struct i915_scheduler_queue_entry *node;
> +	struct drm_i915_private *dev_priv;
> +	struct i915_scheduler *scheduler;
> +	int flush_count = 0;
> +
> +	if (!ring)
> +		return -EINVAL;
> +
> +	dev_priv  = ring->dev->dev_private;
> +	scheduler = dev_priv->scheduler;
> +
> +	if (!scheduler)
> +		return 0;
> +
> +	if (is_locked && (scheduler->flags[ring->id] & i915_sf_submitting)) {
> +		/*
> +		 * Scheduler is busy already submitting another batch,
> +		 * come back later rather than going recursive...
> +		 */
> +		return -EAGAIN;
> +	}
> +

Again, this would be handled better at a higher level.

So I would like to see the 'is_locked' and other similar constructs
factored out from the series. It should make the code much more robust
to modify at a later point.
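
To sketch what I mean (untested, just illustrating the idea, with a
hypothetical two-argument signature), the flush function could require
the mutex unconditionally and keep only the recursion check:

        int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
                                       unsigned long target)
        {
                struct drm_i915_private *dev_priv = ring->dev->dev_private;
                struct i915_scheduler *scheduler = dev_priv->scheduler;

                lockdep_assert_held(&ring->dev->struct_mutex);

                /* Don't go recursive if a submission is already in flight. */
                if (scheduler->flags[ring->id] & i915_sf_submitting)
                        return -EAGAIN;

                /* ... priority bump and submit as in this patch ... */
                return 0;
        }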

Regards, Joonas

> +	spin_lock_irq(&scheduler->lock);
> +	i915_scheduler_priority_bump_clear(scheduler);
> +	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +		if (!I915_SQS_IS_QUEUED(node))
> +			continue;
> +
> +		if (node->stamp > target)
> +			continue;
> +
> +		flush_count = i915_scheduler_priority_bump(scheduler,
> +					node, scheduler->priority_level_max);
> +	}
> +	spin_unlock_irq(&scheduler->lock);
> +
> +	if (flush_count) {
> +		DRM_DEBUG_DRIVER("<%s> Bumped %d entries\n", ring->name, flush_count);
> +		flush_count = i915_scheduler_submit_max_priority(ring, is_locked);
> +	}
> +
> +	return flush_count;
> +}
> +
> +/**
> + * i915_scheduler_flush - force all requests through the scheduler.
> + * @ring: Ring to be flushed
> + * @is_locked: Is the driver mutex lock held?
> + * For various reasons it is sometimes necessary to flush the scheduler
> + * out, e.g. due to a ring reset.
> + * Returns the number of entries flushed or -EAGAIN if the scheduler is
> + * busy (e.g. waiting for a pre-emption event) while the mutex lock is
> + * held, which would prevent its asynchronous processing from completing.
> + */
> +int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked)
> +{
> +	struct i915_scheduler_queue_entry *node;
> +	struct drm_i915_private *dev_priv;
> +	struct i915_scheduler *scheduler;
> +	bool found;
> +	int ret;
> +	uint32_t count = 0;
> +
> +	if (!ring)
> +		return -EINVAL;
> +
> +	dev_priv  = ring->dev->dev_private;
> +	scheduler = dev_priv->scheduler;
> +
> +	if (!scheduler)
> +		return 0;
> +
> +	WARN_ON(is_locked && (scheduler->flags[ring->id] & i915_sf_submitting));
> +
> +	do {
> +		found = false;
> +		spin_lock_irq(&scheduler->lock);
> +		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +			if (!I915_SQS_IS_QUEUED(node))
> +				continue;
> +
> +			found = true;
> +			break;
> +		}
> +		spin_unlock_irq(&scheduler->lock);
> +
> +		if (found) {
> +			if (is_locked)
> +				ret = i915_scheduler_submit(ring);
> +			else
> +				ret = i915_scheduler_submit_unlocked(ring);
> +			if (ret < 0)
> +				return ret;
> +
> +			count += ret;
> +		}
> +	} while (found);
> +
> +	return count;
> +}
> +
>  /**
>   * i915_scheduler_is_request_tracked - return info to say what the scheduler's
>   * connection to this request is (if any).
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index a88adce..839b048 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -94,6 +94,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
>  bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
>  void i915_scheduler_wakeup(struct drm_device *dev);
>  void i915_scheduler_work_handler(struct work_struct *work);
> +int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
> +int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
> +			       unsigned long stamp, bool is_locked);
>  bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
>  				       bool *completed, bool *busy);
>  
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

