[Intel-gfx] [PATCH 2/3] drm/i915: Priority boost for locked waits
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 23 10:43:10 UTC 2017
On 21/01/2017 09:25, Chris Wilson wrote:
> We always try to do an unlocked wait before resorting to a blocking
> wait under the mutex - so we very rarely have to sleep under the
> struct_mutex. However, when we do, we want that wait to be as short as
> possible, as the struct_mutex is our BKL that will stall the driver and
> all clients.
>
> There should be no impact on typical workloads.
>
> v2: Move down a layer to apply to all waits.
> v3: Make the priority boost explicit. This makes the paths where we want
> boosting under the mutex clear, and prevents uselessly boosting the
> priority when we are only waiting for idle.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 10 +++++++++-
> drivers/gpu/drm/i915/i915_gem_request.c | 10 ++++++++++
> drivers/gpu/drm/i915/i915_gem_request.h | 10 +++++++---
> drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++-
> 4 files changed, 29 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d1fd3ef7fad4..0c1694fef903 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -330,6 +330,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY |
> I915_WAIT_ALL,
> MAX_SCHEDULE_TIMEOUT,
> NULL);
> @@ -755,7 +756,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> - I915_WAIT_LOCKED,
> + I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY,
> MAX_SCHEDULE_TIMEOUT,
> NULL);
> if (ret)
> @@ -806,6 +808,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY |
> I915_WAIT_ALL,
> MAX_SCHEDULE_TIMEOUT,
> NULL);
> @@ -3074,6 +3077,8 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
> {
> int ret;
>
> + GEM_BUG_ON(flags & I915_WAIT_PRIORITY);
> +
> if (flags & I915_WAIT_LOCKED) {
> struct i915_gem_timeline *tl;
>
> @@ -3163,6 +3168,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY |
> (write ? I915_WAIT_ALL : 0),
> MAX_SCHEDULE_TIMEOUT,
> NULL);
> @@ -3285,6 +3291,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY |
> I915_WAIT_ALL,
> MAX_SCHEDULE_TIMEOUT,
> NULL);
As mentioned before, is this not a concern? Does it not let any
userspace boost its priority to the maximum simply by calling
set_cache_level after execbuf?
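To make the concern concrete, something along these lines would seem
to be enough (a hypothetical userspace sketch; the execbuf setup is
elided and assumed valid):

    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Hypothetical sequence: queue work at normal priority, then change
     * the cache level of the still-busy object. The set-caching ioctl
     * lands in i915_gem_object_set_cache_level(), whose locked wait now
     * carries I915_WAIT_PRIORITY and so boosts the pending request to
     * the maximum client priority.
     */
    static void boost_by_set_caching(int fd, __u32 handle,
                                     struct drm_i915_gem_execbuffer2 *eb)
    {
            struct drm_i915_gem_caching arg = {
                    .handle = handle,       /* object referenced by eb */
                    .caching = I915_CACHING_CACHED,
            };

            ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb);
            ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
    }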
> @@ -3566,6 +3573,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
> ret = i915_gem_object_wait(obj,
> I915_WAIT_INTERRUPTIBLE |
> I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY |
> (write ? I915_WAIT_ALL : 0),
> MAX_SCHEDULE_TIMEOUT,
> NULL);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index bacb875a6ef3..9910b565d1a0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1046,6 +1046,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
> !!(flags & I915_WAIT_LOCKED));
> #endif
> + GEM_BUG_ON((flags & I915_WAIT_PRIORITY) && !(flags & I915_WAIT_LOCKED));
> GEM_BUG_ON(timeout < 0);
>
> if (i915_gem_request_completed(req))
> @@ -1056,6 +1057,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
> trace_i915_gem_request_wait_begin(req);
>
> + /* Very rarely do we wait whilst holding the mutex. We try to always
> + * do an unlocked wait before using a locked wait. However, when we
> + * have to resort to a locked wait, we want that wait to be as short
> + * as possible as the struct_mutex is our BKL that will stall the
> + * driver and all clients.
> + */
> + if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
> + req->engine->schedule(req, I915_PRIORITY_LOCKED);
> +
> add_wait_queue(&req->execute, &exec);
> if (flags & I915_WAIT_LOCKED) {
> add_wait_queue(errq, &reset);
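For reference, the boost here amounts to asking the engine's scheduler
hook for a higher minimum priority. A simplified sketch of the net
effect (illustrative only; the real execlists callback also propagates
the priority through the dependency tree and re-sorts the execution
queue):

    /* Not the actual engine->schedule() implementation -- just its net
     * effect on the request itself: the priority is raised to at least
     * the requested value, never lowered.
     */
    static void schedule_sketch(struct drm_i915_gem_request *req, int prio)
    {
            if (req->priotree.priority < prio)
                    req->priotree.priority = prio;
    }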
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index da71ae280b2a..ba83c507613b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -74,7 +74,8 @@ struct i915_priotree {
> };
>
> enum {
> - I915_PRIORITY_DISPLAY = I915_PRIORITY_MAX
> + I915_PRIORITY_LOCKED = I915_PRIORITY_MAX,
> + I915_PRIORITY_DISPLAY
> };
>
> struct i915_gem_capture_list {
> @@ -301,7 +302,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> __attribute__((nonnull(1)));
> #define I915_WAIT_INTERRUPTIBLE BIT(0)
> #define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
> -#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */
> +#define I915_WAIT_PRIORITY BIT(2) /* struct_mutex held, boost priority */
> +#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */
>
> static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
>
> @@ -726,7 +728,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
> return 0;
>
> ret = i915_wait_request(request,
> - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
> + I915_WAIT_INTERRUPTIBLE |
> + I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY,
> MAX_SCHEDULE_TIMEOUT);
> if (ret < 0)
> return ret;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index cf4ec937d923..2f7651ef45d5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2158,7 +2158,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
> return -ENOSPC;
>
> timeout = i915_wait_request(target,
> - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
> + I915_WAIT_INTERRUPTIBLE |
> + I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY,
> MAX_SCHEDULE_TIMEOUT);
This one also looks worrying, unless I am missing something. Does it
not allow clients who fill the ring to promote their own priority?
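Naively, a client would only need to keep its own ring full, e.g.
(hypothetical sketch, execbuf setup again elided):

    /* Hypothetical: keep submitting until the ringbuffer is exhausted.
     * The submission that finds no space then blocks in
     * wait_for_space(), which with this patch boosts the client's own
     * oldest request to I915_PRIORITY_LOCKED.
     */
    static void fill_ring(int fd, struct drm_i915_gem_execbuffer2 *eb)
    {
            for (;;)
                    ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb);
    }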
> if (timeout < 0)
> return timeout;
>
Regards,
Tvrtko