[Intel-gfx] [PATCH 11/11] drm/i915: Allow user control over preempt timeout on the important context

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Mar 26 17:09:29 UTC 2018


On 26/03/2018 12:50, Chris Wilson wrote:
> EGL_NV_realtime_priority?
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 22 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h | 13 +++++++++++++
>   drivers/gpu/drm/i915/i915_request.c     |  8 ++++++--
>   drivers/gpu/drm/i915/intel_lrc.c        |  2 +-
>   include/uapi/drm/i915_drm.h             | 12 ++++++++++++
>   5 files changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 5cfac0255758..dfb0a2b698c3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -749,6 +749,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_PRIORITY:
>   		args->value = ctx->priority;
>   		break;
> +	case I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
> +		if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
> +			ret = -ENODEV;
> +		else if (args->size)
> +			ret = -EINVAL;
> +		else
> +			args->value = ctx->preempt_timeout;
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -824,6 +833,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
> +		if (args->size)
> +			ret = -EINVAL;
> +		else if (args->value >= NSEC_PER_SEC)
> +			ret = -EINVAL;
> +		else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
> +			ret = -ENODEV;
> +		else if (args->value && !capable(CAP_SYS_ADMIN))
> +			ret = -EPERM;
> +		else
> +			ctx->preempt_timeout = args->value;
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 7854262ddfd9..74d4cadd729e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -150,6 +150,19 @@ struct i915_gem_context {
>   	 */
>   	int priority;
>   
> +	/**
> +	 * @preempt_timeout: QoS guarantee for the high priority context
> +	 *
> +	 * Some clients need a guarantee that they will start executing
> +	 * within a certain window, even at the expense of others. This entails
> +	 * that if a preemption request is not honoured by the active context
> +	 * within the timeout, we will reset the GPU to evict the hog and
> +	 * run the high priority context instead.
> +	 *
> +	 * Timeout is stored in nanoseconds; exclusive max of 1s.

Why did you think we would want to limit it to 1s?

> +	 */
> +	u32 preempt_timeout;
> +
>   	/** ggtt_offset_bias: placement restriction for context objects */
>   	u32 ggtt_offset_bias;
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9d8dcebd9649..2cd4ea75d127 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1101,8 +1101,12 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
>   	 * run at the earliest possible convenience.
>   	 */
>   	rcu_read_lock();
> -	if (engine->schedule)
> -		engine->schedule(request, request->ctx->priority, 0);
> +	if (engine->schedule) {
> +		unsigned int timeout = request->ctx->preempt_timeout;
> +		int priority = request->ctx->priority;
> +
> +		engine->schedule(request, priority, timeout);
> +	}
>   	rcu_read_unlock();
>   
>   	local_bh_disable();
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e266657851e1..e782a621b40b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1148,7 +1148,7 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_lock_irqsave(&engine->timeline->lock, flags);
>   
>   	queue_request(engine, &request->priotree, rq_prio(request));
> -	submit_queue(engine, rq_prio(request), 0);
> +	submit_queue(engine, rq_prio(request), request->ctx->preempt_timeout);
>   
>   	GEM_BUG_ON(!engine->execlists.first);
>   	GEM_BUG_ON(list_empty(&request->priotree.link));
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..6f10bbe90304 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,18 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
> + *
> + * Preemption timeout given in nanoseconds.
> + *
> + * Only allowed for privileged clients (CAP_SYS_ADMIN), this property allows
> + * the preempting context to kick out a GPU hog using a GPU reset if it does
> + * not honour the preemption request in time.
> + */
> +#define I915_CONTEXT_PARAM_PREEMPT_TIMEOUT	0x7
> +

Since I expressed a concern about the "force preemption" naming in the 
other thread, I feel obliged to say that I am OK with PREEMPT_TIMEOUT. :)

Regards,

Tvrtko

>   	__u64 value;
>   };
>   
> 




More information about the Intel-gfx mailing list