[Intel-gfx] [PATCH 4/9] drm/i915/execlists: Suppress redundant preemption

Wed Jan 30 08:15:56 UTC 2019

On 29/01/2019 17:02, Chris Wilson wrote:
> On unwinding the active request we give it a small (limited to internal
> priority levels) boost to prevent it from being gazumped a second time.
> However, this means that it can be promoted to above the request that
> triggered the preemption request, causing a preempt-to-idle cycle for no
> change. We can avoid this if we take the boost into account when
> checking if the preemption request is valid.
> 
> v2: After preemption the active request will be after the preemptee if
> they end up with equal priority.
> 
> v3: Tvrtko pointed out that this, the existing logic, makes
> I915_PRIORITY_WAIT non-preemptible. Document this interesting quirk!
> 
> v4: Prove Tvrtko was right about WAIT being non-preemptible and test it.

I thought there would be a simpler solution coming for now. :)

In this version, WAIT is only prevented from preempting if the last rq 
from a ctx in port0 is already active; otherwise it still preempts.

Also, by making WAIT non-preempting in this way, we still kick the 
tasklet, right? Only to then decide we won't preempt -- or might not, 
depending on i915_request_started().

I thought for now we would do something like:

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(I915_MIN_PREEMPT_PRIORITY, last);
}

With some consideration given to what to do about the needless tasklet 
kicking as well...

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c           |  45 ++++++++-
>   drivers/gpu/drm/i915/selftests/intel_lrc.c | 107 +++++++++++++++++++++
>   2 files changed, 148 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index dec4e1b04ae1..62d98b7edc7a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -164,6 +164,8 @@
>   #define WA_TAIL_DWORDS 2
>   #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
>   
> +#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
> +
>   static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   					    struct intel_engine_cs *engine,
>   					    struct intel_context *ce);
> @@ -188,6 +190,34 @@ static inline int rq_prio(const struct i915_request *rq)
>   	return rq->sched.attr.priority;
>   }
>   
> +static inline int active_prio(const struct i915_request *rq)
> +{
> +	int prio = rq_prio(rq);
> +
> +	/*
> +	 * On unwinding the active request, we give it a priority bump
> +	 * equivalent to a freshly submitted request. This protects it from
> +	 * being gazumped again, but it would be preferable if we didn't
> +	 * let it be gazumped in the first place!
> +	 *
> +	 * See __unwind_incomplete_requests()
> +	 */
> +	if ((prio & ACTIVE_PRIORITY) != ACTIVE_PRIORITY &&
> +	    i915_request_started(rq)) {
> +		/*
> +		 * After preemption, we insert the active request at the
> +		 * end of the new priority level. This means that we will be
> +		 * _lower_ priority than the preemptee all things equal (and
> +		 * so the preemption is valid), so adjust our comparison
> +		 * accordingly.
> +		 */
> +		prio |= ACTIVE_PRIORITY;
> +		prio--;
> +	}
> +
> +	return prio;
> +}
> +
>   static int queue_prio(const struct intel_engine_execlists *execlists)
>   {
>   	struct i915_priolist *p;
> @@ -208,7 +238,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
>   				const struct i915_request *rq)
>   {
> -	const int last_prio = rq_prio(rq);
> +	int last_prio;
>   
>   	if (!intel_engine_has_preemption(engine))
>   		return false;
> @@ -228,6 +258,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	 * preempt. If that hint is stale or we may be trying to preempt
>   	 * ourselves, ignore the request.
>   	 */
> +	last_prio = active_prio(rq);
>   	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
>   				      last_prio))
>   		return false;
> @@ -353,7 +384,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   {
>   	struct i915_request *rq, *rn, *active = NULL;
>   	struct list_head *uninitialized_var(pl);
> -	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
> +	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
>   
>   	lockdep_assert_held(&engine->timeline.lock);
>   
> @@ -384,9 +415,15 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	 * The active request is now effectively the start of a new client
>   	 * stream, so give it the equivalent small priority bump to prevent
>   	 * it being gazumped a second time by another peer.
> +	 *
> +	 * One consequence of this preemption boost is that we may jump
> +	 * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
> +	 * making those priorities non-preemptible. They will be moved forward
> +	 * in the priority queue, but they will not gain immediate access to
> +	 * the GPU.
>   	 */
> -	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
> -		prio |= I915_PRIORITY_NEWCLIENT;
> +	if ((prio & ACTIVE_PRIORITY) != ACTIVE_PRIORITY) {
> +		prio |= ACTIVE_PRIORITY;
>   		active->sched.attr.priority = prio;
>   		list_move_tail(&active->sched.link,
>   			       i915_sched_lookup_priolist(engine, prio));
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index fb35f53c9ce3..28fdcbcb7267 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -405,6 +405,112 @@ static int live_suppress_self_preempt(void *arg)
>   	goto err_client_b;
>   }
>   
> +static int live_suppress_wait_preempt(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct preempt_client client[4];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	int err = -ENOMEM;
> +	int i;
> +
> +	/*
> +	 * Waiters are given a little priority nudge, but not enough
> +	 * to actually cause any preemption. Double check that we do
> +	 * not needlessly generate preempt-to-idle cycles.
> +	 */
> +
> +	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> +		return 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0; /* presume black box */
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
> +		goto err_unlock;
> +	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
> +		goto err_client_0;
> +	if (preempt_client_init(i915, &client[2])) /* head of queue */
> +		goto err_client_1;
> +	if (preempt_client_init(i915, &client[3])) /* bystander */
> +		goto err_client_2;
> +
> +	for_each_engine(engine, i915, id) {
> +		int depth;
> +
> +		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
> +			struct i915_request *rq[ARRAY_SIZE(client)];
> +
> +			engine->execlists.preempt_hang.count = 0;
> +
> +			for (i = 0; i < ARRAY_SIZE(client); i++) {
> +				rq[i] = igt_spinner_create_request(&client[i].spin,
> +								   client[i].ctx, engine,
> +								   MI_NOOP);
> +				if (IS_ERR(rq[i])) {
> +					err = PTR_ERR(rq[i]);
> +					goto err_wedged;
> +				}
> +
> +				i915_request_add(rq[i]);
> +			}
> +			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
> +				pr_err("First client failed to start\n");
> +				goto err_wedged;
> +			}
> +
> +			if (i915_request_wait(rq[depth],
> +					      I915_WAIT_LOCKED |
> +					      I915_WAIT_PRIORITY,
> +					      1) != -ETIME) {
> +				pr_err("Waiter depth:%d completed!\n", depth);
> +				goto err_wedged;
> +			}
> +
> +			for (i = 0; i < ARRAY_SIZE(client); i++)
> +				igt_spinner_end(&client[i].spin);
> +
> +			if (engine->execlists.preempt_hang.count) {
> +				pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
> +				       engine->execlists.preempt_hang.count,
> +				       depth);
> +				err = -EINVAL;
> +				goto err_client_3;
> +			}
> +
> +			if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +				goto err_wedged;
> +		}
> +	}
> +
> +	err = 0;
> +err_client_3:
> +	preempt_client_fini(&client[3]);
> +err_client_2:
> +	preempt_client_fini(&client[2]);
> +err_client_1:
> +	preempt_client_fini(&client[1]);
> +err_client_0:
> +	preempt_client_fini(&client[0]);
> +err_unlock:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +
> +err_wedged:
> +	for (i = 0; i < ARRAY_SIZE(client); i++)
> +		igt_spinner_end(&client[i].spin);
> +	i915_gem_set_wedged(i915);
> +	err = -EIO;
> +	goto err_client_3;
> +}
> +
>   static int live_preempt_hang(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -785,6 +891,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
>   		SUBTEST(live_suppress_self_preempt),
> +		SUBTEST(live_suppress_wait_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   	};
>