[Intel-gfx] [PATCH 4/9] drm/i915/execlists: Replace direct submit with direct call to tasklet

Mika Kuoppala mika.kuoppala at linux.intel.com
Tue Jun 16 09:35:00 UTC 2020


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Rather than having special case code for opportunistically calling
> process_csb() and performing a direct submit while holding the engine
> spinlock for submitting the request, simply call the tasklet directly.
> This allows us to retain the direct submission path, including the CS
> draining to allow fast/immediate submissions, without requiring any
> duplicated code paths.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  1 +
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 27 +++----
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 78 +++++++------------
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  1 +
>  drivers/gpu/drm/i915/selftests/i915_request.c |  6 +-
>  5 files changed, 46 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 791897f8d847..c77b3c0d2b3b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -210,6 +210,7 @@ int intel_engine_resume(struct intel_engine_cs *engine);
>  
>  int intel_ring_submission_setup(struct intel_engine_cs *engine);
>  
> +void __intel_engine_stop_cs(struct intel_engine_cs *engine);
>  int intel_engine_stop_cs(struct intel_engine_cs *engine);
>  void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 045179c65c44..fbb8ac659b82 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -903,33 +903,34 @@ static unsigned long stop_timeout(const struct intel_engine_cs *engine)
>  	return READ_ONCE(engine->props.stop_timeout_ms);
>  }
>  
> -int intel_engine_stop_cs(struct intel_engine_cs *engine)
> +void __intel_engine_stop_cs(struct intel_engine_cs *engine)
>  {
>  	struct intel_uncore *uncore = engine->uncore;
> -	const u32 base = engine->mmio_base;
> -	const i915_reg_t mode = RING_MI_MODE(base);
> -	int err;
> +	const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
>  
> +	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
> +	intel_uncore_posting_read_fw(uncore, mode);
> +}
> +
> +int intel_engine_stop_cs(struct intel_engine_cs *engine)
> +{
>  	if (INTEL_GEN(engine->i915) < 3)
>  		return -ENODEV;
>  
>  	ENGINE_TRACE(engine, "\n");
>  
> -	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
> +	__intel_engine_stop_cs(engine);
>  
> -	err = 0;
> -	if (__intel_wait_for_register_fw(uncore,
> -					 mode, MODE_IDLE, MODE_IDLE,
> +	if (__intel_wait_for_register_fw(engine->uncore,
> +					 RING_MI_MODE(engine->mmio_base),
> +					 MODE_IDLE, MODE_IDLE,
>  					 1000, stop_timeout(engine),
>  					 NULL)) {
>  		ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
> -		err = -ETIMEDOUT;
> +		return -ETIMEDOUT;
>  	}
>  
> -	/* A final mmio read to let GPU writes be hopefully flushed to memory */
> -	intel_uncore_posting_read_fw(uncore, mode);
> -
> -	return err;
> +	return 0;
>  }
>  
>  void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e866b8d721ed..40c5085765da 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2703,16 +2703,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
>  }
>  
> -static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
> -{
> -	lockdep_assert_held(&engine->active.lock);
> -	if (!READ_ONCE(engine->execlists.pending[0])) {
> -		rcu_read_lock(); /* protect peeking at execlists->active */
> -		execlists_dequeue(engine);
> -		rcu_read_unlock();
> -	}
> -}
> -
>  static void __execlists_hold(struct i915_request *rq)
>  {
>  	LIST_HEAD(list);
> @@ -3102,7 +3092,7 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine)
>  	if (!timer_expired(t))
>  		return false;
>  
> -	return READ_ONCE(engine->execlists.pending[0]);
> +	return engine->execlists.pending[0];

Sometimes I yearn for an intel_execlists_request_pending() helper, but it
would be wonky, and port0 is quite core to the lrc backend...
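
For the record, something like the sketch below is roughly what I had in
mind -- untested and purely illustrative (the helper name and placement
are made up), and not a request for this series, since the open-coded
pending[0] check reads fine as is:

static inline bool
intel_execlists_request_pending(const struct intel_engine_cs *engine)
{
	/*
	 * Illustrative only: a non-NULL port0 means a submission is
	 * still in flight to the HW.  Whether READ_ONCE() is wanted
	 * depends on the caller; inside the tasklet the plain read
	 * (as in this patch) is enough.
	 */
	return !!READ_ONCE(engine->execlists.pending[0]);
}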

Overall this patch makes things more straightforward: submission now
funnels through the single tasklet path via flush_csb(), instead of a
duplicated direct-submit path under the engine lock.

Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>

>  }
>  
>  /*
> @@ -3112,7 +3102,6 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine)
>  static void execlists_submission_tasklet(unsigned long data)
>  {
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> -	bool timeout = preempt_timeout(engine);
>  
>  	process_csb(engine);
>  
> @@ -3122,16 +3111,17 @@ static void execlists_submission_tasklet(unsigned long data)
>  			execlists_reset(engine, "CS error");
>  	}
>  
> -	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
> +	if (unlikely(preempt_timeout(engine)))
> +		execlists_reset(engine, "preemption time out");
> +
> +	if (!engine->execlists.pending[0]) {
>  		unsigned long flags;
>  
> +		rcu_read_lock(); /* protect peeking at execlists->active */
>  		spin_lock_irqsave(&engine->active.lock, flags);
> -		__execlists_submission_tasklet(engine);
> +		execlists_dequeue(engine);
>  		spin_unlock_irqrestore(&engine->active.lock, flags);
> -
> -		/* Recheck after serialising with direct-submission */
> -		if (unlikely(timeout && preempt_timeout(engine)))
> -			execlists_reset(engine, "preemption time out");
> +		rcu_read_unlock();
>  	}
>  }
>  
> @@ -3163,26 +3153,16 @@ static void queue_request(struct intel_engine_cs *engine,
>  	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>  }
>  
> -static void __submit_queue_imm(struct intel_engine_cs *engine)
> -{
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
> -
> -	if (reset_in_progress(execlists))
> -		return; /* defer until we restart the engine following reset */
> -
> -	__execlists_submission_tasklet(engine);
> -}
> -
> -static void submit_queue(struct intel_engine_cs *engine,
> +static bool submit_queue(struct intel_engine_cs *engine,
>  			 const struct i915_request *rq)
>  {
>  	struct intel_engine_execlists *execlists = &engine->execlists;
>  
>  	if (rq_prio(rq) <= execlists->queue_priority_hint)
> -		return;
> +		return false;
>  
>  	execlists->queue_priority_hint = rq_prio(rq);
> -	__submit_queue_imm(engine);
> +	return true;
>  }
>  
>  static bool ancestor_on_hold(const struct intel_engine_cs *engine,
> @@ -3196,20 +3176,22 @@ static void flush_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists *el = &engine->execlists;
>  
> -	if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
> -		if (!reset_in_progress(el))
> -			process_csb(engine);
> -		tasklet_unlock(&el->tasklet);
> +	if (!tasklet_trylock(&el->tasklet)) {
> +		tasklet_hi_schedule(&el->tasklet);
> +		return;
>  	}
> +
> +	if (!reset_in_progress(el))
> +		execlists_submission_tasklet((unsigned long)engine);
> +
> +	tasklet_unlock(&el->tasklet);
>  }
>  
>  static void execlists_submit_request(struct i915_request *request)
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  	unsigned long flags;
> -
> -	/* Hopefully we clear execlists->pending[] to let us through */
> -	flush_csb(engine);
> +	bool submit = false;
>  
>  	/* Will be called from irq-context when using foreign fences. */
>  	spin_lock_irqsave(&engine->active.lock, flags);
> @@ -3224,10 +3206,13 @@ static void execlists_submit_request(struct i915_request *request)
>  		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>  		GEM_BUG_ON(list_empty(&request->sched.link));
>  
> -		submit_queue(engine, request);
> +		submit = submit_queue(engine, request);
>  	}
>  
>  	spin_unlock_irqrestore(&engine->active.lock, flags);
> +
> +	if (submit)
> +		flush_csb(engine);
>  }
>  
>  static void __execlists_context_fini(struct intel_context *ce)
> @@ -4113,7 +4098,6 @@ static int execlists_resume(struct intel_engine_cs *engine)
>  static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	unsigned long flags;
>  
>  	ENGINE_TRACE(engine, "depth<-%d\n",
>  		     atomic_read(&execlists->tasklet.count));
> @@ -4130,10 +4114,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  	__tasklet_disable_sync_once(&execlists->tasklet);
>  	GEM_BUG_ON(!reset_in_progress(execlists));
>  
> -	/* And flush any current direct submission. */
> -	spin_lock_irqsave(&engine->active.lock, flags);
> -	spin_unlock_irqrestore(&engine->active.lock, flags);
> -
>  	/*
>  	 * We stop engines, otherwise we might get failed reset and a
>  	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
> @@ -4147,7 +4127,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  	 * FIXME: Wa for more modern gens needs to be validated
>  	 */
>  	ring_set_paused(engine, 1);
> -	intel_engine_stop_cs(engine);
> +	__intel_engine_stop_cs(engine);
>  
>  	engine->execlists.reset_ccid = active_ccid(engine);
>  }
> @@ -4377,12 +4357,12 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
>  	 * to sleep before we restart and reload a context.
>  	 */
>  	GEM_BUG_ON(!reset_in_progress(execlists));
> -	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
> -		execlists->tasklet.func(execlists->tasklet.data);
> +	GEM_BUG_ON(engine->execlists.pending[0]);
>  
> +	/* And kick in case we missed a new request submission. */
>  	if (__tasklet_enable(&execlists->tasklet))
> -		/* And kick in case we missed a new request submission. */
> -		tasklet_hi_schedule(&execlists->tasklet);
> +		flush_csb(engine);
> +
>  	ENGINE_TRACE(engine, "depth->%d\n",
>  		     atomic_read(&execlists->tasklet.count));
>  }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 7461936d549d..355ee8562bc1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -1597,6 +1597,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
>  
>  	p->critical_section_end();
>  	tasklet_enable(t);
> +	tasklet_hi_schedule(t);
>  
>  	if (err)
>  		pr_err("i915_reset_engine(%s:%s) failed under %s\n",
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 92c628f18c60..4f1b82c7eeaf 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -1925,9 +1925,7 @@ static int measure_inter_request(struct intel_context *ce)
>  		intel_ring_advance(rq, cs);
>  		i915_request_add(rq);
>  	}
> -	local_bh_disable();
>  	i915_sw_fence_commit(submit);
> -	local_bh_enable();
>  	intel_engine_flush_submission(ce->engine);
>  	heap_fence_put(submit);
>  
> @@ -2213,11 +2211,9 @@ static int measure_completion(struct intel_context *ce)
>  		intel_ring_advance(rq, cs);
>  
>  		dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
> -
> -		local_bh_disable();
>  		i915_request_add(rq);
> -		local_bh_enable();
>  
> +		intel_engine_flush_submission(ce->engine);
>  		if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
>  			err = -EIO;
>  			goto err;
> -- 
> 2.20.1
>