[Intel-gfx] [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Feb 22 12:24:00 UTC 2017
On 22/02/2017 11:45, Chris Wilson wrote:
> Replace the global device seqno with one for each engine, and account
> for in-flight seqno on each separately. This is consistent with
> dma-fence as each timeline has separate fence-contexts for each engine
> and a seqno is only ordered within a fence-context (i.e. seqno do not
> need to be ordered wrt to other engines, just ordered within a single
> engine). This is required to enable request rewinding for preemption on
> individual engines (we have to rewind the global seqno to avoid
> overflow, and we do not have to rewind all engines just to preempt one.)
>
> v2: Rename active_seqno to inflight_seqnos to more clearly indicate that
> it is a counter and not equivalent to the existing seqno. Update
> functions that operated on active_seqno similarly.
Missed a couple of details, but never mind.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 11 +----
> drivers/gpu/drm/i915/i915_gem_request.c | 83 +++++++++++++++++---------------
> drivers/gpu/drm/i915/i915_gem_request.h | 8 +--
> drivers/gpu/drm/i915/i915_gem_timeline.h | 9 +++-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++-------
> drivers/gpu/drm/i915/intel_engine_cs.c | 2 -
> drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +-
> 7 files changed, 70 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 655e60d609c2..1a28b5279bec 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1080,15 +1080,6 @@ static const struct file_operations i915_error_state_fops = {
> #endif
>
> static int
> -i915_next_seqno_get(void *data, u64 *val)
> -{
> - struct drm_i915_private *dev_priv = data;
> -
> - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
> - return 0;
> -}
> -
> -static int
> i915_next_seqno_set(void *data, u64 val)
> {
> struct drm_i915_private *dev_priv = data;
> @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
> }
>
> DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
> - i915_next_seqno_get, i915_next_seqno_set,
> + NULL, i915_next_seqno_set,
> "0x%llx\n");
>
> static int i915_frequency_info(struct seq_file *m, void *unused)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 51bc24d1d7c9..e8b354cf2f06 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
> pt->priority = INT_MIN;
> }
>
> +static void unreserve_seqno(struct intel_engine_cs *engine)
> +{
> + GEM_BUG_ON(!engine->timeline->inflight_seqnos);
> + engine->timeline->inflight_seqnos--;
> +}
> +
> void i915_gem_retire_noop(struct i915_gem_active *active,
> struct drm_i915_gem_request *request)
> {
> @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> &request->i915->gt.idle_work,
> msecs_to_jiffies(100));
> }
> + unreserve_seqno(request->engine);
>
> /* Walk through the active list, calling retire on each. This allows
> * objects to track their GPU activity and mark themselves as idle
> @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
> } while (tmp != req);
> }
>
> -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
> +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
> {
> struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
> struct intel_engine_cs *engine;
> @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
> GEM_BUG_ON(i915->gt.active_requests > 1);
>
> /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
> - while (intel_breadcrumbs_busy(i915))
> - cond_resched(); /* spin until threads are complete */
> - }
> - atomic_set(&timeline->seqno, seqno);
> + for_each_engine(engine, i915, id) {
> + struct intel_timeline *tl = &timeline->engine[id];
>
> - /* Finally reset hw state */
> - for_each_engine(engine, i915, id)
> + if (!i915_seqno_passed(seqno, tl->seqno)) {
> + /* spin until threads are complete */
> + while (intel_breadcrumbs_busy(engine))
> + cond_resched();
> + }
> +
> + /* Finally reset hw state */
> + tl->seqno = seqno;
> intel_engine_init_global_seqno(engine, seqno);
> + }
>
> list_for_each_entry(timeline, &i915->gt.timelines, link) {
> for_each_engine(engine, i915, id) {
> @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
> /* HWS page needs to be set less than what we
> * will inject to ring
> */
> - return i915_gem_init_global_seqno(dev_priv, seqno - 1);
> + return reset_all_global_seqno(dev_priv, seqno - 1);
> }
>
> -static int reserve_global_seqno(struct drm_i915_private *i915)
> +static int reserve_seqno(struct intel_engine_cs *engine)
> {
> - u32 active_requests = ++i915->gt.active_requests;
> - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> + u32 active = ++engine->timeline->inflight_seqnos;
> + u32 seqno = engine->timeline->seqno;
> int ret;
>
> /* Reservation is fine until we need to wrap around */
> - if (likely(seqno + active_requests > seqno))
> + if (likely(!add_overflows(seqno, active)))
> return 0;
>
> - ret = i915_gem_init_global_seqno(i915, 0);
> + /* Even though we are tracking inflight seqno individually on each
> + * engine, other engines may be observing us using hw semaphores and
> + * so we need to idle all engines before wrapping around this engine.
> + * As all engines are then idle, we can reset the seqno on all, so
> + * we don't stall in quick succession if each engine is being
> + * similarly utilized.
> + */
> + ret = reset_all_global_seqno(engine->i915, 0);
> if (ret) {
> - i915->gt.active_requests--;
> + engine->timeline->inflight_seqnos--;
> return ret;
> }
>
> return 0;
> }
>
> -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
> -{
> - /* seqno only incremented under a mutex */
> - return ++tl->seqno.counter;
> -}
> -
> -static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> +static u32 timeline_get_seqno(struct intel_timeline *tl)
> {
> - return atomic_inc_return(&tl->seqno);
> + return ++tl->seqno;
> }
>
> void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> GEM_BUG_ON(timeline == request->timeline);
> assert_spin_locked(&timeline->lock);
>
> - seqno = timeline_get_seqno(timeline->common);
> + seqno = timeline_get_seqno(timeline);
> GEM_BUG_ON(!seqno);
> GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
>
> - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
> - request->previous_seqno = timeline->last_submitted_seqno;
> - timeline->last_submitted_seqno = seqno;
> -
> /* We may be recursing from the signal callback of another i915 fence */
> spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> request->global_seqno = seqno;
> @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> if (ret)
> return ERR_PTR(ret);
>
> - ret = reserve_global_seqno(dev_priv);
> + ret = reserve_seqno(engine);
> if (ret)
> goto err_unpin;
>
> @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> &i915_fence_ops,
> &req->lock,
> req->timeline->fence_context,
> - __timeline_get_seqno(req->timeline->common));
> + timeline_get_seqno(req->timeline));
>
> /* We bump the ref for the fence chain */
> i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
> @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> */
> req->head = req->ring->tail;
>
> + /* Check that we didn't interrupt ourselves with a new request */
> + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
> return req;
>
> err_ctx:
> @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
> kmem_cache_free(dev_priv->requests, req);
> err_unreserve:
> - dev_priv->gt.active_requests--;
> + unreserve_seqno(engine);
> err_unpin:
> engine->context_unpin(engine, ctx);
> return ERR_PTR(ret);
> @@ -837,8 +847,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
> * our i915_gem_request_alloc() and called __i915_add_request() before
> * us, the timeline will hold its seqno which is later than ours.
> */
> - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> - request->fence.seqno));
> + GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>
> /*
> * To ensure that this call will not fail, space for its emissions
> @@ -892,16 +901,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
> list_add_tail(&request->link, &timeline->requests);
> spin_unlock_irq(&timeline->lock);
>
> - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> - request->fence.seqno));
> -
> - timeline->last_submitted_seqno = request->fence.seqno;
> + GEM_BUG_ON(timeline->seqno != request->fence.seqno);
> i915_gem_active_set(&timeline->last_request, request);
>
> list_add_tail(&request->ring_link, &ring->request_list);
> request->emitted_jiffies = jiffies;
>
> - i915_gem_mark_busy(engine);
> + if (!request->i915->gt.active_requests++)
> + i915_gem_mark_busy(engine);
>
> /* Let the backend know a new request has arrived that may need
> * to adjust the existing execution schedule due to a high priority
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index ea511f06efaf..9049936c571c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -145,12 +145,6 @@ struct drm_i915_gem_request {
>
> u32 global_seqno;
>
> - /** GEM sequence number associated with the previous request,
> - * when the HWS breadcrumb is equal to this the GPU is processing
> - * this request.
> - */
> - u32 previous_seqno;
> -
> /** Position in the ring of the start of the request */
> u32 head;
>
> @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
> {
> GEM_BUG_ON(!req->global_seqno);
> return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> - req->previous_seqno);
> + req->global_seqno - 1);
> }
>
> static inline bool
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index f2e51f42cc2f..6c53e14cab2a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -33,7 +33,13 @@ struct i915_gem_timeline;
>
> struct intel_timeline {
> u64 fence_context;
> - u32 last_submitted_seqno;
> + u32 seqno;
> +
> + /**
> + * Count of outstanding requests, from the time they are constructed
> + * to the moment they are retired. Loosely coupled to hardware.
> + */
> + u32 inflight_seqnos;
>
> spinlock_t lock;
>
> @@ -56,7 +62,6 @@ struct intel_timeline {
>
> struct i915_gem_timeline {
> struct list_head link;
> - atomic_t seqno;
>
> struct drm_i915_private *i915;
> const char *name;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 5932e2dc0c6f..4f4e703d1b14 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
> cancel_fake_irq(engine);
> }
>
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
> {
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> - unsigned int mask = 0;
> -
> - for_each_engine(engine, i915, id) {
> - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> - spin_lock_irq(&b->lock);
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + bool busy = false;
>
> - if (b->first_wait) {
> - wake_up_process(b->first_wait->tsk);
> - mask |= intel_engine_flag(engine);
> - }
> + spin_lock_irq(&b->lock);
>
> - if (b->first_signal) {
> - wake_up_process(b->signaler);
> - mask |= intel_engine_flag(engine);
> - }
> + if (b->first_wait) {
> + wake_up_process(b->first_wait->tsk);
> + busy |= intel_engine_flag(engine);
> + }
>
> - spin_unlock_irq(&b->lock);
> + if (b->first_signal) {
> + wake_up_process(b->signaler);
> + busy |= intel_engine_flag(engine);
> }
>
> - return mask;
> + spin_unlock_irq(&b->lock);
> +
> + return busy;
> }
>
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 46c94740be53..c4d4698f98e7 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
> engine->irq_seqno_barrier(engine);
>
> GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
> - engine->timeline->last_submitted_seqno = seqno;
> -
> engine->hangcheck.seqno = seqno;
>
> /* After manually advancing the seqno, fake the interrupt in case
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4091c97be6ec..b91c2c7ef5a6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
> 	 * with serialising this hint with anything, so document it as
> * a hint and nothing more.
> */
> - return READ_ONCE(engine->timeline->last_submitted_seqno);
> + return READ_ONCE(engine->timeline->seqno);
> }
>
> int init_workarounds_ring(struct intel_engine_cs *engine);
> @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
>
> void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
> void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
>
> static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> {
>
More information about the Intel-gfx
mailing list