[Intel-gfx] [PATCH v2 1/6] drm/i915: Stop tracking timeline->inflight_seqnos
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Apr 24 10:14:21 UTC 2018
On 23/04/2018 19:08, Chris Wilson wrote:
> In commit 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine"), we
> moved from a global inflight counter to per-engine counters in the
> hope that will be easy to run concurrently in future. However, with the
> advent of the desire to move requests between engines, we do need a
> global counter to preserve the semantics that no engine wraps in the
> middle of a submit. (Although this semantic is now only required for gen7
> semaphore support, which only supports greater-than comparisons!)
>
> v2: Keep a global counter of all requests ever submitted and force the
> reset when it wraps.
>
> References: 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 5 ++--
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gem_timeline.h | 6 -----
> drivers/gpu/drm/i915/i915_request.c | 33 ++++++++++++------------
> drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++--
> 5 files changed, 22 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 2f05f5262bba..547e97102b85 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1340,10 +1340,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
> struct rb_node *rb;
>
> seq_printf(m, "%s:\n", engine->name);
> - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n",
> + seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
> engine->hangcheck.seqno, seqno[id],
> - intel_engine_last_submit(engine),
> - engine->timeline->inflight_seqnos);
> + intel_engine_last_submit(engine));
> seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n",
> yesno(intel_engine_has_waiter(engine)),
> yesno(test_bit(engine->id,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8444ca8d5aa3..8fd9fb6efba5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2061,6 +2061,7 @@ struct drm_i915_private {
> struct list_head timelines;
> struct i915_gem_timeline global_timeline;
> u32 active_requests;
> + u32 request_serial;
>
> /**
> * Is the GPU currently considered idle, or busy executing
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index 33e01bf6aa36..6e82119e2cd8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -37,12 +37,6 @@ struct intel_timeline {
> u64 fence_context;
> u32 seqno;
>
> - /**
> - * Count of outstanding requests, from the time they are constructed
> - * to the moment they are retired. Loosely coupled to hardware.
> - */
> - u32 inflight_seqnos;
> -
> spinlock_t lock;
>
> /**
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index b692a9f7c357..f3d04f0ab21c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -241,6 +241,7 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
> sizeof(timeline->engine[id].global_sync));
> }
>
> + i915->gt.request_serial = seqno;
> return 0;
> }
>
> @@ -257,18 +258,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
> return reset_all_global_seqno(i915, seqno - 1);
> }
>
> -static int reserve_engine(struct intel_engine_cs *engine)
> +static int reserve_gt(struct drm_i915_private *i915)
> {
> - struct drm_i915_private *i915 = engine->i915;
> - u32 active = ++engine->timeline->inflight_seqnos;
> - u32 seqno = engine->timeline->seqno;
> int ret;
>
> - /* Reservation is fine until we need to wrap around */
> - if (unlikely(add_overflows(seqno, active))) {
> + /*
> + * Reservation is fine until we may need to wrap around
> + *
> + * By incrementing the serial for every request, we know that no
> + * individual engine may exceed that serial (as each is reset to 0
> + * on any wrap). This protects even the most pessimistic of migrations
> + * of every request from all engines onto just one.
> + */
I didn't really figure out what was wrong with v1? Neither could handle
more than four billion simultaneously active requests - but I thought
that should not concern us. :)
I don't quite get wrapping based on otherwise unused counter.
v1 made sense to wrap when any engine timeline couldn't handle all
active requests without wrapping.
> + if (unlikely(++i915->gt.request_serial == 0)) {
> ret = reset_all_global_seqno(i915, 0);
> if (ret) {
> - engine->timeline->inflight_seqnos--;
> + i915->gt.request_serial--;
> return ret;
> }
> }
> @@ -279,15 +284,10 @@ static int reserve_engine(struct intel_engine_cs *engine)
> return 0;
> }
>
> -static void unreserve_engine(struct intel_engine_cs *engine)
> +static void unreserve_gt(struct drm_i915_private *i915)
> {
> - struct drm_i915_private *i915 = engine->i915;
> -
> if (!--i915->gt.active_requests)
> i915_gem_park(i915);
> -
> - GEM_BUG_ON(!engine->timeline->inflight_seqnos);
> - engine->timeline->inflight_seqnos--;
> }
>
> void i915_gem_retire_noop(struct i915_gem_active *active,
> @@ -362,7 +362,6 @@ static void i915_request_retire(struct i915_request *request)
> list_del_init(&request->link);
> spin_unlock_irq(&engine->timeline->lock);
>
> - unreserve_engine(request->engine);
> advance_ring(request);
>
> free_capture_list(request);
> @@ -424,6 +423,8 @@ static void i915_request_retire(struct i915_request *request)
> }
> spin_unlock_irq(&request->lock);
>
> + unreserve_gt(request->i915);
> +
> i915_sched_node_fini(request->i915, &request->sched);
> i915_request_put(request);
> }
> @@ -642,7 +643,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> return ERR_CAST(ring);
> GEM_BUG_ON(!ring);
>
> - ret = reserve_engine(engine);
> + ret = reserve_gt(i915);
Maybe more descriptive name? reserve_gt_global_seqno?
> if (ret)
> goto err_unpin;
>
> @@ -784,7 +785,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>
> kmem_cache_free(i915->requests, rq);
> err_unreserve:
> - unreserve_engine(engine);
> + unreserve_gt(i915);
> err_unpin:
> engine->context_unpin(engine, ctx);
> return ERR_PTR(ret);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index be608f7111f5..a55a849b81b6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1318,12 +1318,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> if (i915_terminally_wedged(&engine->i915->gpu_error))
> drm_printf(m, "*** WEDGED ***\n");
>
> - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
> + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
> intel_engine_get_seqno(engine),
> intel_engine_last_submit(engine),
> engine->hangcheck.seqno,
> - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
> - engine->timeline->inflight_seqnos);
> + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
> drm_printf(m, "\tReset count: %d (global %d)\n",
> i915_reset_engine_count(error, engine),
> i915_reset_count(error));
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list