[Intel-gfx] [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Feb 22 12:29:00 UTC 2017
On 22/02/2017 11:46, Chris Wilson wrote:
> A request is assigned a global seqno only when it is on the hardware
> execution queue. The global seqno can be used to maintain a list of
> requests on the same engine in retirement order, for example for
> constructing a priority queue for waiting. Prior to its execution, or
> if it is subsequently removed in the event of preemption, its global
> seqno is zero. As both insertion and removal from the execution queue
> may operate in IRQ context, it is not guarded by the usual struct_mutex
> BKL. Instead those relying on the global seqno must be prepared for its
> value to change between reads. Only when the request is complete can
> the global seqno be stable (due to the memory barriers on submitting
> the commands to the hardware to write the breadcrumb, if the HWS shows
> that it has passed the global seqno and the global seqno is unchanged
> after the read, it is indeed complete).
Missed some questions I've raised on this one in the previous round.
I never got round re-reading it if you were waiting for that by any chance.
Regards,
Tvrtko
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 16 ++++++--
> drivers/gpu/drm/i915/i915_gem.c | 16 +++++---
> drivers/gpu/drm/i915/i915_gem_request.c | 46 ++++++++++++++--------
> drivers/gpu/drm/i915/i915_gem_request.h | 66 +++++++++++++++++++++++++-------
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
> 5 files changed, 114 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 994929660027..3e4feeaeef60 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4022,14 +4022,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
> }
>
> static inline bool
> -__i915_request_irq_complete(struct drm_i915_gem_request *req)
> +__i915_request_irq_complete(const struct drm_i915_gem_request *req)
> {
> struct intel_engine_cs *engine = req->engine;
> + u32 seqno = i915_gem_request_global_seqno(req);
> +
> + /* The request was dequeued before we were awoken. We check after
> + * inspecting the hw to confirm that this was the same request
> + * that generated the HWS update. The memory barriers within
> + * the request execution are sufficient to ensure that a check
> + * after reading the value from hw matches this request.
> + */
> + if (!seqno)
> + return false;
>
> /* Before we do the heavier coherent read of the seqno,
> * check the value (hopefully) in the CPU cacheline.
> */
> - if (__i915_gem_request_completed(req))
> + if (__i915_gem_request_completed(req, seqno))
> return true;
>
> /* Ensure our read of the seqno is coherent so that we
> @@ -4080,7 +4090,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
> wake_up_process(tsk);
> rcu_read_unlock();
>
> - if (__i915_gem_request_completed(req))
> + if (__i915_gem_request_completed(req, seqno))
> return true;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fce2cec8e665..f950cedb6516 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
> if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
> i915_gem_request_retire_upto(rq);
>
> - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
> + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
> /* The GPU is now idle and this client has stalled.
> * Since no other client has submitted a request in the
> * meantime, assume that this client is the only one
> @@ -2609,7 +2609,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
> struct drm_i915_gem_request *
> i915_gem_find_active_request(struct intel_engine_cs *engine)
> {
> - struct drm_i915_gem_request *request;
> + struct drm_i915_gem_request *request, *active = NULL;
> + unsigned long flags;
>
> /* We are called by the error capture and reset at a random
> * point in time. In particular, note that neither is crucially
> @@ -2619,17 +2620,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
> * extra delay for a recent interrupt is pointless. Hence, we do
> * not need an engine->irq_seqno_barrier() before the seqno reads.
> */
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> list_for_each_entry(request, &engine->timeline->requests, link) {
> - if (__i915_gem_request_completed(request))
> + if (__i915_gem_request_completed(request,
> + request->global_seqno))
> continue;
>
> GEM_BUG_ON(request->engine != engine);
> GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> &request->fence.flags));
> - return request;
> +
> + active = request;
> + break;
> }
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
>
> - return NULL;
> + return active;
> }
>
> static bool engine_stalled(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 477e8fc125ce..d18f450977e0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> intel_engine_enable_signaling(request);
> spin_unlock(&request->lock);
>
> - GEM_BUG_ON(!request->global_seqno);
> engine->emit_breadcrumb(request,
> request->ring->vaddr + request->postfix);
>
> @@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> /* Move the oldest request to the slab-cache (if not in use!) */
> req = list_first_entry_or_null(&engine->timeline->requests,
> typeof(*req), link);
> - if (req && __i915_gem_request_completed(req))
> + if (req && i915_gem_request_completed(req))
> i915_gem_request_retire(req);
>
> /* Beware: Dragons be flying overhead.
> @@ -611,6 +610,7 @@ static int
> i915_gem_request_await_request(struct drm_i915_gem_request *to,
> struct drm_i915_gem_request *from)
> {
> + u32 seqno;
> int ret;
>
> GEM_BUG_ON(to == from);
> @@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
> return ret < 0 ? ret : 0;
> }
>
> - if (!from->global_seqno) {
> + seqno = i915_gem_request_global_seqno(from);
> + if (!seqno) {
> ret = i915_sw_fence_await_dma_fence(&to->submit,
> &from->fence, 0,
> GFP_KERNEL);
> return ret < 0 ? ret : 0;
> }
>
> - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
> + if (seqno <= to->timeline->sync_seqno[from->engine->id])
> return 0;
>
> trace_i915_gem_ring_sync_to(to, from);
> @@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
> return ret;
> }
>
> - to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
> + to->timeline->sync_seqno[from->engine->id] = seqno;
> return 0;
> }
>
> @@ -939,7 +940,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
> }
>
> bool __i915_spin_request(const struct drm_i915_gem_request *req,
> - int state, unsigned long timeout_us)
> + u32 seqno, int state, unsigned long timeout_us)
> {
> struct intel_engine_cs *engine = req->engine;
> unsigned int irq, cpu;
> @@ -957,7 +958,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
> irq = atomic_read(&engine->irq_count);
> timeout_us += local_clock_us(&cpu);
> do {
> - if (__i915_gem_request_completed(req))
> + if (seqno != i915_gem_request_global_seqno(req))
> + break;
> +
> + if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
> + seqno))
> return true;
>
> /* Seqno are meant to be ordered *before* the interrupt. If
> @@ -1029,11 +1034,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> if (flags & I915_WAIT_LOCKED)
> add_wait_queue(errq, &reset);
>
> + intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> +
> reset_wait_queue(&req->execute, &exec);
> - if (!req->global_seqno) {
> + if (!wait.seqno) {
> do {
> set_current_state(state);
> - if (req->global_seqno)
> +
> + wait.seqno = i915_gem_request_global_seqno(req);
> + if (wait.seqno)
> break;
>
> if (flags & I915_WAIT_LOCKED &&
> @@ -1061,7 +1070,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> if (timeout < 0)
> goto complete;
>
> - GEM_BUG_ON(!req->global_seqno);
> + GEM_BUG_ON(!wait.seqno);
> }
> GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
>
> @@ -1070,7 +1079,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> goto complete;
>
> set_current_state(state);
> - intel_wait_init(&wait, req->global_seqno);
> if (intel_engine_add_wait(req->engine, &wait))
> /* In order to check that we haven't missed the interrupt
> * as we enabled it, we need to kick ourselves to do a
> @@ -1091,7 +1099,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
> timeout = io_schedule_timeout(timeout);
>
> - if (intel_wait_complete(&wait))
> + if (intel_wait_complete(&wait) &&
> + i915_gem_request_global_seqno(req) == wait.seqno)
> break;
>
> set_current_state(state);
> @@ -1142,14 +1151,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> static void engine_retire_requests(struct intel_engine_cs *engine)
> {
> struct drm_i915_gem_request *request, *next;
> + u32 seqno = intel_engine_get_seqno(engine);
> + LIST_HEAD(retire);
>
> + spin_lock_irq(&engine->timeline->lock);
> list_for_each_entry_safe(request, next,
> &engine->timeline->requests, link) {
> - if (!__i915_gem_request_completed(request))
> - return;
> + if (!i915_seqno_passed(seqno, request->global_seqno))
> + break;
>
> - i915_gem_request_retire(request);
> + list_move_tail(&request->link, &retire);
> }
> + spin_unlock_irq(&engine->timeline->lock);
> +
> + list_for_each_entry_safe(request, next, &retire, link)
> + i915_gem_request_retire(request);
> }
>
> void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 467d3e13fce0..b81f6709905c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -135,6 +135,11 @@ struct drm_i915_gem_request {
> struct i915_priotree priotree;
> struct i915_dependency dep;
>
> + /** GEM sequence number associated with this request on the
> + * global execution timeline. It is zero when the request is not
> + * on the HW queue (i.e. not on the engine timeline list).
> + * Its value is guarded by the timeline spinlock.
> + */
> u32 global_seqno;
>
> /** Position in the ring of the start of the request */
> @@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
> *pdst = src;
> }
>
> +/**
> + * i915_gem_request_global_seqno - report the current global seqno
> + * @request - the request
> + *
> + * A request is assigned a global seqno only when it is on the hardware
> + * execution queue. The global seqno can be used to maintain a list of
> + * requests on the same engine in retirement order, for example for
> + * constructing a priority queue for waiting. Prior to its execution, or
> + * if it is subsequently removed in the event of preemption, its global
> + * seqno is zero. As both insertion and removal from the execution queue
> + * may operate in IRQ context, it is not guarded by the usual struct_mutex
> + * BKL. Instead those relying on the global seqno must be prepared for its
> + * value to change between reads. Only when the request is complete can
> + * the global seqno be stable (due to the memory barriers on submitting
> + * the commands to the hardware to write the breadcrumb, if the HWS shows
> + * that it has passed the global seqno and the global seqno is unchanged
> + * after the read, it is indeed complete).
> + */
> +static u32
> +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
> +{
> + return READ_ONCE(request->global_seqno);
> +}
> +
> int
> i915_gem_request_await_object(struct drm_i915_gem_request *to,
> struct drm_i915_gem_object *obj,
> @@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
> }
>
> static inline bool
> -__i915_gem_request_started(const struct drm_i915_gem_request *req)
> +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
> {
> - GEM_BUG_ON(!req->global_seqno);
> + GEM_BUG_ON(!seqno);
> return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> - req->global_seqno - 1);
> + seqno - 1);
> }
>
> static inline bool
> i915_gem_request_started(const struct drm_i915_gem_request *req)
> {
> - if (!req->global_seqno)
> + u32 seqno;
> +
> + seqno = i915_gem_request_global_seqno(req);
> + if (!seqno)
> return false;
>
> - return __i915_gem_request_started(req);
> + return __i915_gem_request_started(req, seqno);
> }
>
> static inline bool
> -__i915_gem_request_completed(const struct drm_i915_gem_request *req)
> +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
> {
> - GEM_BUG_ON(!req->global_seqno);
> - return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> - req->global_seqno);
> + GEM_BUG_ON(!seqno);
> + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
> + seqno == i915_gem_request_global_seqno(req);
> }
>
> static inline bool
> i915_gem_request_completed(const struct drm_i915_gem_request *req)
> {
> - if (!req->global_seqno)
> + u32 seqno;
> +
> + seqno = i915_gem_request_global_seqno(req);
> + if (!seqno)
> return false;
>
> - return __i915_gem_request_completed(req);
> + return __i915_gem_request_completed(req, seqno);
> }
>
> bool __i915_spin_request(const struct drm_i915_gem_request *request,
> - int state, unsigned long timeout_us);
> + u32 seqno, int state, unsigned long timeout_us);
> static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
> int state, unsigned long timeout_us)
> {
> - return (__i915_gem_request_started(request) &&
> - __i915_spin_request(request, state, timeout_us));
> + u32 seqno;
> +
> + seqno = i915_gem_request_global_seqno(request);
> + return (__i915_gem_request_started(request, seqno) &&
> + __i915_spin_request(request, seqno, state, timeout_us));
> }
>
> /* We treat requests as fences. This is not be to confused with our
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 4f4e703d1b14..d5bf4c0b2deb 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> struct rb_node *parent, **p;
> bool first, wakeup;
> + u32 seqno;
>
> /* Note that we may be called from an interrupt handler on another
> * device (e.g. nouveau signaling a fence completion causing us
> @@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>
> /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
> assert_spin_locked(&request->lock);
> - if (!request->global_seqno)
> +
> + seqno = i915_gem_request_global_seqno(request);
> + if (!seqno)
> return;
>
> request->signaling.wait.tsk = b->signaler;
> - request->signaling.wait.seqno = request->global_seqno;
> + request->signaling.wait.seqno = seqno;
> i915_gem_request_get(request);
>
> spin_lock(&b->lock);
> @@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> p = &b->signals.rb_node;
> while (*p) {
> parent = *p;
> - if (i915_seqno_passed(request->global_seqno,
> - to_signaler(parent)->global_seqno)) {
> + if (i915_seqno_passed(seqno,
> + to_signaler(parent)->signaling.wait.seqno)) {
> p = &parent->rb_right;
> first = false;
> } else {
>
More information about the Intel-gfx
mailing list