[Intel-gfx] [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Feb 22 13:33:22 UTC 2017
On 22/02/2017 11:46, Chris Wilson wrote:
> After the request is cancelled, we then need to remove it from the
> global execution timeline and return it to the context timeline, the
> inverse of submit_request().
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 58 +++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_gem_request.h | 3 ++
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 ++++++-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ---
> drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 6 +++
> 5 files changed, 83 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index d18f450977e0..97116e492d01 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
> spin_unlock_irqrestore(&engine->timeline->lock, flags);
> }
>
> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> +{
> + struct intel_engine_cs *engine = request->engine;
> + struct intel_timeline *timeline;
> +
> + assert_spin_locked(&engine->timeline->lock);
> +
> + /* Only unwind in reverse order, required so that the per-context list
> + * is kept in seqno/ring order.
> + */
> + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
> + engine->timeline->seqno--;
> +
> + /* We may be recursing from the signal callback of another i915 fence */
Is this comment a copy-paste from submit_request(), or can preemption really be
triggered from the signal callback of another fence?
> + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> + request->global_seqno = 0;
> + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> + intel_engine_cancel_signaling(request);
> + spin_unlock(&request->lock);
> +
> + /* Transfer back from the global per-engine timeline to per-context */
> + timeline = request->timeline;
> + GEM_BUG_ON(timeline == engine->timeline);
> +
> + spin_lock(&timeline->lock);
> + list_move(&request->link, &timeline->requests);
> + spin_unlock(&timeline->lock);
> +
> + /* We don't need to wake_up any waiters on request->execute, they
> + * will get woken by any other event or us re-adding this request
> + * to the engine timeline (__i915_gem_request_submit()). The waiters
> + * should be quite adapt at finding that the request now has a new
> + * global_seqno to the one they went to sleep on.
> + */
> +}
> +
> +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> +{
> + struct intel_engine_cs *engine = request->engine;
> + unsigned long flags;
> +
> + /* Will be called from irq-context when using foreign fences. */
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> +
> + __i915_gem_request_unsubmit(request);
> +
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +}
> +
> static int __i915_sw_fence_call
> submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> {
> @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> if (flags & I915_WAIT_LOCKED)
> add_wait_queue(errq, &reset);
>
> - intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> + wait.tsk = current;
>
> +restart:
> reset_wait_queue(&req->execute, &exec);
> + wait.seqno = i915_gem_request_global_seqno(req);
Not sure it is worth dropping intel_wait_init — I presume it is to avoid
assigning the task twice? It will still be the same task, so simply moving
the intel_wait_init call here would be clearer.
> if (!wait.seqno) {
> do {
> set_current_state(state);
> @@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> /* Only spin if we know the GPU is processing this request */
> if (i915_spin_request(req, state, 2))
> break;
> +
> + if (i915_gem_request_global_seqno(req) != wait.seqno) {
> + intel_engine_remove_wait(req->engine, &wait);
> + goto restart;
> + }
> }
>
> intel_engine_remove_wait(req->engine, &wait);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index b81f6709905c..5f73d8c0a38a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
> void __i915_gem_request_submit(struct drm_i915_gem_request *request);
> void i915_gem_request_submit(struct drm_i915_gem_request *request);
>
> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
> +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
> +
> struct intel_rps_client;
> #define NO_WAITBOOST ERR_PTR(-1)
> #define IS_RPS_CLIENT(p) (!IS_ERR(p))
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 882e601ebb09..5bcad7872c08 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -453,7 +453,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
> spin_unlock_irq(&b->lock);
> }
>
> -static bool signal_complete(struct drm_i915_gem_request *request)
> +static bool signal_valid(const struct drm_i915_gem_request *request)
> +{
> + u32 seqno = READ_ONCE(request->global_seqno);
> +
> + return seqno == request->signaling.wait.seqno;
> +}
> +
> +static bool signal_complete(const struct drm_i915_gem_request *request)
> {
> if (!request)
> return false;
> @@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
> * signalled that this wait is already completed.
> */
> if (intel_wait_complete(&request->signaling.wait))
> - return true;
> + return signal_valid(request);
>
> /* Carefully check if the request is complete, giving time for the
> * seqno to be visible or if the GPU hung.
> @@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>
> i915_gem_request_put(request);
> } else {
> + DEFINE_WAIT(exec);
> +
> if (kthread_should_stop()) {
> GEM_BUG_ON(request);
> break;
> }
>
> + if (request)
> + add_wait_queue(&request->execute, &exec);
> +
> schedule();
>
> + if (request)
> + remove_wait_queue(&request->execute, &exec);
> +
Not directly related, but it made me wonder why we are using
TASK_INTERRUPTIBLE in the signalers. Shouldn't it be
TASK_UNINTERRUPTIBLE with io_schedule()? It sounds a bit déjà vu,
though — maybe we have talked about it before.
> if (kthread_should_park())
> kthread_parkme();
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 45d2c2fa946e..97fde79167a6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
> /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
> int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>
> -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
> -{
> - wait->tsk = current;
> - wait->seqno = seqno;
> -}
> -
> static inline bool intel_wait_complete(const struct intel_wait *wait)
> {
> return RB_EMPTY_NODE(&wait->node);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> index 6426acc9fdca..62c020c7ea80 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> @@ -28,6 +28,12 @@
> #include "mock_gem_device.h"
> #include "mock_engine.h"
>
> +static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
> +{
> + wait->tsk = current;
> + wait->seqno = seqno;
> +}
> +
> static int check_rbtree(struct intel_engine_cs *engine,
> const unsigned long *bitmap,
> const struct intel_wait *waiters,
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list