[PATCH 48/51] drm/i915: Remove i915_request_enqueue
Chris Wilson
chris@chris-wilson.co.uk
Sat Feb 6 21:23:36 UTC 2021
Now that everyone is using the scheduler interface, we can remove a very
frequently called vfunc, i915_sched.submit_request, and replace it with a
static call.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 -
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 -
.../drm/i915/gt/intel_execlists_submission.c | 8 --
drivers/gpu/drm/i915/gt/intel_reset.c | 23 +---
.../gpu/drm/i915/gt/intel_ring_scheduler.c | 7 -
.../gpu/drm/i915/gt/intel_ring_submission.c | 7 -
drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +-
drivers/gpu/drm/i915/gt/selftest_timeline.c | 4 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 -
drivers/gpu/drm/i915/i915_active.c | 2 +-
drivers/gpu/drm/i915/i915_request.c | 130 ++++--------------
drivers/gpu/drm/i915/i915_request.h | 4 +-
drivers/gpu/drm/i915/i915_scheduler.c | 126 +++++++++++++++--
drivers/gpu/drm/i915/i915_scheduler.h | 19 ++-
drivers/gpu/drm/i915/i915_scheduler_types.h | 17 +--
drivers/gpu/drm/i915/selftests/i915_active.c | 2 +-
.../gpu/drm/i915/selftests/i915_gem_evict.c | 2 +-
drivers/gpu/drm/i915/selftests/i915_request.c | 4 +-
19 files changed, 169 insertions(+), 201 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5d2803b86e83..f1336013352b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -822,8 +822,6 @@ static int engine_init_common(struct intel_engine_cs *engine)
struct intel_context *ce;
int ret;
- engine->set_default_submission(engine);
-
/*
* We may need to do things with the shrinker which
* require us to immediately switch back to the default
@@ -1225,7 +1223,6 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
if (engine->sanitize)
engine->sanitize(engine);
- engine->set_default_submission(engine);
i915_sched_enable(intel_engine_get_scheduler(engine));
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5ef790d80b28..d01e2fd7cdd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -189,7 +189,7 @@ static void heartbeat(struct work_struct *wrk)
rq->emitted_jiffies + msecs_to_jiffies(delay)))
goto out;
- if (!i915_sw_fence_signaled(&rq->submit)) {
+ if (!i915_sw_fence_signaled(&rq->sched.submit)) {
/*
* Not yet submitted, system is stalled.
*
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index da28f5473653..d49d2e4c6d29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -242,8 +242,6 @@ struct intel_engine_cs {
void (*park)(struct intel_engine_cs *engine);
void (*unpark)(struct intel_engine_cs *engine);
- void (*set_default_submission)(struct intel_engine_cs *engine);
-
const struct intel_context_ops *cops;
int (*emit_flush)(const struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1d2f6bac5612..0bba29cd7f98 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2974,11 +2974,6 @@ static bool can_preempt(struct intel_engine_cs *engine)
return engine->class != RENDER_CLASS;
}
-static void execlists_set_default_submission(struct intel_engine_cs *engine)
-{
- engine->sched->submit_request = i915_request_enqueue;
-}
-
static void execlists_shutdown(struct intel_execlists *el)
{
/* Synchronise with residual timers and any softirq they raise */
@@ -3020,7 +3015,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
engine->emit_flush = gen12_emit_flush_xcs;
}
- engine->set_default_submission = execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -3107,7 +3101,6 @@ static struct i915_sched *init_execlists(struct intel_engine_cs *engine)
execlists_submission_tasklet, engine,
ENGINE_PHYSICAL);
- el->sched.submit_request = i915_request_enqueue;
el->sched.active_request = execlists_active_request;
el->sched.revoke_context = execlists_revoke_context;
el->sched.show = execlists_show;
@@ -3614,7 +3607,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
ve->base.sched->flags = sched;
- ve->base.sched->submit_request = i915_request_enqueue;
ve->base.sched->revoke_context = execlists_revoke_context;
tasklet_setup(&ve->base.sched->tasklet, virtual_submission_tasklet);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b17afaeaf8b0..5d6193dbb018 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -784,21 +784,6 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
}
}
-static void nop_submit_request(struct i915_request *request)
-{
- RQ_TRACE(request, "-EIO\n");
-
- request = i915_request_mark_eio(request);
- if (request) {
- struct intel_engine_cs *engine = i915_request_get_engine(request);
-
- i915_request_submit(request, engine);
- intel_engine_signal_breadcrumbs(engine);
-
- i915_request_put(request);
- }
-}
-
static void __intel_gt_set_wedged(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -821,12 +806,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, gt, id) {
- struct i915_sched *se = intel_engine_get_scheduler(engine);
-
- i915_sched_disable(se);
- se->submit_request = nop_submit_request;
- }
+ for_each_engine(engine, gt, id)
+ i915_sched_disable(intel_engine_get_scheduler(engine));
/*
* Make sure no request can slip through without getting completed by
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
index cf6207ea3b4e..3aec5a8f020b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c
@@ -983,11 +983,6 @@ static const struct intel_context_ops ring_context_ops = {
.destroy = ring_context_destroy,
};
-static void set_default_submission(struct intel_engine_cs *engine)
-{
- engine->sched->submit_request = i915_request_enqueue;
-}
-
static void ring_release(struct intel_engine_cs *engine)
{
intel_engine_cleanup_common(engine);
@@ -1055,8 +1050,6 @@ static void setup_common(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
else
engine->emit_fini_breadcrumb = gen4_emit_breadcrumb_xcs;
-
- engine->set_default_submission = set_default_submission;
}
static void setup_rcs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 6475fb1f3432..83ccce794324 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -919,11 +919,6 @@ static const struct intel_context_ops ring_context_ops = {
.destroy = ring_context_destroy,
};
-static void set_default_submission(struct intel_engine_cs *engine)
-{
- engine->sched->submit_request = i915_request_enqueue;
-}
-
static void ring_release(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1013,8 +1008,6 @@ static void setup_common(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen4_emit_breadcrumb_xcs;
else
engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
-
- engine->set_default_submission = set_default_submission;
}
static void setup_rcs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f07842637121..6f3c20934007 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4394,7 +4394,7 @@ static int bond_virtual_engine(struct intel_gt *gt,
if (flags & BOND_SCHEDULE) {
onstack_fence_init(&fence);
- err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
+ err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->sched.submit,
&fence,
GFP_KERNEL);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 6f0c9a9868c0..1089171f5b58 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -1030,7 +1030,7 @@ static int live_hwsp_read(void *arg)
goto out;
}
- err = i915_sw_fence_await_dma_fence(&rq->submit,
+ err = i915_sw_fence_await_dma_fence(&rq->sched.submit,
&watcher[0].rq->fence, 0,
GFP_KERNEL);
if (err < 0) {
@@ -1075,7 +1075,7 @@ static int live_hwsp_read(void *arg)
goto out;
}
- err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
+ err = i915_sw_fence_await_dma_fence(&watcher[1].rq->sched.submit,
&rq->fence, 0,
GFP_KERNEL);
if (err < 0) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c1ede6e8af12..89d8a89789a2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -433,11 +433,6 @@ static int guc_resume(struct intel_engine_cs *engine)
return 0;
}
-static void guc_set_default_submission(struct intel_engine_cs *engine)
-{
- engine->sched->submit_request = i915_request_enqueue;
-}
-
static void guc_release(struct intel_engine_cs *engine)
{
engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
@@ -466,7 +461,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
engine->emit_flush = gen12_emit_flush_xcs;
}
- engine->set_default_submission = guc_set_default_submission;
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 02c5ab8eb57e..bf467a9ca0fc 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -767,7 +767,7 @@ int i915_request_await_active(struct i915_request *rq,
struct i915_active *ref,
unsigned int flags)
{
- return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
+ return await_active(ref, flags, rq_await_fence, rq, &rq->sched.submit);
}
static int sw_await_fence(void *arg, struct dma_fence *fence)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 80a3214d4ecf..207f0a1fd886 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -112,15 +112,7 @@ static void i915_fence_release(struct dma_fence *fence)
{
struct i915_request *rq = to_request(fence);
- /*
- * The request is put onto a RCU freelist (i.e. the address
- * is immediately reused), mark the fences as being freed now.
- * Otherwise the debugobjects for the fences are only marked as
- * freed when the slab cache itself is freed, and so we would get
- * caught trying to reuse dead objects.
- */
- i915_sw_fence_fini(&rq->submit);
- i915_sw_fence_fini(&rq->semaphore);
+ i915_sched_node_fini(&rq->sched);
/*
* Keep one request on each engine for reserved use under mempressure
@@ -181,7 +173,7 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
{
struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
- cb->hook(container_of(cb->fence, struct i915_request, submit),
+ cb->hook(container_of(cb->fence, struct i915_request, sched.submit),
&cb->signal->fence);
i915_request_put(cb->signal);
@@ -281,7 +273,7 @@ bool i915_request_retire(struct i915_request *rq)
RQ_TRACE(rq, "\n");
- GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->sched.submit));
trace_i915_request_retire(rq);
i915_request_mark_complete(rq);
@@ -368,7 +360,7 @@ __await_execution(struct i915_request *rq,
if (!cb)
return -ENOMEM;
- cb->fence = &rq->submit;
+ cb->fence = &rq->sched.submit;
i915_sw_fence_await(cb->fence);
init_irq_work(&cb->work, irq_execute_cb);
@@ -445,20 +437,16 @@ void i915_request_set_error_once(struct i915_request *rq, int error)
} while (!try_cmpxchg(&rq->fence.error, &old, error));
}
-struct i915_request *i915_request_mark_eio(struct i915_request *rq)
+void __i915_request_mark_eio(struct i915_request *rq)
{
+ lockdep_assert_held(&i915_request_get_scheduler(rq)->lock);
+
if (__i915_request_is_complete(rq))
- return NULL;
+ return;
GEM_BUG_ON(i915_request_signaled(rq));
-
- /* As soon as the request is completed, it may be retired */
- rq = i915_request_get(rq);
-
i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
-
- return rq;
}
bool __i915_request_submit(struct i915_request *request,
@@ -515,7 +503,7 @@ bool __i915_request_submit(struct i915_request *request,
* optimistically try again.
*/
if (request->sched.semaphores &&
- i915_sw_fence_signaled(&request->semaphore))
+ i915_sw_fence_signaled(&request->sched.semaphore))
request->context->saturated |= request->sched.semaphores;
engine->emit_fini_breadcrumb(engine, request,
@@ -610,63 +598,6 @@ void i915_request_unsubmit(struct i915_request *request)
spin_unlock_irqrestore(&se->lock, flags);
}
-static int __i915_sw_fence_call
-submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
- struct i915_request *request =
- container_of(fence, typeof(*request), submit);
-
- switch (state) {
- case FENCE_COMPLETE:
- trace_i915_request_submit(request);
-
- if (unlikely(fence->error))
- i915_request_set_error_once(request, fence->error);
-
- /*
- * We need to serialize use of the submit_request() callback
- * with its hotplugging performed during an emergency
- * i915_gem_set_wedged(). We use the RCU mechanism to mark the
- * critical section in order to force i915_gem_set_wedged() to
- * wait until the submit_request() is completed before
- * proceeding.
- */
- rcu_read_lock();
- i915_request_get_scheduler(request)->submit_request(request);
- rcu_read_unlock();
- break;
-
- case FENCE_FREE:
- i915_request_put(request);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static int __i915_sw_fence_call
-semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
- struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
-
- switch (state) {
- case FENCE_COMPLETE:
- /*
- * The request is now ready to run; re-evaluate its deadline
- * to remove the semaphore deprioritisation and to assign
- * a deadline relative to its point-of-readiness [now].
- */
- i915_request_update_deadline(rq);
- break;
-
- case FENCE_FREE:
- i915_request_put(rq);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
static void retire_requests(struct intel_timeline *tl)
{
struct i915_request *rq, *rn;
@@ -721,8 +652,6 @@ static void __i915_request_ctor(void *arg)
spin_lock_init(&rq->lock);
i915_sched_node_init(&rq->sched);
- i915_sw_fence_init(&rq->submit, submit_notify);
- i915_sw_fence_init(&rq->semaphore, semaphore_notify);
dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
@@ -810,13 +739,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
- /* We bump the ref for the fence chain */
- i915_sw_fence_reinit(&i915_request_get(rq)->submit);
- i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
-
- i915_sched_node_reinit(&rq->sched);
- rq->sched.engine = intel_engine_get_scheduler(ce->engine);
- rq->execution_mask = rq->sched.engine->mask;
+ i915_sched_prepare_request(intel_engine_get_scheduler(ce->engine), rq);
/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
@@ -960,7 +883,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
err = 0;
if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
- err = i915_sw_fence_await_dma_fence(&rq->submit,
+ err = i915_sw_fence_await_dma_fence(&rq->sched.submit,
fence, 0,
I915_FENCE_GFP);
dma_fence_put(fence);
@@ -1043,7 +966,7 @@ emit_semaphore_wait(struct i915_request *to,
gfp_t gfp)
{
const intel_engine_mask_t mask = READ_ONCE(from->sched.engine)->mask;
- struct i915_sw_fence *wait = &to->submit;
+ struct i915_sw_fence *wait = &to->sched.submit;
if (!intel_context_use_semaphores(to->context))
goto await_fence;
@@ -1078,7 +1001,7 @@ emit_semaphore_wait(struct i915_request *to,
goto await_fence;
to->sched.semaphores |= mask & ~to->sched.engine->mask;
- wait = &to->semaphore;
+ wait = &to->sched.semaphore;
await_fence:
return i915_sw_fence_await_dma_fence(wait,
@@ -1190,7 +1113,7 @@ static int
__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
mark_external(rq);
- return i915_sw_fence_await_dma_fence(&rq->submit, fence,
+ return i915_sw_fence_await_dma_fence(&rq->sched.submit, fence,
i915_fence_context_timeout(i915_request_get_engine(rq)->i915,
fence->context),
I915_FENCE_GFP);
@@ -1245,7 +1168,8 @@ i915_request_await_execution(struct i915_request *rq,
do {
fence = *child++;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
- i915_sw_fence_set_error_once(&rq->submit, fence->error);
+ i915_sw_fence_set_error_once(&rq->sched.submit,
+ fence->error);
continue;
}
@@ -1282,8 +1206,8 @@ await_request_submit(struct i915_request *to, struct i915_request *from)
* as it may then bypass the virtual request.
*/
if (to->sched.engine == READ_ONCE(from->sched.engine))
- return i915_sw_fence_await_sw_fence_gfp(&to->submit,
- &from->submit,
+ return i915_sw_fence_await_sw_fence_gfp(&to->sched.submit,
+ &from->sched.submit,
I915_FENCE_GFP);
else
return __i915_request_await_execution(to, from, NULL);
@@ -1299,7 +1223,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
GEM_BUG_ON(to->context == from->context);
if (i915_request_completed(from)) {
- i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+ i915_sw_fence_set_error_once(&to->sched.submit,
+ from->fence.error);
return 0;
}
@@ -1347,7 +1272,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
do {
fence = *child++;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
- i915_sw_fence_set_error_once(&rq->submit, fence->error);
+ i915_sw_fence_set_error_once(&rq->sched.submit,
+ fence->error);
continue;
}
@@ -1492,11 +1418,11 @@ __i915_request_add_to_timeline(struct i915_request *rq)
rq->fence.seqno));
if (in_order_submission(prev, rq))
- i915_sw_fence_await_sw_fence(&rq->submit,
- &prev->submit,
+ i915_sw_fence_await_sw_fence(&rq->sched.submit,
+ &prev->sched.submit,
&rq->submitq);
else
- __i915_sw_fence_await_dma_fence(&rq->submit,
+ __i915_sw_fence_await_dma_fence(&rq->sched.submit,
&prev->fence,
&rq->dmaq);
if (i915_request_use_scheduler(rq))
@@ -1553,8 +1479,8 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
void __i915_request_queue_bh(struct i915_request *rq)
{
- i915_sw_fence_commit(&rq->semaphore);
- i915_sw_fence_commit(&rq->submit);
+ i915_sw_fence_commit(&rq->sched.semaphore);
+ i915_sw_fence_commit(&rq->sched.submit);
}
void __i915_request_queue(struct i915_request *rq,
@@ -1867,7 +1793,7 @@ static const char *run_status(const struct i915_request *rq)
if (__i915_request_has_started(rq))
return "*";
- if (!i915_sw_fence_signaled(&rq->semaphore))
+ if (!i915_sw_fence_signaled(&rq->sched.semaphore))
return "&";
return "";
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 0aef90c60d7c..d854c9020cf0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -207,7 +207,6 @@ struct i915_request {
* dependencies. When it is signaled, the request is ready to run.
* It is used by the driver to then queue the request for execution.
*/
- struct i915_sw_fence submit;
union {
wait_queue_entry_t submitq;
struct i915_sw_dma_fence_cb dmaq;
@@ -217,7 +216,6 @@ struct i915_request {
} duration;
};
struct llist_head execute_cb;
- struct i915_sw_fence semaphore;
/*
* A list of everyone we wait upon, and everyone who waits upon us.
@@ -313,7 +311,7 @@ i915_request_create(struct intel_context *ce);
void __i915_request_skip(struct i915_request *rq);
void i915_request_set_error_once(struct i915_request *rq, int error);
-struct i915_request *i915_request_mark_eio(struct i915_request *rq);
+void __i915_request_mark_eio(struct i915_request *rq);
struct i915_request *__i915_request_commit(struct i915_request *request);
void __i915_request_queue(struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index b3d703f76b82..956287a52be2 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -15,6 +15,7 @@
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"
+#include "i915_trace.h"
#include "i915_utils.h"
static struct i915_global_scheduler {
@@ -259,7 +260,6 @@ i915_sched_init(struct i915_sched *se,
init_ipi(&se->ipi);
- se->submit_request = i915_request_enqueue;
se->active_request = i915_sched_default_active_request;
se->revoke_context = i915_sched_default_revoke_context;
}
@@ -843,7 +843,7 @@ static int adj_prio(const struct i915_request *rq)
* When all semaphores are signaled, we will update the request
* to remove the semaphore penalty.
*/
- if (!i915_sw_fence_signaled(&rq->semaphore))
+ if (!i915_sw_fence_signaled(&rq->sched.semaphore))
prio -= __I915_PRIORITY_KERNEL__;
return prio;
@@ -1200,7 +1200,7 @@ bool __i915_request_requeue(struct i915_request *rq, struct i915_sched *se)
return true;
}
-void i915_request_enqueue(struct i915_request *rq)
+static void i915_request_enqueue(struct i915_request *rq)
{
struct i915_sched *se = i915_request_get_scheduler(rq);
u64 dl = earliest_deadline(se, rq);
@@ -1211,8 +1211,15 @@ void i915_request_enqueue(struct i915_request *rq)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&se->lock, flags);
+
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+ if (unlikely(!test_bit(I915_SCHED_ENABLE_BIT, &se->flags))) {
+ __i915_request_mark_eio(rq);
+ __i915_request_submit(rq, i915_request_get_engine(rq));
+ goto unlock;
+ }
+
if (unlikely(ancestor_on_hold(se, rq))) {
RQ_TRACE(rq, "ancestor on hold\n");
list_add_tail(&rq->sched.link, &se->hold);
@@ -1226,6 +1233,8 @@ void i915_request_enqueue(struct i915_request *rq)
}
GEM_BUG_ON(list_empty(&rq->sched.link));
+
+unlock:
spin_unlock_irqrestore(&se->lock, flags);
if (kick)
i915_sched_kick(se);
@@ -1381,7 +1390,7 @@ void __i915_sched_resume_request(struct i915_sched *se, struct i915_request *rq)
RQ_TRACE(rq, "hold release\n");
GEM_BUG_ON(!i915_request_on_hold(rq));
- GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->sched.submit));
i915_request_clear_hold(rq);
list_del_init(&rq->sched.link);
@@ -1453,36 +1462,80 @@ void __i915_sched_cancel_queue(struct i915_sched *se)
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &se->requests, sched.link)
- i915_request_put(i915_request_mark_eio(rq));
+ __i915_request_mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
i915_sched_dequeue(se, pl, rq, rn) {
- i915_request_put(i915_request_mark_eio(rq));
+ __i915_request_mark_eio(rq);
__i915_request_submit(rq, i915_request_get_engine(rq));
}
GEM_BUG_ON(!i915_sched_is_idle(se));
/* On-hold requests will be flushed to timeline upon their release */
list_for_each_entry(rq, &se->hold, sched.link)
- i915_request_put(i915_request_mark_eio(rq));
+ __i915_request_mark_eio(rq);
/* Remaining _unready_ requests will be nop'ed when submitted */
}
-void i915_sched_node_init(struct i915_sched_node *node)
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
- spin_lock_init(&node->lock);
+ struct i915_request *rq =
+ container_of(fence, typeof(*rq), sched.submit);
- INIT_LIST_HEAD(&node->signalers_list);
- INIT_LIST_HEAD(&node->waiters_list);
- INIT_LIST_HEAD(&node->link);
+ switch (state) {
+ case FENCE_COMPLETE:
+ trace_i915_request_submit(rq);
- node->ipi_link = NULL;
+ if (unlikely(fence->error))
+ i915_request_set_error_once(rq, fence->error);
- i915_sched_node_reinit(node);
+ /*
+ * We need to serialize use of the submit_request() callback
+ * with its hotplugging performed during an emergency
+ * i915_gem_set_wedged(). We use the RCU mechanism to mark the
+ * critical section in order to force i915_gem_set_wedged() to
+ * wait until the submit_request() is completed before
+ * proceeding.
+ */
+ rcu_read_lock();
+ i915_request_enqueue(rq);
+ rcu_read_unlock();
+ break;
+
+ case FENCE_FREE:
+ i915_request_put(rq);
+ break;
+ }
+
+ return NOTIFY_DONE;
}
-void i915_sched_node_reinit(struct i915_sched_node *node)
+static int __i915_sw_fence_call
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ struct i915_request *rq =
+ container_of(fence, typeof(*rq), sched.semaphore);
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ /*
+ * The request is now ready to run; re-evaluate its deadline
+ * to remove the semaphore deprioritisation and to assign
+ * a deadline relative to its point-of-readiness [now].
+ */
+ i915_request_update_deadline(rq);
+ break;
+
+ case FENCE_FREE:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static void i915_sched_node_reinit(struct i915_sched_node *node)
{
node->attr.priority = I915_PRIORITY_INVALID;
node->deadline = I915_DEADLINE_NEVER;
@@ -1498,6 +1551,34 @@ void i915_sched_node_reinit(struct i915_sched_node *node)
GEM_BUG_ON(!list_empty(&node->link));
}
+void i915_sched_node_init(struct i915_sched_node *node)
+{
+ i915_sw_fence_init(&node->submit, submit_notify);
+ i915_sw_fence_init(&node->semaphore, semaphore_notify);
+
+ INIT_LIST_HEAD(&node->link);
+
+ spin_lock_init(&node->lock);
+ INIT_LIST_HEAD(&node->signalers_list);
+ INIT_LIST_HEAD(&node->waiters_list);
+
+ node->ipi_link = NULL;
+
+ i915_sched_node_reinit(node);
+}
+
+void i915_sched_prepare_request(struct i915_sched *se, struct i915_request *rq)
+{
+ i915_sched_node_reinit(&rq->sched);
+
+ /* We bump the ref for the fence chain */
+ i915_sw_fence_reinit(&i915_request_get(rq)->sched.submit);
+ i915_sw_fence_reinit(&rq->sched.semaphore);
+
+ rq->sched.engine = se;
+ rq->execution_mask = se->mask;
+}
+
static struct i915_dependency *
i915_dependency_alloc(void)
{
@@ -1602,6 +1683,21 @@ void i915_sched_node_retire(struct i915_sched_node *node)
}
}
+#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
+void i915_sched_node_fini(struct i915_sched_node *node)
+{
+ /*
+ * The request is put onto a RCU freelist (i.e. the address
+ * is immediately reused), mark the fences as being freed now.
+ * Otherwise the debugobjects for the fences are only marked as
+ * freed when the slab cache itself is freed, and so we would get
+ * caught trying to reuse dead objects.
+ */
+ i915_sw_fence_fini(&node->submit);
+ i915_sw_fence_fini(&node->semaphore);
+}
+#endif
+
void i915_sched_disable_tasklet(struct i915_sched *se)
{
__tasklet_disable_sync_once(&se->tasklet);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 915082cdf195..673f207840ae 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,7 +24,12 @@ struct drm_printer;
} while (0)
void i915_sched_node_init(struct i915_sched_node *node);
-void i915_sched_node_reinit(struct i915_sched_node *node);
+void i915_sched_prepare_request(struct i915_sched *se, struct i915_request *rq);
+#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
+void i915_sched_node_fini(struct i915_sched_node *node);
+#else
+static inline void i915_sched_node_fini(struct i915_sched_node *node) {}
+#endif
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal,
@@ -80,7 +85,6 @@ void i915_request_set_deadline(struct i915_request *request, u64 deadline);
void i915_request_update_deadline(struct i915_request *request);
-void i915_request_enqueue(struct i915_request *request);
bool __i915_request_requeue(struct i915_request *rq,
struct i915_sched *se);
@@ -101,6 +105,11 @@ void i915_sched_resume_request(struct i915_sched *engine,
void __i915_sched_cancel_queue(struct i915_sched *se);
+static inline bool i915_sched_is_idle(const struct i915_sched *se)
+{
+ return i915_priolist_is_empty(&se->queue);
+}
+
/*
* Control whether the scheduler accepts any more requests. While
* disabled all incoming [ready] requests will be dropped and marked
@@ -116,6 +125,7 @@ static inline void i915_sched_enable(struct i915_sched *se)
static inline void i915_sched_disable(struct i915_sched *se)
{
clear_bit(I915_SCHED_ENABLE_BIT, &se->flags);
+ /* Now flush concurrent submission! */
}
static inline u64 i915_sched_to_ticks(ktime_t kt)
@@ -128,11 +138,6 @@ static inline u64 i915_sched_to_ns(u64 deadline)
return deadline << I915_SCHED_DEADLINE_SHIFT;
}
-static inline bool i915_sched_is_idle(const struct i915_sched *se)
-{
- return i915_priolist_is_empty(&se->queue);
-}
-
static inline bool
i915_sched_is_last_request(const struct i915_sched *se,
const struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index df362c1de3eb..0c55c9401078 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -12,6 +12,7 @@
#include <linux/workqueue.h>
#include "i915_priolist_types.h"
+#include "i915_sw_fence.h"
#include "i915_utils.h"
struct drm_printer;
@@ -74,15 +75,6 @@ struct i915_sched {
unsigned long flags;
unsigned long mask; /* available scheduling channels */
- /*
- * Pass the request to the submission backend (e.g. directly into
- * the legacy ringbuffer, or to the end of an execlist, or to the GuC).
- *
- * This is called from an atomic context with irqs disabled; must
- * be irq safe.
- */
- void (*submit_request)(struct i915_request *rq);
-
struct i915_request *(*active_request)(struct i915_sched *se);
void (*revoke_context)(struct intel_context *ce,
@@ -208,12 +200,15 @@ struct i915_sched_attr {
*/
struct i915_sched_node {
struct i915_sched *engine;
+ struct list_head link; /* guarded by i915_sched.lock */
- spinlock_t lock; /* protect the lists */
+ struct i915_sw_fence submit;
+ struct i915_sw_fence semaphore;
+ spinlock_t lock; /* protects the lists */
struct list_head signalers_list; /* those before us, we depend upon */
struct list_head waiters_list; /* those after us, they depend upon us */
- struct list_head link; /* guarded by i915_sched.lock */
+
struct i915_sched_stack {
/* Branch memoization used during depth-first search */
struct i915_request *prev;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 4002c984c2e0..7a40035f34dd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -105,7 +105,7 @@ __live_active_setup(struct drm_i915_private *i915)
break;
}
- err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->sched.submit,
submit,
GFP_KERNEL);
if (err >= 0)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index f99bb0113726..05dff9c3123d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -471,7 +471,7 @@ static int igt_evict_contexts(void *arg)
}
/* Keep every request/ctx pinned until we are full */
- err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->sched.submit,
&fence,
GFP_KERNEL);
if (err < 0)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8c9d60359324..8059888f8eef 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -287,7 +287,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
break;
}
- err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->sched.submit,
submit,
GFP_KERNEL);
@@ -1846,7 +1846,7 @@ static int measure_inter_request(struct intel_context *ce)
goto err_submit;
}
- err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->sched.submit,
submit,
GFP_KERNEL);
if (err < 0) {
--
2.20.1
More information about the Intel-gfx-trybot
mailing list