[PATCH 26/57] drm/i915: Move checking this request is executing to the scheduler
Chris Wilson
chris at chris-wilson.co.uk
Sun Jan 31 14:08:01 UTC 2021
The scheduler should know which requests it has sent to the HW, and if
necessary answer the question of whether that request is still on the
HW. Pull the execlists specific code out of the generic request handling
and push it into execlists.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
.../drm/i915/gt/intel_execlists_submission.c | 66 +++++++++++++++++
drivers/gpu/drm/i915/i915_request.c | 71 +------------------
drivers/gpu/drm/i915/i915_request.h | 8 +++
drivers/gpu/drm/i915/i915_scheduler.c | 6 ++
drivers/gpu/drm/i915/i915_scheduler_types.h | 2 +
5 files changed, 83 insertions(+), 70 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f7ab49fc4099..ff1b6c6a3b2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2773,6 +2773,71 @@ execlists_active_request(struct i915_sched_engine *se)
return rq;
}
+static bool execlists_is_executing(const struct i915_request *rq)
+{
+ struct i915_sched_engine *se = i915_request_get_scheduler(rq);
+ struct intel_engine_execlists *el =
+ &container_of(se, struct intel_engine_cs, sched)->execlists;
+ struct i915_request * const *port, *p;
+ bool inflight = false;
+
+ if (!i915_request_is_ready(rq))
+ return false;
+
+ /*
+ * Even if we have unwound the request, it may still be on
+ * the GPU (preempt-to-busy). If that request is inside an
+ * unpreemptible critical section, it will not be removed. Some
+ * GPU functions may even be stuck waiting for the paired request
+ * (__await_execution) to be submitted and cannot be preempted
+ * until the bond is executing.
+ *
+ * As we know that there are always preemption points between
+ * requests, we know that only the currently executing request
+ * may be still active even though we have cleared the flag.
+ * However, we can't rely on our tracking of ELSP[0] to know
+ * which request is currently active and so maybe stuck, as
+ * the tracking maybe an event behind. Instead assume that
+ * if the context is still inflight, then it is still active
+ * even if the active flag has been cleared.
+ *
+ * To further complicate matters, if there a pending promotion, the HW
+ * may either perform a context switch to the second inflight execlists,
+ * or it may switch to the pending set of execlists. In the case of the
+ * latter, it may send the ACK and we process the event copying the
+ * pending[] over top of inflight[], _overwriting_ our *active. Since
+ * this implies the HW is arbitrating and not struck in *active, we do
+ * not worry about complete accuracy, but we do require no read/write
+ * tearing of the pointer [the read of the pointer must be valid, even
+ * as the array is being overwritten, for which we require the writes
+ * to avoid tearing.]
+ *
+ * Note that the read of *execlists->active may race with the promotion
+ * of execlists->pending[] to execlists->inflight[], overwriting
+ * the value at *execlists->active. This is fine. The promotion implies
+ * that we received an ACK from the HW, and so the context is not
+ * stuck -- if we do not see ourselves in *active, the inflight status
+ * is valid. If instead we see ourselves being copied into *active,
+ * we are inflight and may signal the callback.
+ */
+ if (!intel_context_inflight(rq->context))
+ return false;
+
+ rcu_read_lock();
+ for (port = READ_ONCE(el->active);
+ (p = READ_ONCE(*port)); /* may race with promotion of pending[] */
+ port++) {
+ if (p->context == rq->context) {
+ inflight = i915_seqno_passed(p->fence.seqno,
+ rq->fence.seqno);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return inflight;
+}
+
static bool can_preempt(struct intel_engine_cs *engine)
{
if (INTEL_GEN(engine->i915) > 8)
@@ -2909,6 +2974,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
u32 base = engine->mmio_base;
engine->sched.active_request = execlists_active_request;
+ engine->sched.is_executing = execlists_is_executing;
tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 52bda6a2a181..c3afe5aa858e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (i915_request_retire(tmp) && tmp != rq);
}
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
- return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
- struct i915_request * const *port, *rq;
- bool inflight = false;
-
- if (!i915_request_is_ready(signal))
- return false;
-
- /*
- * Even if we have unwound the request, it may still be on
- * the GPU (preempt-to-busy). If that request is inside an
- * unpreemptible critical section, it will not be removed. Some
- * GPU functions may even be stuck waiting for the paired request
- * (__await_execution) to be submitted and cannot be preempted
- * until the bond is executing.
- *
- * As we know that there are always preemption points between
- * requests, we know that only the currently executing request
- * may be still active even though we have cleared the flag.
- * However, we can't rely on our tracking of ELSP[0] to know
- * which request is currently active and so maybe stuck, as
- * the tracking maybe an event behind. Instead assume that
- * if the context is still inflight, then it is still active
- * even if the active flag has been cleared.
- *
- * To further complicate matters, if there a pending promotion, the HW
- * may either perform a context switch to the second inflight execlists,
- * or it may switch to the pending set of execlists. In the case of the
- * latter, it may send the ACK and we process the event copying the
- * pending[] over top of inflight[], _overwriting_ our *active. Since
- * this implies the HW is arbitrating and not struck in *active, we do
- * not worry about complete accuracy, but we do require no read/write
- * tearing of the pointer [the read of the pointer must be valid, even
- * as the array is being overwritten, for which we require the writes
- * to avoid tearing.]
- *
- * Note that the read of *execlists->active may race with the promotion
- * of execlists->pending[] to execlists->inflight[], overwritting
- * the value at *execlists->active. This is fine. The promotion implies
- * that we received an ACK from the HW, and so the context is not
- * stuck -- if we do not see ourselves in *active, the inflight status
- * is valid. If instead we see ourselves being copied into *active,
- * we are inflight and may signal the callback.
- */
- if (!intel_context_inflight(signal->context))
- return false;
-
- rcu_read_lock();
- for (port = __engine_active(signal->engine);
- (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
- port++) {
- if (rq->context == signal->context) {
- inflight = i915_seqno_passed(rq->fence.seqno,
- signal->fence.seqno);
- break;
- }
- }
- rcu_read_unlock();
-
- return inflight;
-}
-
static int
__await_execution(struct i915_request *rq,
struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
* the completed/retired request.
*/
if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
- if (i915_request_is_active(signal) ||
- __request_in_flight(signal))
+ if (i915_request_is_executing(signal))
__notify_execute_cb_imm(signal);
}
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 419bb3ce3a13..0dd8cc982596 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -624,4 +624,12 @@ i915_request_active_timeline(const struct i915_request *rq)
lockdep_is_held(&i915_request_get_scheduler(rq)->lock));
}
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ return i915_request_get_scheduler(rq)->is_executing(rq);
+}
+
#endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index b181f9b3ad3d..319da8e59f9d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -133,6 +133,11 @@ static const struct i915_request *active_request(struct i915_sched_engine *se)
return active;
}
+static bool not_executing(const struct i915_request *rq)
+{
+ return false;
+}
+
void i915_sched_init_engine(struct i915_sched_engine *se,
struct device *dev,
const char *name,
@@ -155,6 +160,7 @@ void i915_sched_init_engine(struct i915_sched_engine *se,
se->submit_request = i915_request_enqueue;
se->active_request = active_request;
+ se->is_executing = not_executing;
/*
* Due to an interesting quirk in lockdep's internal debug tracking,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 9bd8a19d0737..c2c36b3a45a1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -38,6 +38,8 @@ struct i915_sched_engine {
const struct i915_request *
(*active_request)(struct i915_sched_engine *se);
+ bool (*is_executing)(const struct i915_request *rq);
+
struct list_head requests; /* active request, on HW */
struct list_head hold; /* ready requests, but on hold */
/**
--
2.20.1
More information about the Intel-gfx-trybot
mailing list