[Intel-gfx] [RFC 29/37] drm/i915/preempt: scheduler logic for preventing recursive preemption

John.C.Harrison at Intel.com
Mon Nov 23 03:42:04 PST 2015


From: Dave Gordon <david.s.gordon at intel.com>

Once a preemptive request has been dispatched to the hardware-layer
submission mechanism, the scheduler must not send any further requests
to the same ring until the preemption completes. Here we add the logic
that ensures that only one preemption per ring can be in progress at a
time.
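
As a rough illustration of the intent (a standalone sketch using
simplified stand-in structures and names, not the actual i915 code),
the guard added to i915_scheduler_submit() amounts to something like
this:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_SF_PREEMPTING	(1u << 0)	/* preemptive batch in flight */
#define SKETCH_SF_PREEMPTED	(1u << 1)	/* preemption done, ring awaits restart */

#define SKETCH_NUM_RINGS	4

struct sketch_scheduler {
	uint32_t flags[SKETCH_NUM_RINGS];	/* per-ring flag words */
};

static bool sketch_ring_preempting(const struct sketch_scheduler *sched,
				   unsigned int ring_id)
{
	/* Either flag means the ring is tied up by a preemption sequence. */
	return sched->flags[ring_id] &
	       (SKETCH_SF_PREEMPTING | SKETCH_SF_PREEMPTED);
}

static int sketch_submit(struct sketch_scheduler *sched, unsigned int ring_id)
{
	/* Mirrors the early-out added at the top of i915_scheduler_submit(). */
	if (sketch_ring_preempting(sched, ring_id))
		return -EAGAIN;		/* come back once the preemption resolves */

	/* ... pop a node and dispatch it to the hardware layer ... */
	return 0;
}

The same check is re-evaluated inside the submission loop after each
launch, so the loop stops as soon as a preemptive batch has been sent.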

Actually-preemptive requests are still disabled via a module parameter
at this early stage, as the logic to process preemption completion
isn't in place yet.

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 53 +++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 2 files changed, 52 insertions(+), 2 deletions(-)
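
(Editorial note, not part of the patch: a similarly simplified,
self-contained sketch, reusing the stand-in names from the commit
message above, of the clean-up added to i915_scheduler_notify_request()
below. If the request killed in flight was the preemptive one, the
preemption can no longer complete, so both per-ring flags are dropped
to unblock submission.)

#include <stdbool.h>
#include <stdint.h>

#define SKETCH_SF_PREEMPTING	(1u << 0)
#define SKETCH_SF_PREEMPTED	(1u << 1)

struct sketch_scheduler {
	uint32_t flags[4];		/* per-ring flag words */
};

static void sketch_notify_cancelled(struct sketch_scheduler *sched,
				    unsigned int ring_id, bool was_preemptive)
{
	/*
	 * A cancelled preemptive request means the pending preemption will
	 * never finish, so clear both flags and let submission resume.
	 */
	if (was_preemptive)
		sched->flags[ring_id] &=
			~(SKETCH_SF_PREEMPTING | SKETCH_SF_PREEMPTED);
}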

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 61eabc6..81ac88b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -576,6 +576,10 @@ bool i915_scheduler_notify_request(struct drm_i915_gem_request *req)
 
 	/* Node was in flight so mark it as complete. */
 	if (req->cancelled) {
+		/* If a preemption was in progress, it won't complete now. */
+		if (node->status == i915_sqs_overtaking)
+			scheduler->flags[req->ring->id] &= ~(i915_sf_preempting|i915_sf_preempted);
+
 		node->status = i915_sqs_dead;
 		scheduler->stats[req->ring->id].kill_flying++;
 	} else {
@@ -1487,6 +1491,15 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 
 	spin_lock_irqsave(&scheduler->lock, flags);
 
+	/*
+	 * If pre-emption is in progress on an engine then no further work
+	 * may be submitted to that same engine. Come back later ...
+	 */
+	if (i915_scheduler_is_ring_preempting(ring)) {
+		ret = -EAGAIN;
+		goto exit;
+	}
+
 	/* First time around, complain if anything unexpected occurs: */
 	ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
 	if (ret)
@@ -1526,7 +1539,15 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 		 * list. So add it back in and mark it as in flight. */
 		i915_scheduler_fly_node(node);
 
-		scheduler->stats[ring->id].submitted++;
+		if (req->scheduler_flags & i915_req_sf_preempt) {
+			/* If this batch is pre-emptive then it will tie the hardware
+			 * up at least until it has begun to be executed. That is,
+			 * if a pre-emption request is in flight then no other work
+			 * may be submitted until it resolves. */
+			scheduler->flags[ring->id] |= i915_sf_preempting;
+			scheduler->stats[ring->id].preempts_submitted++;
+		} else
+			scheduler->stats[ring->id].submitted++;
 
 		scheduler->flags[ring->id] |= i915_sf_submitting;
 		spin_unlock_irqrestore(&scheduler->lock, flags);
@@ -1539,7 +1560,9 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 
 			/* Oh dear! Either the node is broken or the ring is
 			 * busy. So need to kill the node or requeue it and try
-			 * again later as appropriate. */
+			 * again later as appropriate. Either way, clear the
+			 * pre-emption flag as it ain't happening. */
+			scheduler->flags[ring->id] &= ~i915_sf_preempting;
 
 			switch (-ret) {
 			case ENODEV:
@@ -1582,6 +1605,10 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 				i915_scheduler_node_kill(scheduler, node);
 		}
 
+		/* If pre-emption is now in progress then stop launching */
+		if (i915_scheduler_is_ring_preempting(ring))
+			break;
+
 		/* Keep launching until the sky is sufficiently full. */
 		if (i915_scheduler_count_flying(scheduler, ring) >=
 						scheduler->min_flying)
@@ -1729,6 +1756,28 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
 	return 0;
 }
 
+bool i915_scheduler_is_ring_preempting(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	uint32_t sched_flags = scheduler->flags[ring->id];
+
+	/*
+	 * The scheduler is prevented from sending batches to the hardware
+	 * while preemption is in progress (flag bit i915_sf_preempting).
+	 *
+	 * Post-preemption (i915_sf_preempted), the hardware ring will be
+	 * empty, and the scheduler therefore needs a chance to run the
+	 * delayed work task to retire completed work and restart submission
+	 *
+	 * Therefore, if either flag is set, the scheduler is busy.
+	 */
+	if (sched_flags & (i915_sf_preempting | i915_sf_preempted))
+		return true;
+
+	return false;
+}
+
 /*
  * Used by TDR to distinguish hung rings (not moving but with work to do)
  * from idle rings (not moving because there is nothing to do).
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1597a15..569215a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -194,6 +194,7 @@ int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *q
 bool        i915_scheduler_notify_request(struct drm_i915_gem_request *req);
 void        i915_scheduler_wakeup(struct drm_device *dev);
 bool        i915_scheduler_is_ring_flying(struct intel_engine_cs *ring);
+bool        i915_scheduler_is_ring_preempting(struct intel_engine_cs *ring);
 void        i915_gem_scheduler_work_handler(struct work_struct *work);
 int         i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
 int         i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
-- 
1.9.1


