[Intel-gfx] [RFC 35/38] drm/i915/preempt: Implement mid-batch preemption support

John.C.Harrison at Intel.com
Fri Dec 11 06:50:43 PST 2015


From: Dave Gordon <david.s.gordon at intel.com>

Batch buffers which have been pre-empted mid-way through execution
must be handled separately. Rather than simply re-submitting the
batch as a brand new piece of work, the driver only needs to requeue
the context. The hardware will take care of picking up where it left
off.
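
In outline, the post-preemption fix-up amounts to the following
(a minimal sketch with stand-in names, not the driver code; the
real logic lives in i915_scheduler_preemption_postprocess() below):

  /* Stand-in request type, for illustration only */
  struct sketch_req {
          unsigned int flags;
          bool mid_batch;   /* preempted part-way through its batch? */
  };
  #define SKETCH_REQ_RESTART (1 << 0)

  static void sketch_requeue_preempted(struct sketch_req *req)
  {
          if (req->mid_batch) {
                  /*
                   * The saved context image still describes the
                   * interrupted batch; flag the request so that
                   * final submission skips re-emitting its commands
                   * and the hardware resumes where it stopped.
                   */
                  req->flags |= SKETCH_REQ_RESTART;
          } else {
                  /*
                   * The batch never started: empty the ringbuffer
                   * and requeue the request as brand new work.
                   */
                  req->flags &= ~SKETCH_REQ_RESTART;
          }
  }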

v2: New patch in series.

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  1 +
 drivers/gpu/drm/i915/i915_scheduler.c | 55 +++++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_scheduler.h |  3 ++
 drivers/gpu/drm/i915/intel_lrc.c      | 51 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h      |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)
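
Note on the intel_lr_context_resync_req() hunk below: the LRC
register state page holds MI_LOAD_REGISTER_IMM style pairs, where
reg_state[i] is a register offset and reg_state[i + 1] its value;
hence the new tail is written at CTX_RING_TAIL + 1. A stand-alone
sketch of that pattern (only CTX_RING_TAIL is the real name):

  static void sketch_update_ring_tail(u32 *reg_state, u32 new_tail)
  {
          /* CTX_RING_TAIL indexes the offset slot; +1 is the value */
          reg_state[CTX_RING_TAIL + 1] = new_tail;
  }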

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7137439..6798f9c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3722,6 +3722,7 @@ static int i915_scheduler_info(struct seq_file *m, void *unused)
 	PRINT_VAR("  Queued",               "u", stats[r].queued);
 	PRINT_VAR("  Submitted",            "u", stats[r].submitted);
 	PRINT_VAR("  Preempted",            "u", stats[r].preempted);
+	PRINT_VAR("  Midbatch preempted",   "u", stats[r].mid_preempted);
 	PRINT_VAR("  Completed",            "u", stats[r].completed);
 	PRINT_VAR("  Expired",              "u", stats[r].expired);
 	seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index d0c4b46..d96eefb 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -743,6 +743,7 @@ i915_scheduler_preemption_postprocess(struct intel_engine_cs *ring)
 	struct i915_scheduler *scheduler = dev_priv->scheduler;
 	struct i915_scheduler_queue_entry *pnode = NULL;
 	struct drm_i915_gem_request *preq = NULL;
+	struct drm_i915_gem_request *midp = NULL;
 	struct i915_scheduler_stats *stats;
 	unsigned long flags;
 	int preempted = 0, preemptive = 0;
@@ -806,8 +807,12 @@ i915_scheduler_preemption_postprocess(struct intel_engine_cs *ring)
 				node->status = i915_sqs_preempted;
 				trace_i915_scheduler_unfly(ring, node);
 				trace_i915_scheduler_node_state_change(ring, node);
-				/* Empty the preempted ringbuffer */
-				intel_lr_context_resync(req->ctx, ring, false);
+
+				/* Identify a mid-batch preemption */
+				if (req->seqno == ring->last_batch_start) {
+					WARN(midp, "Multiple mid-batch-preempted requests?\n");
+					midp = req;
+				}
 			}
 
 			i915_gem_request_dequeue(req);
@@ -821,11 +826,47 @@ i915_scheduler_preemption_postprocess(struct intel_engine_cs *ring)
 	if (stats->max_preempted < preempted)
 		stats->max_preempted = preempted;
 
+	/* Now fix up the contexts of all preempt{ive,ed} requests */
 	{
-		/* XXX: Sky should be empty now */
+		struct intel_context *mid_ctx = NULL;
 		struct i915_scheduler_queue_entry *node;
-		list_for_each_entry(node, &scheduler->node_queue[ring->id], link)
-			WARN_ON(I915_SQS_IS_FLYING(node));
+		u32 started = ring->last_batch_start;
+
+		/*
+		 * Iff preemption was mid-batch, we should have found a
+		 * mid-batch-preempted request
+		 */
+		if (started && started != ring->last_irq_seqno)
+			WARN(!midp, "Mid-batch preempted, but request not found\n");
+		else
+			WARN(midp, "Found unexpected mid-batch preemption?\n");
+
+		if (midp) {
+			/* Rewrite this context rather than emptying it */
+			intel_lr_context_resync_req(midp);
+			midp->scheduler_flags |= i915_req_sf_restart;
+			mid_ctx = midp->ctx;
+			stats->mid_preempted += 1;
+			WARN_ON(preq == midp);
+		}
+
+		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+			/* XXX: Sky should be empty now */
+			if (WARN_ON(I915_SQS_IS_FLYING(node)))
+				continue;
+
+			/* Clean up preempted contexts */
+			if (node->status != i915_sqs_preempted)
+				continue;
+
+			if (node->params.ctx != mid_ctx) {
+				/* Empty the preempted ringbuffer */
+				intel_lr_context_resync(node->params.ctx, ring, false);
+				/* Request is now queued, not preempted */
+				node->status = i915_sqs_queued;
+				trace_i915_scheduler_node_state_change(ring, node);
+			}
+		}
 	}
 
 	/* Anything else to do here ... ? */
@@ -1746,7 +1787,9 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 		spin_lock_irqsave(&scheduler->lock, flags);
 		scheduler->flags[ring->id] &= ~i915_sf_submitting;
 
-		if (ret) {
+		if (ret == 0) {
+			req->scheduler_flags &= ~i915_req_sf_restart;
+		} else {
 			int requeue = 1;
 
 			/* Oh dear! Either the node is broken or the ring is
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7e7e974..86ebfdd 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -33,6 +33,8 @@ enum {
 	i915_req_sf_was_preempt      = (1 << 1),
 	/* Request is preemptive */
 	i915_req_sf_preempt          = (1 << 2),
+	/* Request has been preempted mid-batch, needs restart */
+	i915_req_sf_restart          = (1 << 3),
 };
 
 enum i915_scheduler_queue_status {
@@ -108,6 +110,7 @@ struct i915_scheduler_stats {
 	uint32_t            queued;
 	uint32_t            submitted;
 	uint32_t            preempted;
+	uint32_t            mid_preempted;
 	uint32_t            completed;
 	uint32_t            expired;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cc9f213..36d63b7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1012,6 +1012,18 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	if (ret)
 		goto err;
 
+	/*
+	 * For the case of restarting a mid-batch preempted request,
+	 * the ringbuffer already contains all necessary instructions,
+	 * so we can just go straight to submitting it
+	 */
+	if (req->scheduler_flags & i915_req_sf_restart) {
+		DRM_DEBUG_DRIVER("restart: req head/tail 0x%x/%x ringbuf 0x%x/%x\n",
+			req->head, req->tail, ringbuf->head, ringbuf->tail);
+		i915_gem_execbuffer_retire_commands(params);
+		return 0;
+	}
+
 	/* record where we start filling the ring */
 	req->head = intel_ring_get_tail(ringbuf);
 
@@ -2670,6 +2682,45 @@ error_pm:
 }
 
 /*
+ * Update the ringbuffer associated with the specified request
+ * so that only the section relating to that request is valid.
+ * Then propagate the change to the associated context image.
+ */
+void intel_lr_context_resync_req(struct drm_i915_gem_request *req)
+{
+	enum intel_ring_id ring_id = req->ring->id;
+	struct drm_i915_gem_object *ctx_obj;
+	struct intel_ringbuffer *ringbuf;
+	struct page *page;
+	uint32_t *reg_state;
+
+	ctx_obj = req->ctx->engine[ring_id].state;
+	ringbuf = req->ringbuf;
+
+	if (WARN_ON(!ringbuf || !ctx_obj))
+		return;
+	if (WARN_ON(i915_gem_object_get_pages(ctx_obj)))
+		return;
+
+	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
+	reg_state = kmap_atomic(page);
+
+	DRM_DEBUG_DRIVER("Updating ringbuf head/tail, previously 0x%x/%x ...\n",
+		ringbuf->head, ringbuf->tail);
+
+	ringbuf->tail = req->tail;
+	ringbuf->last_retired_head = req->head;
+	intel_ring_update_space(ringbuf);
+
+	DRM_DEBUG_DRIVER("Updated ringbuf, now 0x%x/%x space %d\n",
+		ringbuf->head, ringbuf->tail, ringbuf->space);
+
+	reg_state[CTX_RING_TAIL+1] = ringbuf->tail;
+
+	kunmap_atomic(reg_state);
+}
+
+/*
  * Empty the ringbuffer associated with the specified request
  * by updating the ringbuffer 'head' to the value of 'tail', or,
  * if 'rezero' is true, setting both 'head' and 'tail' to zero.
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index fa2f814..653064a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -83,6 +83,7 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *ring);
 int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 				    struct intel_engine_cs *ring);
 void intel_lr_context_unpin(struct drm_i915_gem_request *req);
+void intel_lr_context_resync_req(struct drm_i915_gem_request *req);
 void intel_lr_context_resync(struct intel_context *ctx,
 			     struct intel_engine_cs *ring,
 			     bool rezero);
-- 
1.9.1


