[Intel-gfx] [PATCH 24/49] drm/i915: Tidy execlist submission

Chris Wilson chris at chris-wilson.co.uk
Fri Mar 27 04:01:56 PDT 2015


The list handling during submission was quite confusing, as the retired
requests ended up out of order, which would make it much harder in the
future to reduce the number of extra lists. Simplify the submission
mechanism to explicitly track the requests currently on each port, which
trims the work required to track the hardware and makes execlists more
consistent with the GEM core.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  21 +--
 drivers/gpu/drm/i915/i915_drv.h         |   4 -
 drivers/gpu/drm/i915/i915_gem.c         |  15 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 293 ++++++++++++--------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   1 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +-
 6 files changed, 125 insertions(+), 212 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5cea9a9c1cb9..21e2d67d3e23 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1929,8 +1929,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 		return;
 	}
 
-	seq_printf(m, "CONTEXT: %s %u\n", ring->name,
-		   intel_execlists_ctx_id(ctx_obj));
+	seq_printf(m, "CONTEXT: %s\n", ring->name);
 
 	if (!i915_gem_obj_ggtt_bound(ctx_obj))
 		seq_puts(m, "\tNot bound in GGTT\n");
@@ -2016,7 +2015,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 	intel_runtime_pm_get(dev_priv);
 
 	for_each_ring(ring, dev_priv, ring_id) {
-		struct drm_i915_gem_request *head_req = NULL;
+		struct drm_i915_gem_request *rq[2];
 		int count = 0;
 		unsigned long flags;
 
@@ -2046,22 +2045,16 @@ static int i915_execlists(struct seq_file *m, void *data)
 		}
 
 		spin_lock_irqsave(&ring->execlist_lock, flags);
+		memcpy(rq, ring->execlist_port, sizeof(rq));
 		list_for_each(cursor, &ring->execlist_queue)
 			count++;
-		head_req = list_first_entry_or_null(&ring->execlist_queue,
-				struct drm_i915_gem_request, execlist_link);
 		spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 		seq_printf(m, "\t%d requests in queue\n", count);
-		if (head_req) {
-			struct drm_i915_gem_object *ctx_obj;
-
-			ctx_obj = head_req->ctx->engine[ring_id].state;
-			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(ctx_obj));
-			seq_printf(m, "\tHead request tail: %u\n",
-				   head_req->tail);
-		}
+		seq_printf(m, "\tPort[0] seqno: %u\n",
+			   rq[0] ? rq[0]->seqno : 0);
+		seq_printf(m, "\tPort[1] seqno: %u\n",
+			   rq[1] ? rq[1]->seqno : 0);
 
 		seq_putc(m, '\n');
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 68a50891830f..ee51540e169a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2116,10 +2116,6 @@ struct drm_i915_gem_request {
 
 	/** Execlist link in the submission queue.*/
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
 };
 
 void i915_gem_request_free(struct kref *req_ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cc23a8773a89..db4a53f248a2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2632,15 +2632,14 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * are the ones that keep the context and ringbuffer backing objects
 	 * pinned in place.
 	 */
-	while (!list_empty(&ring->execlist_queue)) {
-		struct drm_i915_gem_request *submit_req;
-
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct drm_i915_gem_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
+	if (i915.enable_execlists) {
+		spin_lock_irq(&ring->execlist_lock);
+		list_splice_tail_init(&ring->execlist_queue,
+				      &ring->execlist_completed);
+		memset(&ring->execlist_port, 0, sizeof(ring->execlist_port));
+		spin_unlock_irq(&ring->execlist_lock);
 
-		i915_gem_request_unreference(submit_req);
+		intel_execlists_retire_requests(ring);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3cd40699522e..a013239f5e26 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -230,78 +230,54 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
 	return 0;
 }
 
-/**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx_obj: Logical Ring Context backing object.
- *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
- *
- * Return: 20-bits globally unique context ID.
- */
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
-{
-	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
-
-	/* LRCA is required to be 4K aligned so the more significant 20 bits
-	 * are globally unique */
-	return lrca >> 12;
-}
-
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+static uint32_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
 					 struct drm_i915_gem_object *ctx_obj)
 {
-	struct drm_device *dev = ring->dev;
-	uint64_t desc;
-	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
-
-	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
+	uint32_t desc;
 
 	desc = GEN8_CTX_VALID;
 	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
 	desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
-	desc |= lrca;
-	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+	desc |= i915_gem_obj_ggtt_offset(ctx_obj);
 
 	/* TODO: WaDisableLiteRestore when we start using semaphore
 	 * signalling between Command Streamers */
 	/* desc |= GEN8_CTX_FORCE_RESTORE; */
 
 	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
-	if (IS_GEN9(dev) &&
-	    INTEL_REVID(dev) <= SKL_REVID_B0 &&
+	if (IS_GEN9(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_B0 &&
 	    (ring->id == BCS || ring->id == VCS ||
-	    ring->id == VECS || ring->id == VCS2))
+	     ring->id == VECS || ring->id == VCS2))
 		desc |= GEN8_CTX_FORCE_RESTORE;
 
 	return desc;
 }
 
-static void execlists_elsp_write(struct intel_engine_cs *ring,
-				 struct drm_i915_gem_object *ctx_obj0,
-				 struct drm_i915_gem_object *ctx_obj1)
+static uint32_t execlists_request_write_tail(struct intel_engine_cs *ring,
+					     struct drm_i915_gem_request *rq)
+
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint64_t temp = 0;
+	rq->ctx->engine[ring->id].ringbuf->regs[CTX_RING_TAIL+1] = rq->tail;
+	return execlists_ctx_descriptor(ring, rq->ctx->engine[ring->id].state);
+}
+
+static void execlists_submit_pair(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
 	uint32_t desc[4];
 
-	/* XXX: You must always write both descriptors in the order below. */
-	if (ctx_obj1)
-		temp = execlists_ctx_descriptor(ring, ctx_obj1);
-	else
-		temp = 0;
-	desc[1] = (u32)(temp >> 32);
-	desc[0] = (u32)temp;
+	if (ring->execlist_port[1]) {
+		desc[0] = execlists_request_write_tail(ring,
+						       ring->execlist_port[1]);
+		desc[1] = ring->execlist_port[1]->seqno;
+	} else
+		desc[1] = desc[0] = 0;
 
-	temp = execlists_ctx_descriptor(ring, ctx_obj0);
-	desc[3] = (u32)(temp >> 32);
-	desc[2] = (u32)temp;
+	desc[2] = execlists_request_write_tail(ring, ring->execlist_port[0]);
+	desc[3] = ring->execlist_port[0]->seqno;
 
+	/* Note: You must always write both descriptors in the order below. */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	I915_WRITE(RING_ELSP(ring), desc[1]);
 	I915_WRITE(RING_ELSP(ring), desc[0]);
@@ -310,96 +286,82 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 	/* The context is automatically loaded after the following */
 	I915_WRITE(RING_ELSP(ring), desc[2]);
 
-	/* ELSP is a wo register, so use another nearby reg for posting instead */
+	/* ELSP is a wo register, use another nearby reg for posting instead */
 	POSTING_READ(RING_EXECLIST_STATUS(ring));
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static void execlists_submit_contexts(struct intel_engine_cs *ring,
-				      struct intel_context *to0, u32 tail0,
-				      struct intel_context *to1, u32 tail1)
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
-	struct drm_i915_gem_object *ctx_obj1 = NULL;
+	struct drm_i915_gem_request *cursor;
+	bool submit = false;
+	int i = 0;
 
-	to0->engine[ring->id].ringbuf->regs[CTX_RING_TAIL+1] = tail0;
+	assert_spin_locked(&ring->execlist_lock);
 
-	if (to1) {
-		to1->engine[ring->id].ringbuf->regs[CTX_RING_TAIL+1] = tail1;
-		ctx_obj1 = to1->engine[ring->id].state;
+	/* Try to read in pairs */
+	cursor = ring->execlist_port[0];
+	if (cursor == NULL)
+		cursor = list_first_entry(&ring->execlist_queue,
+					  typeof(*cursor),
+					  execlist_link);
+	else
+		cursor = list_next_entry(cursor, execlist_link);
+	while (&cursor->execlist_link != &ring->execlist_queue) {
+		/* Same ctx: ignore earlier request, as the
+		 * second request extends the first.
+		 */
+		if (ring->execlist_port[i] &&
+		    cursor->ctx != ring->execlist_port[i]->ctx) {
+			if (++i == ARRAY_SIZE(ring->execlist_port))
+				break;
+		}
+
+		ring->execlist_port[i] = cursor;
+		submit = true;
+
+		cursor = list_next_entry(cursor, execlist_link);
 	}
 
-	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+	if (submit)
+		execlists_submit_pair(ring);
 }
 
-static void execlists_context_unqueue(struct intel_engine_cs *ring)
+static bool execlists_complete_requests(struct intel_engine_cs *ring,
+					u32 seqno)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
-
 	assert_spin_locked(&ring->execlist_lock);
 
-	if (list_empty(&ring->execlist_queue))
-		return;
-
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			list_add_tail(&req0->execlist_link,
-				&ring->execlist_retired_req_list);
-			req0 = cursor;
-		} else {
-			req1 = cursor;
-			break;
-		}
-	}
+	if (seqno == 0)
+		return false;
 
-	WARN_ON(req1 && req1->elsp_submitted);
+	do {
+		struct drm_i915_gem_request *rq;
 
-	execlists_submit_contexts(ring, req0->ctx, req0->tail,
-				  req1 ? req1->ctx : NULL,
-				  req1 ? req1->tail : 0);
+		rq = ring->execlist_port[0];
+		if (rq == NULL)
+			break;
 
-	req0->elsp_submitted++;
-	if (req1)
-		req1->elsp_submitted++;
-}
+		if (!i915_seqno_passed(seqno, rq->seqno))
+			break;
 
-static bool execlists_check_remove_request(struct intel_engine_cs *ring,
-					   u32 request_id)
-{
-	struct drm_i915_gem_request *head_req;
+		do {
+			struct drm_i915_gem_request *prev =
+				list_entry(rq->execlist_link.prev,
+					   typeof(*rq),
+					   execlist_link);
 
-	assert_spin_locked(&ring->execlist_lock);
+			list_move_tail(&rq->execlist_link,
+				       &ring->execlist_completed);
 
-	head_req = list_first_entry_or_null(&ring->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
+			rq = prev;
+		} while (&rq->execlist_link != &ring->execlist_queue);
 
-	if (head_req != NULL) {
-		struct drm_i915_gem_object *ctx_obj =
-				head_req->ctx->engine[ring->id].state;
-		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");
-
-			if (--head_req->elsp_submitted <= 0) {
-				list_del(&head_req->execlist_link);
-				list_add_tail(&head_req->execlist_link,
-					&ring->execlist_retired_req_list);
-				return true;
-			}
-		}
-	}
+		ring->execlist_port[0] = ring->execlist_port[1];
+		ring->execlist_port[1] = NULL;
+	} while (1);
 
-	return false;
+	return ring->execlist_port[1] == NULL;
 }
 
 /**
@@ -411,53 +373,34 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
  */
 void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 status_pointer;
-	u8 read_pointer;
-	u8 write_pointer;
-	u32 status;
-	u32 status_id;
-	u32 submit_contexts = 0;
-
-	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
-
-	read_pointer = ring->next_context_status_buffer;
-	write_pointer = status_pointer & 0x07;
-	if (read_pointer > write_pointer)
-		write_pointer += 6;
-
-	spin_lock(&ring->execlist_lock);
-
-	while (read_pointer < write_pointer) {
-		read_pointer++;
-		status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
-				(read_pointer % 6) * 8);
-		status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
-				(read_pointer % 6) * 8 + 4);
-
-		if (status & GEN8_CTX_STATUS_PREEMPTED) {
-			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(ring, status_id))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
-				WARN(1, "Preemption without Lite Restore\n");
-		}
-
-		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
-		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
-			if (execlists_check_remove_request(ring, status_id))
-				submit_contexts++;
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	u8 head, tail;
+	u32 seqno = 0;
+
+	head = ring->next_context_status_buffer;
+	tail = I915_READ(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
+	if (head > tail)
+		tail += 6;
+
+	while (head++ < tail) {
+		u32 reg = RING_CONTEXT_STATUS_BUF(ring) + (head % 6)*8;
+		u32 status = I915_READ(reg);
+		if (unlikely(status & GEN8_CTX_STATUS_PREEMPTED && 0)) {
+			DRM_ERROR("Pre-empted request %x %s Lite Restore\n",
+				  I915_READ(reg + 4),
+				  status & GEN8_CTX_STATUS_LITE_RESTORE ? "with" : "without");
 		}
+		if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE |
+			      GEN8_CTX_STATUS_ELEMENT_SWITCH))
+			seqno = I915_READ(reg + 4);
 	}
 
-	if (submit_contexts != 0)
+	spin_lock(&ring->execlist_lock);
+	if (execlists_complete_requests(ring, seqno))
 		execlists_context_unqueue(ring);
-
 	spin_unlock(&ring->execlist_lock);
 
-	WARN(submit_contexts > 2, "More than two context complete events?\n");
-	ring->next_context_status_buffer = write_pointer % 6;
-
+	ring->next_context_status_buffer = tail % 6;
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
 		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
 }
@@ -467,9 +410,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 				   u32 tail,
 				   struct drm_i915_gem_request *request)
 {
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
-
 	if (WARN_ON(request == NULL))
 		return -ENODEV;
 
@@ -483,28 +423,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 
 	spin_lock_irq(&ring->execlist_lock);
 
-	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&ring->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (to == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			list_add_tail(&tail_req->execlist_link,
-				&ring->execlist_retired_req_list);
-		}
-	}
-
 	list_add_tail(&request->execlist_link, &ring->execlist_queue);
-	if (num_elements == 0)
+	if (ring->execlist_port[0] == NULL)
 		execlists_context_unqueue(ring);
 
 	spin_unlock_irq(&ring->execlist_lock);
@@ -693,11 +613,11 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 	struct list_head list;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-	if (list_empty(&ring->execlist_retired_req_list))
+	if (list_empty(&ring->execlist_completed))
 		return;
 
 	spin_lock_irq(&ring->execlist_lock);
-	list_replace_init(&ring->execlist_retired_req_list, &list);
+	list_replace_init(&ring->execlist_completed, &list);
 	spin_unlock_irq(&ring->execlist_lock);
 
 	while (!list_empty(&list)) {
@@ -789,6 +709,11 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 	if (ret)
 		goto reset_pin_count;
 
+	if (WARN_ON(i915_gem_obj_ggtt_offset(ctx_obj) & 0xFFFFFFFF00000FFFULL)) {
+		ret = -ENODEV;
+		goto unpin_ctx_obj;
+	}
+
 	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
 	if (ret)
 		goto unpin_ctx_obj;
@@ -1326,7 +1251,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
-	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
+	INIT_LIST_HEAD(&ring->execlist_completed);
 	spin_lock_init(&ring->execlist_lock);
 
 	ret = i915_cmd_parser_init_ring(ring);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1d24d4f963f1..03e69c8636b0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -83,7 +83,6 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 			       struct list_head *vmas,
 			       struct drm_i915_gem_object *batch_obj,
 			       u64 exec_start, u32 dispatch_flags);
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2477cf3e3906..870a1d008db9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -239,8 +239,9 @@ struct  intel_engine_cs {
 
 	/* Execlists */
 	spinlock_t execlist_lock;
+	struct drm_i915_gem_request *execlist_port[2];
 	struct list_head execlist_queue;
-	struct list_head execlist_retired_req_list;
+	struct list_head execlist_completed;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct intel_ringbuffer *ringbuf,
-- 
2.1.4


