[Intel-gfx] [PATCH 2/2] drm/i915: Protect engine request list with spinlock

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Feb 19 08:18:55 PST 2015


There are multiple players interested in the ring->request_list
state. Request submission can happen in kernel or user context,
idle worker is going through request list to free items. And then there
is hangcheck worker which tries to figure out if particular ring is
healthy by peeking at the request list among other things. And if
judged stuck by hangcheck, error state is colleted. Which in turns
needs access to ring->request_list.

So there are concerns that hangcheck will see a request completion
state mismatch from the ring->request_list state.

Restore order in accessing this shared list by protecting it with
spinlock.

References: https://bugs.freedesktop.org/show_bug.cgi?id=88651
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 88 ++++++++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_gpu_error.c   |  6 +++
 drivers/gpu/drm/i915/i915_irq.c         | 12 ++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 17 ++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ++
 5 files changed, 102 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 06265e7..af7e32e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2468,14 +2468,19 @@ static int i915_gem_request_submit(struct drm_i915_gem_request *request,
 	request->batch_obj = batch;
 	request->postfix = intel_ring_get_tail(ringbuf);
 
+	/* We want to take this early so that anything waiting to request
+	 * completing doesn't see it before the request is actually in list
+	 */
+	spin_lock(&ring->request_list_lock);
+
 	if (i915.enable_execlists) {
 		ret = ring->emit_request(ringbuf, request);
 		if (ret)
-			return ret;
+			goto out;
 	} else {
 		ret = ring->add_request(ring);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	request->tail = intel_ring_get_tail(ringbuf);
@@ -2498,10 +2503,12 @@ static int i915_gem_request_submit(struct drm_i915_gem_request *request,
 
 	list_add_tail(&request->list, &ring->request_list);
 	ring->outstanding_lazy_request = NULL;
+out:
+	spin_unlock(&ring->request_list_lock);
 
 	trace_i915_gem_request_add(request);
 
-	return 0;
+	return ret;
 }
 
 static void i915_gem_request_add_to_client(struct drm_i915_gem_request *request)
@@ -2610,9 +2617,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
+static void i915_gem_ring_remove_request(struct drm_i915_gem_request *request)
 {
+	struct intel_engine_cs *ring = request->ring;
+
+	spin_lock(&ring->request_list_lock);
 	list_del(&request->list);
+	spin_unlock(&ring->request_list_lock);
+}
+
+static void i915_gem_free_request(struct drm_i915_gem_request *request)
+{
+	i915_gem_ring_remove_request(request);
 	i915_gem_request_remove_from_client(request);
 
 	put_pid(request->pid);
@@ -2645,13 +2661,17 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *request;
 
+	spin_lock(&ring->request_list_lock);
+
 	list_for_each_entry(request, &ring->request_list, list) {
 		if (i915_gem_request_completed(request, false))
 			continue;
 
+		spin_unlock(&ring->request_list_lock);
 		return request;
 	}
 
+	spin_unlock(&ring->request_list_lock);
 	return NULL;
 }
 
@@ -2670,8 +2690,44 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 
 	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
 
+	spin_lock(&ring->request_list_lock);
+
 	list_for_each_entry_continue(request, &ring->request_list, list)
 		i915_set_reset_status(dev_priv, request->ctx, false);
+
+	spin_unlock(&ring->request_list_lock);
+}
+
+static struct drm_i915_gem_request *
+i915_gem_ring_first_request(struct intel_engine_cs *ring)
+{
+	struct drm_i915_gem_request *request;
+
+	spin_lock(&ring->request_list_lock);
+
+	if (list_empty(&ring->request_list))
+		request = NULL;
+	else
+		request = list_first_entry(&ring->request_list,
+					   struct drm_i915_gem_request,
+					   list);
+
+	spin_unlock(&ring->request_list_lock);
+
+	return request;
+}
+
+static bool i915_gem_ring_request_list_empty(struct intel_engine_cs *ring)
+{
+	return i915_gem_ring_first_request(ring) == NULL;
+}
+
+static void i915_gem_ring_free_requests(struct intel_engine_cs *ring)
+{
+	struct drm_i915_gem_request *request;
+
+	while ((request = i915_gem_ring_first_request(ring)) != NULL)
+		i915_gem_free_request(request);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
@@ -2715,15 +2771,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * implicit references on things like e.g. ppgtt address spaces through
 	 * the request.
 	 */
-	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
-
-		i915_gem_free_request(request);
-	}
+	i915_gem_ring_free_requests(ring);
 
 	/* This may not have been flushed before the reset, so clean it now */
 	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
@@ -2778,7 +2826,9 @@ void i915_gem_reset(struct drm_device *dev)
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
-	if (list_empty(&ring->request_list))
+	struct drm_i915_gem_request *request;
+
+	if (i915_gem_ring_first_request(ring) == NULL)
 		return;
 
 	WARN_ON(i915_verify_lists(ring->dev));
@@ -2800,15 +2850,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 		i915_gem_object_move_to_inactive(obj);
 	}
 
-
-	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+	while ((request = i915_gem_ring_first_request(ring)) != NULL) {
 		struct intel_ringbuffer *ringbuf;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
-
 		if (!i915_gem_request_completed(request, true))
 			break;
 
@@ -2854,7 +2898,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 
 	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
-		idle &= list_empty(&ring->request_list);
+		idle &= i915_gem_ring_request_list_empty(ring);
 		if (i915.enable_execlists) {
 			unsigned long flags;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a982849..7da2463 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1032,6 +1032,9 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
+
+		spin_lock(&ring->request_list_lock);
+
 		list_for_each_entry(request, &ring->request_list, list)
 			count++;
 
@@ -1041,6 +1044,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				GFP_ATOMIC);
 		if (error->ring[i].requests == NULL) {
 			error->ring[i].num_requests = 0;
+			spin_unlock(&ring->request_list_lock);
 			continue;
 		}
 
@@ -1053,6 +1057,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
 		}
+
+		spin_unlock(&ring->request_list_lock);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9073119..b0afedf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2774,8 +2774,16 @@ ring_last_request(struct intel_engine_cs *ring)
 static bool
 ring_idle(struct intel_engine_cs *ring)
 {
-	return (list_empty(&ring->request_list) ||
-		i915_gem_request_completed(ring_last_request(ring), false));
+	bool idle;
+
+	spin_lock(&ring->request_list_lock);
+
+	idle = list_empty(&ring->request_list) ||
+		i915_gem_request_completed(ring_last_request(ring), false);
+
+	spin_unlock(&ring->request_list_lock);
+
+	return idle;
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d17e76d..da5c18a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2043,6 +2043,8 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
+	spin_lock(&ring->request_list_lock);
+
 	list_for_each_entry(request, &ring->request_list, list) {
 		if (__intel_ring_space(request->postfix, ringbuf->tail,
 				       ringbuf->size) >= n) {
@@ -2051,7 +2053,12 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 	}
 
 	if (&request->list == &ring->request_list)
-		return -ENOSPC;
+		ret = -ENOSPC;
+
+	spin_unlock(&ring->request_list_lock);
+
+	if (ret)
+		return ret;
 
 	ret = i915_wait_request(request);
 	if (ret)
@@ -2149,14 +2156,20 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 			return ret;
 	}
 
+	spin_lock(&ring->request_list_lock);
+
 	/* Wait upon the last request to be completed */
-	if (list_empty(&ring->request_list))
+	if (list_empty(&ring->request_list)) {
+		spin_unlock(&ring->request_list_lock);
 		return 0;
+	}
 
 	req = list_entry(ring->request_list.prev,
 			   struct drm_i915_gem_request,
 			   list);
 
+	spin_unlock(&ring->request_list_lock);
+
 	return i915_wait_request(req);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b6c484f..0f349e2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -263,6 +263,11 @@ struct  intel_engine_cs {
 	struct list_head request_list;
 
 	/**
+	 * Lock for the request_list
+	 */
+	spinlock_t request_list_lock;
+
+	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
 	struct drm_i915_gem_request *outstanding_lazy_request;
-- 
1.9.1



More information about the Intel-gfx mailing list