[PATCH 1/4] drm/i915: Introduce a signal pin for requests

Wed Oct 20 12:50:13 UTC 2021

During error capture it's not uncommon as shown by multiple runs of
gem_exec_capture at pi, that the request signals under us. That also means
it might disappear from under us. The captured memory is, however,
protected by taking an active pin on the vma active structure, but that
doesn't work anymore with asynchronous binds and unbinds, and the
i915 vma active also is scheduled for going away.

To help keeping the request alive and to avoid freeing the memory of the
vmas to capture, introduce a reset signal pin which can be grabbed only
while the fence is not yet signaled and that blocks signaling until
released. Make sure it's properly lockdep annotated to avoid future
pitfalls with this functionality.

Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |  10 +-
 drivers/gpu/drm/i915/i915_request.c         |   5 +
 drivers/gpu/drm/i915/i915_request.h         | 168 ++++++++++++++------
 3 files changed, 134 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 209cf265bf74..732c1e36fde9 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -121,9 +121,11 @@ check_signal_order(struct intel_context *ce, struct i915_request *rq)
 }
 
 static bool
-__dma_fence_signal(struct dma_fence *fence)
+__signal_request(struct i915_request *rq)
 {
-	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
+	return (__i915_request_set_complete(rq) &&
+		!test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				  &rq->fence.flags));
 }
 
 static void
@@ -230,7 +232,7 @@ static void signal_irq_work(struct irq_work *work)
 				intel_context_put(ce);
 			}
 
-			if (__dma_fence_signal(&rq->fence))
+			if (__signal_request(rq))
 				/* We own signal_node now, xfer to local list */
 				signal = slist_add(&rq->signal_node, signal);
 			else
@@ -332,7 +334,7 @@ void intel_breadcrumbs_free(struct kref *kref)
 static void irq_signal_request(struct i915_request *rq,
 			       struct intel_breadcrumbs *b)
 {
-	if (!__dma_fence_signal(&rq->fence))
+	if (!__signal_request(rq))
 		return;
 
 	i915_request_get(rq);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 820a1f38b271..69d224d6b1a3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -308,6 +308,10 @@ bool i915_request_retire(struct i915_request *rq)
 	if (!__i915_request_is_complete(rq))
 		return false;
 
+	(void)__i915_request_set_complete(rq);
+	if (refcount_read(&rq->signal_pin_count))
+		return false;
+
 	RQ_TRACE(rq, "\n");
 
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
@@ -926,6 +930,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	/* No zalloc, everything must be cleared after use */
 	rq->batch = NULL;
+	refcount_set(&rq->signal_pin_count, 1);
 	__rq_init_watchdog(rq);
 	GEM_BUG_ON(rq->capture_list);
 	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dc359242d1ae..1e71c5f3b5c3 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -162,6 +162,12 @@ enum {
 	 * fence (dma_fence_array) and i915 generated for parallel submission.
 	 */
 	I915_FENCE_FLAG_COMPOSITE,
+
+	/*
+	 * I915_FENCE_FLAG_COMPLETED - Indicates that the request is completed,
+	 * but may still be held from signaling by a signal pin.
+	 */
+	I915_FENCE_FLAG_COMPLETED,
 };
 
 /**
@@ -313,6 +319,12 @@ struct i915_request {
 		struct hrtimer timer;
 	} watchdog;
 
+	/**
+	 * @signal_pin_count: Used to temporarily keep a request from
+	 * signaling even if it is completed.
+	 */
+	refcount_t signal_pin_count;
+
 	/**
 	 * @guc_fence_link: Requests may need to be stalled when using GuC
 	 * submission waiting for certain GuC operations to complete. If that is
@@ -493,49 +505,6 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
 	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
 }
 
-/**
- * i915_request_started - check if the request has begun being executed
- * @rq: the request
- *
- * If the timeline is not using initial breadcrumbs, a request is
- * considered started if the previous request on its timeline (i.e.
- * context) has been signaled.
- *
- * If the timeline is using semaphores, it will also be emitting an
- * "initial breadcrumb" after the semaphores are complete and just before
- * it began executing the user payload. A request can therefore be active
- * on the HW and not yet started as it is still busywaiting on its
- * dependencies (via HW semaphores).
- *
- * If the request has started, its dependencies will have been signaled
- * (either by fences or by semaphores) and it will have begun processing
- * the user payload.
- *
- * However, even if a request has started, it may have been preempted and
- * so no longer active, or it may have already completed.
- *
- * See also i915_request_is_active().
- *
- * Returns true if the request has begun executing the user payload, or
- * has completed:
- */
-static inline bool i915_request_started(const struct i915_request *rq)
-{
-	bool result;
-
-	if (i915_request_signaled(rq))
-		return true;
-
-	result = true;
-	rcu_read_lock(); /* the HWSP may be freed at runtime */
-	if (likely(!i915_request_signaled(rq)))
-		/* Remember: started but may have since been preempted! */
-		result = __i915_request_has_started(rq);
-	rcu_read_unlock();
-
-	return result;
-}
-
 /**
  * i915_request_is_running - check if the request may actually be executing
  * @rq: the request
@@ -584,22 +553,82 @@ static inline bool __i915_request_is_complete(const struct i915_request *rq)
 	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
 }
 
+static inline bool __i915_request_test_complete(const struct i915_request *rq)
+{
+	return test_bit(I915_FENCE_FLAG_COMPLETED, &rq->fence.flags);
+}
+
+/*
+ * Internal use: If the request wasn't marked completed, mark it as such and
+ * decrement the signal pin count. Return true if we actually marked it
+ * complete and decremented the last pin-count, that is, if it's ready to
+ * signal.
+ */
+static inline bool __i915_request_set_complete(struct i915_request *rq)
+{
+	return (!test_and_set_bit(I915_FENCE_FLAG_COMPLETED, &rq->fence.flags) &&
+		refcount_dec_and_test(&rq->signal_pin_count));
+}
+
 static inline bool i915_request_completed(const struct i915_request *rq)
 {
 	bool result;
 
-	if (i915_request_signaled(rq))
+	if (__i915_request_test_complete(rq))
 		return true;
 
 	result = true;
 	rcu_read_lock(); /* the HWSP may be freed at runtime */
-	if (likely(!i915_request_signaled(rq)))
+	if (likely(!__i915_request_test_complete(rq)))
 		result = __i915_request_is_complete(rq);
 	rcu_read_unlock();
 
 	return result;
 }
 
+/**
+ * i915_request_started - check if the request has begun being executed
+ * @rq: the request
+ *
+ * If the timeline is not using initial breadcrumbs, a request is
+ * considered started if the previous request on its timeline (i.e.
+ * context) has been signaled.
+ *
+ * If the timeline is using semaphores, it will also be emitting an
+ * "initial breadcrumb" after the semaphores are complete and just before
+ * it began executing the user payload. A request can therefore be active
+ * on the HW and not yet started as it is still busywaiting on its
+ * dependencies (via HW semaphores).
+ *
+ * If the request has started, its dependencies will have been signaled
+ * (either by fences or by semaphores) and it will have begun processing
+ * the user payload.
+ *
+ * However, even if a request has started, it may have been preempted and
+ * so no longer active, or it may have already completed.
+ *
+ * See also i915_request_is_active().
+ *
+ * Returns true if the request has begun executing the user payload, or
+ * has completed:
+ */
+static inline bool i915_request_started(const struct i915_request *rq)
+{
+	bool result;
+
+	if (i915_request_completed(rq))
+		return true;
+
+	result = true;
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	if (likely(!i915_request_signaled(rq)))
+		/* Remember: started but may have since been preempted! */
+		result = __i915_request_has_started(rq);
+	rcu_read_unlock();
+
+	return result;
+}
+
 static inline void i915_request_mark_complete(struct i915_request *rq)
 {
 	WRITE_ONCE(rq->hwsp_seqno, /* decouple from HWSP */
@@ -685,6 +714,55 @@ i915_request_active_seqno(const struct i915_request *rq)
 	return hwsp_phys_base + hwsp_relative_offset;
 }
 
+/**
+ * i915_request_signal_pin - Take a signal pin on a request
+ * @rq: The request on which to take a signal pin
+ * @cookie: Cookie to give as an argument to i915_request_signal_unpin
+ *
+ * This function is used to temporarily block a request from signaling,
+ * (but not from completing). Typical users would be memory management during
+ * error capturing or error propagation during preempt-ctx like fence
+ * revalidation.
+ *
+ * Return: true if successful, false otherwise. If true is returned, the
+ * function must be paired with an i915_request_signal_upin.
+ */
+static inline bool i915_request_signal_pin(struct i915_request *rq,
+					   bool *cookie)
+{
+	bool tmp = refcount_inc_not_zero(&rq->signal_pin_count);
+
+	if (tmp)
+		*cookie = dma_fence_begin_signalling();
+
+	return tmp;
+}
+
+/**
+ * i915_request_signal_pin - Release a signal pin on a request
+ * @rq: The request on which to take a signal pin
+ * @cookie: Cookie obtained from i915_request_signal_pin
+ *
+ * This function is used to release a pin that was taken using
+ * i915_request_signal_pin. It may signal the request and may
+ * therefore not be called under a fence lock.
+ */
+static inline void i915_request_signal_unpin(struct i915_request *rq,
+					     bool cookie)
+{
+	dma_fence_end_signalling(cookie);
+	/* Opencoded inefficient might_lock_irqsave() */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(rq->fence.lock, irq_flags);
+		spin_unlock_irqrestore(rq->fence.lock, irq_flags);
+	}
+
+	if (refcount_dec_and_test(&rq->signal_pin_count))
+		dma_fence_signal(&rq->fence);
+}
+
 bool
 i915_request_active_engine(struct i915_request *rq,
 			   struct intel_engine_cs **active);
-- 
2.31.1