[PATCH v5] drm/sched: Make sure we wait for all dependencies in kill_jobs_cb()

Boris Brezillon boris.brezillon at collabora.com
Fri Jun 16 17:12:48 UTC 2023


drm_sched_entity_kill_jobs_cb() logic is omitting the last fence popped
from the dependency array that was waited upon before
drm_sched_entity_kill() was called (drm_sched_entity::dependency field),
so we're basically waiting for all dependencies except one.

In theory, this wait shouldn't be needed because resources should have
their users registered to the dma_resv object, thus guaranteeing that
future jobs wanting to access these resources wait on all the previous
users (depending on the access type, of course). But we want to keep
these explicit waits in the kill entity path just in case.

Let's make sure we keep all dependencies in the array in
drm_sched_job_dependency(), so we can iterate over the array and wait
in drm_sched_entity_kill_jobs_cb().

We make sure we wait on drm_sched_fence::finished if we were
originally asked to wait on drm_sched_fence::scheduled. In that case,
we assume the intent was to delegate the wait to the firmware/GPU or
rely on the pipelining done at the entity/scheduler level, but when
killing jobs, we really want to wait for completion not just scheduling.

v5:
- Flag deps on which we should only wait for the scheduled event
  at insertion time.

v4:
- Fix commit message
- Fix a use-after-free bug

v3:
- Always wait for drm_sched_fence::finished fences in
  drm_sched_entity_kill_jobs_cb() when we see a sched_fence

v2:
- Don't evict deps in drm_sched_job_dependency()

Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Suggested-by: "Christian König" <christian.koenig at amd.com>
Cc: Frank Binns <frank.binns at imgtec.com>
Cc: Sarah Walker <sarah.walker at imgtec.com>
Cc: Donald Robson <donald.robson at imgtec.com>
Cc: Luben Tuikov <luben.tuikov at amd.com>
Cc: David Airlie <airlied at gmail.com>
Cc: Daniel Vetter <daniel at ffwll.ch>
Cc: Sumit Semwal <sumit.semwal at linaro.org>
Cc: "Christian König" <christian.koenig at amd.com>
---
Hello Christian,

It turns out the approach you suggested just moves the complexity to
the insertion path, and it actually makes the 'should we replace or
drop fence' test a bit more tedious. We might end up with less
duplicates if some drivers start mixing scheduled/finished fences, but
I doubt this will be the case in practice, and I'm not sure it's worth
the extra complexity.

Let me know what you think.

Regards,

Boris
---
 drivers/gpu/drm/scheduler/sched_entity.c | 29 ++++++++++++--
 drivers/gpu/drm/scheduler/sched_main.c   | 50 +++++++++++++++++++++++-
 include/drm/gpu_scheduler.h              |  7 ++++
 3 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 68e807ae136a..ffdee2544f42 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -176,13 +176,14 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 {
 	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
 						 finish_cb);
+	unsigned long index;
 	int r;
 
 	dma_fence_put(f);
 
 	/* Wait for all dependencies to avoid data corruptions */
-	while (!xa_empty(&job->dependencies)) {
-		f = xa_erase(&job->dependencies, job->last_dependency++);
+	xa_for_each(&job->dependencies, index, f) {
+		xa_erase(&job->dependencies, index);
 		r = dma_fence_add_callback(f, &job->finish_cb,
 					   drm_sched_entity_kill_jobs_cb);
 		if (!r)
@@ -415,8 +416,28 @@ static struct dma_fence *
 drm_sched_job_dependency(struct drm_sched_job *job,
 			 struct drm_sched_entity *entity)
 {
-	if (!xa_empty(&job->dependencies))
-		return xa_erase(&job->dependencies, job->last_dependency++);
+	struct dma_fence *f;
+
+	/* We keep the fence around, so we can iterate over all dependencies
+	 * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled
+	 * before killing the job.
+	 */
+	f = xa_load(&job->dependencies, job->last_dependency);
+	if (f) {
+		/* If the entry is flagged as 'wait-for-scheduled', we are
+		 * dealing with a drm_sched_fence and we want to wait for
+		 * the scheduled event only.
+		 */
+		if (xa_get_mark(&job->dependencies, job->last_dependency,
+				DRM_SCHED_DEP_WAIT_FOR_SCHEDULED)) {
+			struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+			f = &s_fence->scheduled;
+		}
+
+		job->last_dependency++;
+		return dma_fence_get(f);
+	}
 
 	if (job->sched->ops->prepare_job)
 		return job->sched->ops->prepare_job(job, entity);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 394010a60821..8ac207cbd713 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -691,6 +691,8 @@ EXPORT_SYMBOL(drm_sched_job_arm);
 int drm_sched_job_add_dependency(struct drm_sched_job *job,
 				 struct dma_fence *fence)
 {
+	bool fence_wait_for_scheduled = false;
+	struct drm_sched_fence *s_fence;
 	struct dma_fence *entry;
 	unsigned long index;
 	u32 id = 0;
@@ -699,20 +701,64 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
 	if (!fence)
 		return 0;
 
+	s_fence = to_drm_sched_fence(fence);
+	if (s_fence && fence == &s_fence->scheduled) {
+		/* Make sure the finished fence hasn't been destroyed. */
+		fence = dma_fence_get_rcu(&s_fence->finished);
+		if (WARN_ON(!fence))
+			return -EINVAL;
+
+		/* Release the scheduled fence ref, now that we have a
+		 * ref on the finished fence.
+		 */
+		dma_fence_put(&s_fence->scheduled);
+
+		/* Waiting for scheduled event only. */
+		fence_wait_for_scheduled = true;
+	}
+
 	/* Deduplicate if we already depend on a fence from the same context.
 	 * This lets the size of the array of deps scale with the number of
 	 * engines involved, rather than the number of BOs.
 	 */
 	xa_for_each(&job->dependencies, index, entry) {
+		bool entry_wait_for_scheduled, fence_is_later;
+
 		if (entry->context != fence->context)
 			continue;
 
-		if (dma_fence_is_later(fence, entry)) {
+		entry_wait_for_scheduled = xa_get_mark(&job->dependencies,
+						       index,
+						       DRM_SCHED_DEP_WAIT_FOR_SCHEDULED);
+		fence_is_later = dma_fence_is_later(fence, entry);
+
+		/*
+		 * We can replace entry by fence when:
+		 * - fence is after entry and both are waiting on scheduled
+		 * - fence is after entry and both are waiting on finished
+		 * - fence is after entry, entry is waiting on scheduled and fence on finished
+		 *
+		 * We can keep entry and drop fence when:
+		 * - fence is before entry and both are waiting on scheduled
+		 * - fence is before entry and both are waiting on finished
+		 * - fence is before entry, fence is waiting on scheduled and entry on finished
+		 *
+		 * In all other situations, we can't guarantee the order and have to duplicate.
+		 */
+		if (fence_is_later && entry_wait_for_scheduled >= fence_wait_for_scheduled) {
 			dma_fence_put(entry);
 			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
-		} else {
+			if (fence_wait_for_scheduled) {
+				xa_set_mark(&job->dependencies, index,
+					    DRM_SCHED_DEP_WAIT_FOR_SCHEDULED);
+			}
+		} else if (!fence_is_later &&
+			   entry_wait_for_scheduled <= fence_wait_for_scheduled) {
 			dma_fence_put(fence);
+		} else {
+			continue;
 		}
+
 		return 0;
 	}
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e95b4837e5a3..fbdb01242997 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -262,6 +262,13 @@ struct drm_sched_rq {
 	struct rb_root_cached		rb_tree_root;
 };
 
+/*
+ * Used to flag dependency entries that are backed by drm_sched_fence object
+ * and on which we should only wait for the scheduled events.
+ * For internal use only.
+ */
+#define DRM_SCHED_DEP_WAIT_FOR_SCHEDULED	XA_MARK_1
+
 /**
  * struct drm_sched_fence - fences corresponding to the scheduling of a job.
  */
-- 
2.40.1



More information about the dri-devel mailing list