[Intel-xe] [RFC PATCH 6/7] drm/xe: Add last fence as dependency for jobs on user exec queues

Matthew Brost matthew.brost at intel.com
Thu Dec 7 05:57:28 UTC 2023


The last fence must be added as a dependency for jobs on user exec
queues as it is possible for the last fence to be a composite software
fence (unordered, ioctl with zero bb or binds) rather than hardware
fence (ordered, previous job on queue).

Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c       |  4 ++++
 drivers/gpu/drm/xe/xe_exec_queue.c |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c    | 14 +++++++++++---
 drivers/gpu/drm/xe/xe_sched_job.c  | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_sched_job.h  |  4 ++++
 5 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 438e34585e1e..92b0da6580e8 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -313,6 +313,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto err_put_job;
 
 	if (!xe_vm_in_lr_mode(vm)) {
+		err = xe_sched_job_last_fence_add_dep(job, vm);
+		if (err)
+			goto err_put_job;
+
 		err = down_read_interruptible(&vm->userptr.notifier_lock);
 		if (err)
 			goto err_put_job;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 67e3fd9dfc5f..3911d14522ee 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -887,7 +887,7 @@ static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
 						    struct xe_vm *vm)
 {
 	if (q->flags & EXEC_QUEUE_FLAG_VM)
-		lockdep_assert_held_write(&vm->lock);
+		lockdep_assert_held(&vm->lock);
 	else
 		xe_vm_assert_held(vm);
 }
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index e8b567708ac0..ce14498b416a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1163,17 +1163,24 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	return fence;
 }
 
-static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
 {
+	struct dma_fence *fence;
 	int i;
 
 	for (i = 0; i < num_syncs; i++) {
-		struct dma_fence *fence = syncs[i].fence;
+		fence = syncs[i].fence;
 
 		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				       &fence->flags))
 			return false;
 	}
+	if (q) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return false;
+	}
 
 	return true;
 }
@@ -1234,7 +1241,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
@@ -1351,6 +1358,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			goto err_job;
 	}
 
+	err = xe_sched_job_last_fence_add_dep(job, vm);
 	for (i = 0; !err && i < num_syncs; i++)
 		err = xe_sync_entry_add_deps(&syncs[i], job);
 
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index b467d5bfa4ac..b7d714522ae1 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -260,3 +260,20 @@ void xe_sched_job_push(struct xe_sched_job *job)
 	drm_sched_entity_push_job(&job->drm);
 	xe_sched_job_put(job);
 }
+
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	fence = xe_exec_queue_last_fence_get(job->q, vm);
+
+	/* Only wait on unsignaled software fences */
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+	    !(fence->context == job->drm.entity->fence_context ||
+	      fence->context == job->drm.entity->fence_context + 1)) {
+		dma_fence_get(fence);
+		return drm_sched_job_add_dependency(&job->drm, fence);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 6ca1d426c036..34f475ba7f50 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -8,6 +8,8 @@
 
 #include "xe_sched_job_types.h"
 
+struct xe_vm;
+
 #define XE_SCHED_HANG_LIMIT 1
 #define XE_SCHED_JOB_TIMEOUT LONG_MAX
 
@@ -54,6 +56,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job);
 void xe_sched_job_arm(struct xe_sched_job *job);
 void xe_sched_job_push(struct xe_sched_job *job);
 
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+
 static inline struct xe_sched_job *
 to_xe_sched_job(struct drm_sched_job *drm)
 {
-- 
2.34.1



More information about the Intel-xe mailing list