[RFC PATCH 7/8] drm/xe: Add ULLS migration job support to GuC submission

Matthew Brost matthew.brost at intel.com
Mon Aug 12 02:47:16 UTC 2024


Cap the number of in-flight jobs at one less than the ULLS semaphore
count to avoid overflowing the semaphores, directly set the hwe ring
tail for ULLS, set the current job's semaphore, and suppress the submit
H2G for ULLS aside from the first ULLS submission.

Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
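For reviewers, a minimal, compilable model of the two decisions this
patch makes; it is not part of the patch. SEMAPHORE_COUNT, the job
size, and the ring size below are made-up stand-ins for the driver's
LRC_MIGRATION_ULLS_SEMAPORE_COUNT, MAX_JOB_SIZE_BYTES, and actual LRC
ring size, and the real code keys off the JOB_FLAG_ULLS /
JOB_FLAG_ULLS_FIRST fence flags and still sends the register/enable
H2Gs, which the model omits.

#include <stdbool.h>
#include <stdio.h>

#define SEMAPHORE_COUNT    4      /* stand-in for LRC_MIGRATION_ULLS_SEMAPORE_COUNT */
#define MAX_JOB_SIZE_BYTES 256    /* stand-in worst-case ring space per job */

/* Cap in-flight jobs: ULLS migration queues get one slot fewer than the
 * semaphore count so a new job never reuses a still-pending semaphore. */
static int max_jobs(unsigned int ring_size, bool ulls_migration)
{
	int jobs = ring_size / MAX_JOB_SIZE_BYTES;

	if (ulls_migration && jobs > SEMAPHORE_COUNT - 1)
		jobs = SEMAPHORE_COUNT - 1;

	return jobs;
}

/* Decide whether this submission needs a SCHED_CONTEXT H2G or only a
 * direct hardware ring tail write. */
static void submit(bool ulls, bool ulls_first)
{
	if (ulls && !ulls_first)
		printf("write hwe ring tail directly, no H2G\n");
	else
		printf("send XE_GUC_ACTION_SCHED_CONTEXT H2G\n");
}

int main(void)
{
	printf("max jobs on a migration queue: %d\n", max_jobs(16384, true));
	submit(true, true);   /* first ULLS job: GuC schedules the context */
	submit(true, false);  /* later ULLS jobs: hardware tail write only */
	return 0;
}

The SEMAPHORE_COUNT - 1 cap mirrors the xe_assert() plus assignment in
guc_exec_queue_init() below: capping below the semaphore count keeps at
least one slot free so a new job's semaphore cannot overflow a
still-pending one.
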
 drivers/gpu/drm/xe/xe_guc_submit.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index da63be550d4d..aa1e8a76bd0c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -706,7 +706,7 @@ static void wq_item_append(struct xe_exec_queue *q)
 }
 
 #define RESUME_PENDING	~0x0ull
-static void submit_exec_queue(struct xe_exec_queue *q)
+static void submit_exec_queue(struct xe_sched_job *job, struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
@@ -716,6 +716,8 @@ static void submit_exec_queue(struct xe_exec_queue *q)
 	u32 num_g2h = 0;
 	int len = 0;
 	bool extra_submit = false;
+	bool ulls = test_bit(JOB_FLAG_ULLS, &job->fence->flags);
+	bool ulls_first = test_bit(JOB_FLAG_ULLS_FIRST, &job->fence->flags);
 
 	xe_assert(xe, exec_queue_registered(q));
 
@@ -724,6 +726,12 @@ static void submit_exec_queue(struct xe_exec_queue *q)
 	else
 		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
 
+	if (ulls && !ulls_first)
+		xe_hw_engine_write_ring_tail(q->hwe, lrc->ring.tail);
+
+	if (ulls)
+		xe_lrc_set_ulls_semaphore(lrc, xe_sched_job_lrc_seqno(job));
+
 	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
 		return;
 
@@ -740,13 +748,14 @@ static void submit_exec_queue(struct xe_exec_queue *q)
 		set_exec_queue_pending_enable(q);
 		set_exec_queue_enabled(q);
 		trace_xe_exec_queue_scheduling_enable(q);
-	} else {
+	} else if (!ulls || ulls_first) {
 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
 		action[len++] = q->guc->id;
 		trace_xe_exec_queue_submit(q);
 	}
 
-	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+	if (!ulls || ulls_first || num_g2h)
+		xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
 
 	if (extra_submit) {
 		len = 0;
@@ -777,7 +786,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
 			register_exec_queue(q);
 		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
 			q->ring_ops->emit_job(job);
-		submit_exec_queue(q);
+		submit_exec_queue(job, q);
 	}
 
 	if (lr) {
@@ -1435,6 +1444,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	struct xe_guc_exec_queue *ge;
 	long timeout;
 	int err, i;
+	int max_jobs = (q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES);
 
 	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
 
@@ -1449,11 +1459,15 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
 		INIT_LIST_HEAD(&ge->static_msgs[i].link);
 
+	if (q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION) {
+		xe_assert(xe, LRC_MIGRATION_ULLS_SEMAPORE_COUNT - 1 < max_jobs);
+		max_jobs = LRC_MIGRATION_ULLS_SEMAPORE_COUNT - 1;
+	}
+
 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
 		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
-			    get_submit_wq(guc),
-			    q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
+			    get_submit_wq(guc), max_jobs, 64,
 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
 			    q->name, gt_to_xe(q->gt)->drm.dev);
 	if (err)
-- 
2.34.1


