[PATCH] drm/xe: Take preemption into account when resubmitting jobs
Lis, Tomasz
tomasz.lis at intel.com
Tue Aug 12 18:54:23 UTC 2025
On 8/9/2025 6:34 AM, Matthew Brost wrote:
> Take preemption into account when resubmitting jobs, and adjust the new
> LRC head pointer accordingly to skip over previously executed parts of
> the job. To support this, save the head pointer of each job when it is
> emitted.
>
> This code can either be leveraged or reused for VF recovery.
Right. VF migration recovery.
This will help when extending the job ring fixup code with ring
position control.
>
> Signed-off-by: Matthew Brost<matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_submit.c | 23 +++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_ring_ops.c | 23 +++++++++++++++++++----
> drivers/gpu/drm/xe/xe_sched_job_types.h | 2 ++
> 3 files changed, 42 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 1185b23b1384..3ba707bbb74d 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -1954,16 +1954,35 @@ void xe_guc_submit_pause(struct xe_guc *guc)
> xe_sched_submission_stop_async(&q->guc->sched);
> }
>
> +static int guc_lrc_offset(struct xe_lrc *lrc, u32 job_head)
> +{
> + if (xe_lrc_ring_head(lrc) == job_head)
> + return 0;
Not sure why this condition is singled out rather than folding it into
(job_head <= xe_lrc_ring_head(lrc))
below, but that's just a matter of individual style, so it can go
either way.
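For reference, the folded form would just be:

	if (job_head <= xe_lrc_ring_head(lrc))
		return xe_lrc_ring_head(lrc) - job_head;

with the wraparound return below unchanged.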
> +
> + if (job_head < xe_lrc_ring_head(lrc))
> + return xe_lrc_ring_head(lrc) - job_head;
> +
> + return lrc->ring.size - job_head + xe_lrc_ring_head(lrc);
I don't think it's a good idea to read the head value from the LRC
multiple times; this is a VRAM access. Also, if we're assuming the
value in the LRC stays unchanged, a comment would make sense, to avoid
incorrect reuse. And since the value is used 4 times here, a local
variable is fully justified.
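Something along these lines is what I mean (just a sketch, keeping your
branch structure and only caching the head):

	static int guc_lrc_offset(struct xe_lrc *lrc, u32 job_head)
	{
		/*
		 * Read the ring head once; it lives in VRAM. Assumes the
		 * value cannot change while we compute the offset.
		 */
		u32 head = xe_lrc_ring_head(lrc);

		if (head == job_head)
			return 0;

		if (job_head < head)
			return head - job_head;

		return lrc->ring.size - job_head + head;
	}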
-Tomasz
> +}
> +
> static void guc_exec_queue_start(struct xe_exec_queue *q)
> {
> struct xe_gpu_scheduler *sched = &q->guc->sched;
>
> if (!exec_queue_killed_or_banned_or_wedged(q)) {
> + struct xe_sched_job *job;
> int i;
>
> + job = xe_sched_first_pending_job(&q->guc->sched);
> +
> trace_xe_exec_queue_resubmit(q);
> - for (i = 0; i < q->width; ++i)
> - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
> + for (i = 0; i < q->width; ++i) {
> + int offset = !job ? 0 :
> + guc_lrc_offset(q->lrc[i], job->ptrs[i].head);
> +
> + xe_lrc_set_ring_head(q->lrc[i], (q->lrc[i]->ring.tail +
> + offset) % q->lrc[i]->ring.size);
> + }
> xe_sched_resubmit_jobs(sched);
> }
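One note for my own understanding of the modulo here: if I follow the
resubmit flow correctly, the first pending job gets re-emitted starting
at ring.tail, so skipping the already-executed part lands the new head
at (tail + offset) % size, which also covers jobs that wrap past the
ring end. E.g. with made-up numbers:

	/*
	 * Hypothetical values: ring.size = 0x1000, tail = 0xe00, and
	 * 0x300 bytes of the job executed before preemption, so
	 * guc_lrc_offset() returns 0x300:
	 */
	u32 new_head = (0xe00 + 0x300) % 0x1000;	/* == 0x100, wrapped */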
>
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 5f15360d14bf..4dad28f0614d 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -245,12 +245,14 @@ static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
>
> /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
> static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
> - u64 batch_addr, u32 seqno)
> + u64 batch_addr, u32 *head, u32 seqno)
> {
> u32 dw[MAX_JOB_SIZE_DW], i = 0;
> u32 ppgtt_flag = get_ppgtt_flag(job);
> struct xe_gt *gt = job->q->gt;
>
> + *head = lrc->ring.tail;
> +
> i = emit_copy_timestamp(lrc, dw, i);
>
> if (job->ring_ops_flush_tlb) {
> @@ -296,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe)
> }
>
> static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
> - u64 batch_addr, u32 seqno)
> + u64 batch_addr, u32 *head, u32 seqno)
> {
> u32 dw[MAX_JOB_SIZE_DW], i = 0;
> u32 ppgtt_flag = get_ppgtt_flag(job);
> @@ -304,6 +306,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
> struct xe_device *xe = gt_to_xe(gt);
> bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
>
> + *head = lrc->ring.tail;
> +
> i = emit_copy_timestamp(lrc, dw, i);
>
> dw[i++] = preparser_disable(true);
> @@ -346,7 +350,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
>
> static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> struct xe_lrc *lrc,
> - u64 batch_addr, u32 seqno)
> + u64 batch_addr, u32 *head,
> + u32 seqno)
> {
> u32 dw[MAX_JOB_SIZE_DW], i = 0;
> u32 ppgtt_flag = get_ppgtt_flag(job);
> @@ -355,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
> u32 mask_flags = 0;
>
> + *head = lrc->ring.tail;
> +
> i = emit_copy_timestamp(lrc, dw, i);
>
> dw[i++] = preparser_disable(true);
> @@ -396,11 +403,14 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> }
>
> static void emit_migration_job_gen12(struct xe_sched_job *job,
> - struct xe_lrc *lrc, u32 seqno)
> + struct xe_lrc *lrc, u32 *head,
> + u32 seqno)
> {
> u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
> u32 dw[MAX_JOB_SIZE_DW], i = 0;
>
> + *head = lrc->ring.tail;
> +
> i = emit_copy_timestamp(lrc, dw, i);
>
> i = emit_store_imm_ggtt(saddr, seqno, dw, i);
> @@ -434,6 +444,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
>
> __emit_job_gen12_simple(job, job->q->lrc[0],
> job->ptrs[0].batch_addr,
> + &job->ptrs[0].head,
> xe_sched_job_lrc_seqno(job));
> }
>
> @@ -443,6 +454,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
>
> if (xe_sched_job_is_migration(job->q)) {
> emit_migration_job_gen12(job, job->q->lrc[0],
> + &job->ptrs[0].head,
> xe_sched_job_lrc_seqno(job));
> return;
> }
> @@ -450,6 +462,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
> for (i = 0; i < job->q->width; ++i)
> __emit_job_gen12_simple(job, job->q->lrc[i],
> job->ptrs[i].batch_addr,
> + &job->ptrs[i].head,
> xe_sched_job_lrc_seqno(job));
> }
>
> @@ -461,6 +474,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
> for (i = 0; i < job->q->width; ++i)
> __emit_job_gen12_video(job, job->q->lrc[i],
> job->ptrs[i].batch_addr,
> + &job->ptrs[i].head,
> xe_sched_job_lrc_seqno(job));
> }
>
> @@ -471,6 +485,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
> for (i = 0; i < job->q->width; ++i)
> __emit_job_gen12_render_compute(job, job->q->lrc[i],
> job->ptrs[i].batch_addr,
> + &job->ptrs[i].head,
> xe_sched_job_lrc_seqno(job));
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index dbf260dded8d..359f93b0cdca 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -24,6 +24,8 @@ struct xe_job_ptrs {
> struct dma_fence_chain *chain_fence;
> /** @batch_addr: Batch buffer address. */
> u64 batch_addr;
> + /** @head: The head pointer of the LRC when the job was submitted */
> + u32 head;
> };
>
> /**