[PATCH] drm/xe: Take preemption into account when resubmitting jobs
Matthew Brost
matthew.brost at intel.com
Fri Aug 15 01:33:22 UTC 2025
On Tue, Aug 12, 2025 at 08:54:23PM +0200, Lis, Tomasz wrote:
>
> On 8/9/2025 6:34 AM, Matthew Brost wrote:
> > Take preemption into account when resubmitting jobs, and adjust the new
> > LRC head pointer accordingly to skip over previously executed parts of
> > the job. To support this, save the head pointer of each job when it is
> > emitted.
> >
> > This code can either be leveraged or reused for VF recovery.
>
> Right. VF migration recovery.
>
> This will help when extending the jobs' ring fixup code with ring-position
> control.
>
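To make the intended fixup concrete (illustrative numbers only, not from the
patch): if a job was emitted when the ring head was at 0x100 and the engine
had advanced the LRC head to 0x180 before the reset, the saved per-job head
lets the resubmit path compute an offset of 0x80 and start the re-emitted job
that far in, so the already-executed portion is not replayed.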
> >
> > Signed-off-by: Matthew Brost<matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_guc_submit.c | 23 +++++++++++++++++++++--
> > drivers/gpu/drm/xe/xe_ring_ops.c | 23 +++++++++++++++++++----
> > drivers/gpu/drm/xe/xe_sched_job_types.h | 2 ++
> > 3 files changed, 42 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> > index 1185b23b1384..3ba707bbb74d 100644
> > --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> > @@ -1954,16 +1954,35 @@ void xe_guc_submit_pause(struct xe_guc *guc)
> > xe_sched_submission_stop_async(&q->guc->sched);
> > }
> > +static int guc_lrc_offset(struct xe_lrc *lrc, u32 job_head)
> > +{
> > + if (xe_lrc_ring_head(lrc) == job_head)
> > + return 0;
>
> Not sure why this condition is singled out rather than using
> (job_head <= xe_lrc_ring_head(lrc)) below, but that's just a matter of
> individual style, so it can go either way.
>
> > +
> > + if (job_head < xe_lrc_ring_head(lrc))
> > + return xe_lrc_ring_head(lrc) - job_head;
> > +
> > + return lrc->ring.size - job_head + xe_lrc_ring_head(lrc);
>
> I don't think it's a good idea to read the head value from the LRC multiple
> times; this is a VRAM access. Also, if we're assuming the value in the LRC
> stays unchanged, maybe a comment would make sense, to avoid incorrect reuse?
>
> But rather than a comment, since it is used 4 times, a local variable is
> fully justified here.
>
Yes, even though this is not a hot path, it's better to use a local variable.
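Something along these lines (untested sketch):

static int guc_lrc_offset(struct xe_lrc *lrc, u32 job_head)
{
	u32 head = xe_lrc_ring_head(lrc);	/* read the LRC head once */

	if (head == job_head)
		return 0;

	if (job_head < head)
		return head - job_head;

	/* The LRC head has wrapped around the ring relative to the job head */
	return lrc->ring.size - job_head + head;
}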
Matt
> -Tomasz
>
> > +}
> > +
> > static void guc_exec_queue_start(struct xe_exec_queue *q)
> > {
> > struct xe_gpu_scheduler *sched = &q->guc->sched;
> > if (!exec_queue_killed_or_banned_or_wedged(q)) {
> > + struct xe_sched_job *job;
> > int i;
> > + job = xe_sched_first_pending_job(&q->guc->sched);
> > +
> > trace_xe_exec_queue_resubmit(q);
> > - for (i = 0; i < q->width; ++i)
> > - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
> > + for (i = 0; i < q->width; ++i) {
> > + int offset = !job ? 0 :
> > + guc_lrc_offset(q->lrc[i], job->ptrs[i].head);
> > +
> > + xe_lrc_set_ring_head(q->lrc[i], (q->lrc[i]->ring.tail +
> > + offset) % q->lrc[i]->ring.size);
> > + }
> > xe_sched_resubmit_jobs(sched);
> > }
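(For the wrap case, with an illustrative ring size of 0x1000: tail = 0xF80 and
offset = 0xC0 give a new head of 0x40, so the modulo keeps the head within the
ring.)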
> > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> > index 5f15360d14bf..4dad28f0614d 100644
> > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > @@ -245,12 +245,14 @@ static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
> > /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
> > static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
> > - u64 batch_addr, u32 seqno)
> > + u64 batch_addr, u32 *head, u32 seqno)
> > {
> > u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > u32 ppgtt_flag = get_ppgtt_flag(job);
> > struct xe_gt *gt = job->q->gt;
> > + *head = lrc->ring.tail;
> > +
> > i = emit_copy_timestamp(lrc, dw, i);
> > if (job->ring_ops_flush_tlb) {
> > @@ -296,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe)
> > }
> > static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
> > - u64 batch_addr, u32 seqno)
> > + u64 batch_addr, u32 *head, u32 seqno)
> > {
> > u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > u32 ppgtt_flag = get_ppgtt_flag(job);
> > @@ -304,6 +306,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
> > struct xe_device *xe = gt_to_xe(gt);
> > bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
> > + *head = lrc->ring.tail;
> > +
> > i = emit_copy_timestamp(lrc, dw, i);
> > dw[i++] = preparser_disable(true);
> > @@ -346,7 +350,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
> > static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> > struct xe_lrc *lrc,
> > - u64 batch_addr, u32 seqno)
> > + u64 batch_addr, u32 *head,
> > + u32 seqno)
> > {
> > u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > u32 ppgtt_flag = get_ppgtt_flag(job);
> > @@ -355,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> > bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
> > u32 mask_flags = 0;
> > + *head = lrc->ring.tail;
> > +
> > i = emit_copy_timestamp(lrc, dw, i);
> > dw[i++] = preparser_disable(true);
> > @@ -396,11 +403,14 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
> > }
> > static void emit_migration_job_gen12(struct xe_sched_job *job,
> > - struct xe_lrc *lrc, u32 seqno)
> > + struct xe_lrc *lrc, u32 *head,
> > + u32 seqno)
> > {
> > u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
> > u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > + *head = lrc->ring.tail;
> > +
> > i = emit_copy_timestamp(lrc, dw, i);
> > i = emit_store_imm_ggtt(saddr, seqno, dw, i);
> > @@ -434,6 +444,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
> > __emit_job_gen12_simple(job, job->q->lrc[0],
> > job->ptrs[0].batch_addr,
> > + &job->ptrs[0].head,
> > xe_sched_job_lrc_seqno(job));
> > }
> > @@ -443,6 +454,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
> > if (xe_sched_job_is_migration(job->q)) {
> > emit_migration_job_gen12(job, job->q->lrc[0],
> > + &job->ptrs[0].head,
> > xe_sched_job_lrc_seqno(job));
> > return;
> > }
> > @@ -450,6 +462,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
> > for (i = 0; i < job->q->width; ++i)
> > __emit_job_gen12_simple(job, job->q->lrc[i],
> > job->ptrs[i].batch_addr,
> > + &job->ptrs[i].head,
> > xe_sched_job_lrc_seqno(job));
> > }
> > @@ -461,6 +474,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
> > for (i = 0; i < job->q->width; ++i)
> > __emit_job_gen12_video(job, job->q->lrc[i],
> > job->ptrs[i].batch_addr,
> > + &job->ptrs[i].head,
> > xe_sched_job_lrc_seqno(job));
> > }
> > @@ -471,6 +485,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
> > for (i = 0; i < job->q->width; ++i)
> > __emit_job_gen12_render_compute(job, job->q->lrc[i],
> > job->ptrs[i].batch_addr,
> > + &job->ptrs[i].head,
> > xe_sched_job_lrc_seqno(job));
> > }
> > diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> > index dbf260dded8d..359f93b0cdca 100644
> > --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> > +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> > @@ -24,6 +24,8 @@ struct xe_job_ptrs {
> > struct dma_fence_chain *chain_fence;
> > /** @batch_addr: Batch buffer address. */
> > u64 batch_addr;
> > + /** @head: The head pointer of the LRC when the job was submitted */
> > + u32 head;
> > };
> > /**