[PATCH] drm/xe: Take preemption into account when resubmitting jobs

Summers, Stuart stuart.summers at intel.com
Mon Aug 11 19:29:05 UTC 2025


On Mon, 2025-08-11 at 11:36 -0700, Matthew Brost wrote:
> On Mon, Aug 11, 2025 at 10:21:46AM -0600, Summers, Stuart wrote:
> > On Fri, 2025-08-08 at 21:34 -0700, Matthew Brost wrote:
> > > Take preemption into account when resubmitting jobs, and adjust
> > > the
> > > new
> > > LRC head pointer accordingly to skip over previously executed
> > > parts
> > > of
> > > the job. To support this, save the head pointer of each job when
> > > it
> > > is
> > > emitted.
> > > 
> > > This code can either be leveraged or reused for VF recovery.
> > > 
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_guc_submit.c      | 23
> > > +++++++++++++++++++++--
> > >  drivers/gpu/drm/xe/xe_ring_ops.c        | 23
> > > +++++++++++++++++++----
> > >  drivers/gpu/drm/xe/xe_sched_job_types.h |  2 ++
> > >  3 files changed, 42 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c
> > > b/drivers/gpu/drm/xe/xe_guc_submit.c
> > > index 1185b23b1384..3ba707bbb74d 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> > > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> > > @@ -1954,16 +1954,35 @@ void xe_guc_submit_pause(struct xe_guc
> > > *guc)
> > >                 xe_sched_submission_stop_async(&q->guc->sched);
> > >  }
> > >  
> > > +static int guc_lrc_offset(struct xe_lrc *lrc, u32 job_head)
> > > +{
> > > +       if (xe_lrc_ring_head(lrc) == job_head)
> > > +               return 0;
> > > +
> > > +       if (job_head < xe_lrc_ring_head(lrc))
> > > +               return xe_lrc_ring_head(lrc) - job_head;
> > > +
> > > +       return lrc->ring.size - job_head + xe_lrc_ring_head(lrc);
> > > +}
> > > +
> > >  static void guc_exec_queue_start(struct xe_exec_queue *q)
> > >  {
> > >         struct xe_gpu_scheduler *sched = &q->guc->sched;
> > >  
> > >         if (!exec_queue_killed_or_banned_or_wedged(q)) {
> > > +               struct xe_sched_job *job;
> > >                 int i;
> > >  
> > > +               job = xe_sched_first_pending_job(&q->guc->sched);
> > 
> > Can we reuse the sched variable at the top of this function for the
> > argument here?
> > 
> 
> Yes.
> 
> > Also, I'm still going through, but how do we know the first pending
> > job
> > corresponds to the context we are updating here?
> > 
> 
> The pending job list is per context (queue).

Ah sorry I was misreading this... yeah makes sense.

> 
> > And why only update the first in the list? Should we make the same
> > calculation against all of the pending jobs?
> > 
> 
> Jobs are executed serially within a context (queue, LRC, ring). We
> just
> need to look at the first pending job and skip over any instructions
> which have already been executed. BTW, this idea is meant for VF
> pause /
> unpause but I just submitted this patch to convey the idea in the
> existing
> GT reset path as it applies here too + provide hooks for that
> feature.

I'll let Michal/Tomasz respond on the VF pause/unpause case. But yeah
what you said makes sense to me. I have a couple more questions, but
let me respond back on the original patch so I'm not confusing the
review too much going back and forth.

Thanks,
Stuart

> 
> Matt
> 
> > Thanks,
> > Stuart
> > 
> > > +
> > >                 trace_xe_exec_queue_resubmit(q);
> > > -               for (i = 0; i < q->width; ++i)
> > > -                       xe_lrc_set_ring_head(q->lrc[i], q-
> > > >lrc[i]-
> > > > ring.tail);
> > > +               for (i = 0; i < q->width; ++i) {
> > > +                       int offset = !job ? 0 :
> > > +                               guc_lrc_offset(q->lrc[i], job-
> > > > ptrs[i].head);
> > > +
> > > +                       xe_lrc_set_ring_head(q->lrc[i], (q-
> > > >lrc[i]-
> > > > ring.tail +
> > > +                                            offset) % q->lrc[i]-
> > > > ring.size);
> > > +               }
> > >                 xe_sched_resubmit_jobs(sched);
> > >         }
> > >  
> > > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c
> > > b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > index 5f15360d14bf..4dad28f0614d 100644
> > > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > @@ -245,12 +245,14 @@ static int emit_copy_timestamp(struct
> > > xe_lrc
> > > *lrc, u32 *dw, int i)
> > >  
> > >  /* for engines that don't require any special HW handling (no
> > > EUs,
> > > no aux inval, etc) */
> > >  static void __emit_job_gen12_simple(struct xe_sched_job *job,
> > > struct
> > > xe_lrc *lrc,
> > > -                                   u64 batch_addr, u32 seqno)
> > > +                                   u64 batch_addr, u32 *head,
> > > u32
> > > seqno)
> > >  {
> > >         u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > >         u32 ppgtt_flag = get_ppgtt_flag(job);
> > >         struct xe_gt *gt = job->q->gt;
> > >  
> > > +       *head = lrc->ring.tail;
> > > +
> > >         i = emit_copy_timestamp(lrc, dw, i);
> > >  
> > >         if (job->ring_ops_flush_tlb) {
> > > @@ -296,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe)
> > >  }
> > >  
> > >  static void __emit_job_gen12_video(struct xe_sched_job *job,
> > > struct
> > > xe_lrc *lrc,
> > > -                                  u64 batch_addr, u32 seqno)
> > > +                                  u64 batch_addr, u32 *head, u32
> > > seqno)
> > >  {
> > >         u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > >         u32 ppgtt_flag = get_ppgtt_flag(job);
> > > @@ -304,6 +306,8 @@ static void __emit_job_gen12_video(struct
> > > xe_sched_job *job, struct xe_lrc *lrc,
> > >         struct xe_device *xe = gt_to_xe(gt);
> > >         bool decode = job->q->class ==
> > > XE_ENGINE_CLASS_VIDEO_DECODE;
> > >  
> > > +       *head = lrc->ring.tail;
> > > +
> > >         i = emit_copy_timestamp(lrc, dw, i);
> > >  
> > >         dw[i++] = preparser_disable(true);
> > > @@ -346,7 +350,8 @@ static void __emit_job_gen12_video(struct
> > > xe_sched_job *job, struct xe_lrc *lrc,
> > >  
> > >  static void __emit_job_gen12_render_compute(struct xe_sched_job
> > > *job,
> > >                                             struct xe_lrc *lrc,
> > > -                                           u64 batch_addr, u32
> > > seqno)
> > > +                                           u64 batch_addr, u32
> > > *head,
> > > +                                           u32 seqno)
> > >  {
> > >         u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > >         u32 ppgtt_flag = get_ppgtt_flag(job);
> > > @@ -355,6 +360,8 @@ static void
> > > __emit_job_gen12_render_compute(struct xe_sched_job *job,
> > >         bool lacks_render = !(gt->info.engine_mask &
> > > XE_HW_ENGINE_RCS_MASK);
> > >         u32 mask_flags = 0;
> > >  
> > > +       *head = lrc->ring.tail;
> > > +
> > >         i = emit_copy_timestamp(lrc, dw, i);
> > >  
> > >         dw[i++] = preparser_disable(true);
> > > @@ -396,11 +403,14 @@ static void
> > > __emit_job_gen12_render_compute(struct xe_sched_job *job,
> > >  }
> > >  
> > >  static void emit_migration_job_gen12(struct xe_sched_job *job,
> > > -                                    struct xe_lrc *lrc, u32
> > > seqno)
> > > +                                    struct xe_lrc *lrc, u32
> > > *head,
> > > +                                    u32 seqno)
> > >  {
> > >         u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
> > >         u32 dw[MAX_JOB_SIZE_DW], i = 0;
> > >  
> > > +       *head = lrc->ring.tail;
> > > +
> > >         i = emit_copy_timestamp(lrc, dw, i);
> > >  
> > >         i = emit_store_imm_ggtt(saddr, seqno, dw, i);
> > > @@ -434,6 +444,7 @@ static void emit_job_gen12_gsc(struct
> > > xe_sched_job *job)
> > >  
> > >         __emit_job_gen12_simple(job, job->q->lrc[0],
> > >                                 job->ptrs[0].batch_addr,
> > > +                               &job->ptrs[0].head,
> > >                                 xe_sched_job_lrc_seqno(job));
> > >  }
> > >  
> > > @@ -443,6 +454,7 @@ static void emit_job_gen12_copy(struct
> > > xe_sched_job *job)
> > >  
> > >         if (xe_sched_job_is_migration(job->q)) {
> > >                 emit_migration_job_gen12(job, job->q->lrc[0],
> > > +                                        &job->ptrs[0].head,
> > >                                         
> > > xe_sched_job_lrc_seqno(job));
> > >                 return;
> > >         }
> > > @@ -450,6 +462,7 @@ static void emit_job_gen12_copy(struct
> > > xe_sched_job *job)
> > >         for (i = 0; i < job->q->width; ++i)
> > >                 __emit_job_gen12_simple(job, job->q->lrc[i],
> > >                                         job->ptrs[i].batch_addr,
> > > +                                       &job->ptrs[i].head,
> > >                                         xe_sched_job_lrc_seqno(jo
> > > b));
> > >  }
> > >  
> > > @@ -461,6 +474,7 @@ static void emit_job_gen12_video(struct
> > > xe_sched_job *job)
> > >         for (i = 0; i < job->q->width; ++i)
> > >                 __emit_job_gen12_video(job, job->q->lrc[i],
> > >                                        job->ptrs[i].batch_addr,
> > > +                                      &job->ptrs[i].head,
> > >                                       
> > > xe_sched_job_lrc_seqno(job));
> > >  }
> > >  
> > > @@ -471,6 +485,7 @@ static void
> > > emit_job_gen12_render_compute(struct
> > > xe_sched_job *job)
> > >         for (i = 0; i < job->q->width; ++i)
> > >                 __emit_job_gen12_render_compute(job, job->q-
> > > >lrc[i],
> > >                                                 job-
> > > > ptrs[i].batch_addr,
> > > +                                               &job-
> > > >ptrs[i].head,
> > >                                                 xe_sched_job_lrc_
> > > seqn
> > > o(job));
> > >  }
> > >  
> > > diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h
> > > b/drivers/gpu/drm/xe/xe_sched_job_types.h
> > > index dbf260dded8d..359f93b0cdca 100644
> > > --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> > > @@ -24,6 +24,8 @@ struct xe_job_ptrs {
> > >         struct dma_fence_chain *chain_fence;
> > >         /** @batch_addr: Batch buffer address. */
> > >         u64 batch_addr;
> > > +       /** @head: The head pointer of the LRC when the job was
> > > submitted */
> > > +       u32 head;
> > >  };
> > >  
> > >  /**
> > 



More information about the Intel-xe mailing list