[PATCH v2 5/9] drm/xe: Add dependency scheduler for GT TLB invalidations to bind queues
Matthew Brost
matthew.brost at intel.com
Tue Jul 15 21:52:12 UTC 2025
On Tue, Jul 15, 2025 at 03:45:55PM -0600, Summers, Stuart wrote:
> On Tue, 2025-07-15 at 14:44 -0700, Matthew Brost wrote:
> > On Tue, Jul 15, 2025 at 03:34:29PM -0600, Summers, Stuart wrote:
> > > On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > > > Add a generic dependency scheduler for GT TLB invalidations, used
> > > > to
> > > > schedule jobs that issue GT TLB invalidations to bind queues.
> > > >
> > > > v2:
> > > > - Use shared GT TLB invalidation queue for dep scheduler
> > > > - Break allocation of dep scheduler into its own function
> > > > - Add define for max number tlb invalidations
> > > > - Skip media if not present
> > > >
> > > > Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > ---
> > > > drivers/gpu/drm/xe/xe_exec_queue.c       | 48 ++++++++++++++++++++++++
> > > > drivers/gpu/drm/xe/xe_exec_queue_types.h | 13 +++++++
> > > > 2 files changed, 61 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > index fee22358cc09..7aaf669cf5fc 100644
> > > > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > @@ -12,6 +12,7 @@
> > > > #include <drm/drm_file.h>
> > > > #include <uapi/drm/xe_drm.h>
> > > >
> > > > +#include "xe_dep_scheduler.h"
> > > > #include "xe_device.h"
> > > > #include "xe_gt.h"
> > > > #include "xe_hw_engine_class_sysfs.h"
> > > > @@ -39,6 +40,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
> > > >
> > > > static void __xe_exec_queue_free(struct xe_exec_queue *q)
> > > > {
> > > > + int i;
> > > > +
> > > > + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
> > > > + if (q->tlb_inval[i].dep_scheduler)
> > > > +			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
> > > > +
> > > > if (xe_exec_queue_uses_pxp(q))
> > > > xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp,
> > > > q);
> > > > if (q->vm)
> > > > @@ -50,6 +57,39 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
> > > > kfree(q);
> > > > }
> > > >
> > > > +static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
> > > > +{
> > > > + struct xe_tile *tile = gt_to_tile(q->gt);
> > > > + int i;
> > > > +
> > > > + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
> > > > + struct xe_dep_scheduler *dep_scheduler;
> > > > + struct xe_gt *gt;
> > > > + struct workqueue_struct *wq;
> > > > +
> > > > + if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
> > > > + gt = tile->primary_gt;
> > > > + else
> > > > + gt = tile->media_gt;
> > > > +
> > > > + if (!gt)
> > > > + continue;
> > > > +
> > > > + wq = gt->tlb_invalidation.job_wq;
> > > > +
> > > > +#define MAX_TLB_INVAL_JOBS	16 /* Picking a reasonable value */
> > >
> > > So if we exceed this number, that means we won't get an invalidation
> > > of that range before a context switch to a new queue?
Sorry, missed this.

No. This is just the maximum number of TLB invalidations, on this queue,
which can be in flight to the hardware at once. If MAX_TLB_INVAL_JOBS is
exceeded, the scheduler simply holds jobs until prior ones complete and
more job credits become available.

Context switching and other queues are not really related here. Think of
this as a FIFO in which only MAX_TLB_INVAL_JOBS jobs can be outstanding at
a time.
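Purely for illustration, the credit model amounts to something like the
sketch below (made-up names, not the actual xe_dep_scheduler internals,
and assuming <linux/list.h> for the list helpers):

struct inval_fifo {
	unsigned int credits;		/* starts at MAX_TLB_INVAL_JOBS */
	struct list_head pending;	/* jobs held in the KMD */
};

/* A job becomes runnable: issue it if a credit is free, else hold it. */
static void inval_fifo_push(struct inval_fifo *f, struct list_head *job)
{
	if (f->credits) {
		f->credits--;
		/* issue the TLB invalidation to the hardware here */
	} else {
		list_add_tail(job, &f->pending);
	}
}

/* A job completes: return the credit and issue the next held job, if any. */
static void inval_fifo_done(struct inval_fifo *f)
{
	f->credits++;
	if (!list_empty(&f->pending)) {
		struct list_head *next = f->pending.next;

		list_del(next);
		f->credits--;
		/* issue the TLB invalidation to the hardware here */
	}
}

Since a held job only gets a credit back when an older job completes,
ordering stays FIFO and at most MAX_TLB_INVAL_JOBS invalidations are ever
outstanding.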
Matt
> > >
> > > > +		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
> > > > +							MAX_TLB_INVAL_JOBS);
> > > > + if (IS_ERR(dep_scheduler))
> > > > + return PTR_ERR(dep_scheduler);
> > > > +
> > > > + q->tlb_inval[i].dep_scheduler = dep_scheduler;
> > > > + }
> > > > +#undef MAX_TLB_INVAL_JOBS
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> > > > 						    struct xe_vm *vm,
> > > > 						    u32 logical_mask,
> > > > @@ -94,6 +134,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> > > > else
> > > > 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
> > >
> > > We want these to be high priority right? I.e. if another user creates
> > > a queue and submits at high priority, can we get stale data on those
> > > executions if this one is blocked behind that high priority one?
> >
> > I'm struggling with exactly what you are asking here.
> >
> > The q->sched_props.priority is the priority of the exec queue being
> > created and is existing code. In the case of bind queues (i.e., ones
> > that call alloc_dep_schedulers) this maps to the priority of the GuC
> > context which runs bind jobs on a hardware copy engine.
> >
> > dep_schedulers do not have a priority, at least not one that is used in
> > any way. This is just a software queue which runs TLB invalidations once
> > the bind jobs complete.
> >
> > wrt high priority passing lower priority ones, that isn't possible if
> > there is a dependency chain. The driver holds any jobs in the KMD until
> > all dependencies are resolved - high priority queues just get serviced
> > by the GuC over low priority queues if both queues have runnable jobs.
> >
> > TL;DR nothing in this patch changes anything wrt priorities.
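For illustration only (made-up helper name, not the actual scheduler code,
assuming <linux/dma-fence.h>), the rule described above is roughly:

/* A job is only handed to the GuC once every dependency fence has
 * signaled; queue priority only orders jobs that are already runnable.
 */
static bool job_deps_resolved(struct dma_fence **deps, unsigned int num_deps)
{
	unsigned int i;

	for (i = 0; i < num_deps; i++)
		if (!dma_fence_is_signaled(deps[i]))
			return false;	/* job stays held in the KMD */

	return true;	/* eligible for GuC scheduling */
}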
>
> You're right I think I read this wrong somehow :(, ignore my comment!
>
> Still the minor question above about the max number of dependency jobs
> when you have time.
>
> Thanks,
> Stuart
>
> >
> > Matt
> >
> > >
> > > Thanks,
> > > Stuart
> > >
> > > >
> > > > +	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
> > > > + err = alloc_dep_schedulers(xe, q);
> > > > + if (err) {
> > > > + __xe_exec_queue_free(q);
> > > > + return ERR_PTR(err);
> > > > + }
> > > > + }
> > > > +
> > > > if (vm)
> > > > q->vm = xe_vm_get(vm);
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > index abdf4a57e6e2..ba443a497b38 100644
> > > > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > @@ -134,6 +134,19 @@ struct xe_exec_queue {
> > > > struct list_head link;
> > > > } lr;
> > > >
> > > > +#define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT 0
> > > > +#define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT 1
> > > > +#define XE_EXEC_QUEUE_TLB_INVAL_COUNT	(XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1)
> > > > +
> > > > + /** @tlb_inval: TLB invalidations exec queue state */
> > > > + struct {
> > > > + /**
> > > > + * @tlb_inval.dep_scheduler: The TLB invalidation
> > > > + * dependency scheduler
> > > > + */
> > > > + struct xe_dep_scheduler *dep_scheduler;
> > > > + } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
> > > > +
> > > > /** @pxp: PXP info tracking */
> > > > struct {
> > > > 		/** @pxp.type: PXP session type used by this queue */
> > >
>