[PATCH v2 8/9] drm/xe: Use GT TLB invalidation jobs in PT layer

Summers, Stuart stuart.summers at intel.com
Thu Jul 17 22:26:13 UTC 2025


On Thu, 2025-07-17 at 14:07 -0700, Matthew Brost wrote:
> On Thu, Jul 17, 2025 at 03:00:14PM -0600, Summers, Stuart wrote:
> > On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > > Rather than open-coding GT TLB invalidations in the PT layer, use
> > > GT
> > > TLB
> > > invalidation jobs. The real benefit is that GT TLB invalidation
> > > jobs
> > > use
> > > a single dma-fence context, allowing the generated fences to be
> > > squashed
> > > in dma-resv/DRM scheduler.
> > > 
> > > v2:
> > >  - s/;;/; (checkpatch)
> > >  - Move ijob/mjob job push after range fence install
> > > 
> > > Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_migrate.h |   9 ++
> > >  drivers/gpu/drm/xe/xe_pt.c      | 178 +++++++++++++-------------------
> > >  2 files changed, 80 insertions(+), 107 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> > > index e9d83d320f8c..605398ea773e 100644
> > > --- a/drivers/gpu/drm/xe/xe_migrate.h
> > > +++ b/drivers/gpu/drm/xe/xe_migrate.h
> > > @@ -14,6 +14,7 @@ struct ttm_resource;
> > >  
> > >  struct xe_bo;
> > >  struct xe_gt;
> > > +struct xe_gt_tlb_inval_job;
> > >  struct xe_exec_queue;
> > >  struct xe_migrate;
> > >  struct xe_migrate_pt_update;
> > > @@ -89,6 +90,14 @@ struct xe_migrate_pt_update {
> > >         struct xe_vma_ops *vops;
> > >         /** @job: The job if a GPU page-table update. NULL otherwise */
> > >         struct xe_sched_job *job;
> > > +       /**
> > > +        * @ijob: The GT TLB invalidation job for primary tile. NULL otherwise
> > > +        */
> > > +       struct xe_gt_tlb_inval_job *ijob;
> > > +       /**
> > > +        * @mjob: The GT TLB invalidation job for media tile. NULL otherwise
> > > +        */
> > > +       struct xe_gt_tlb_inval_job *mjob;
> > >         /** @tile_id: Tile ID of the update */
> > >         u8 tile_id;
> > >  };
> > > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > > index c8e63bd23300..67d02307779b 100644
> > > --- a/drivers/gpu/drm/xe/xe_pt.c
> > > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > > @@ -13,7 +13,7 @@
> > >  #include "xe_drm_client.h"
> > >  #include "xe_exec_queue.h"
> > >  #include "xe_gt.h"
> > > -#include "xe_gt_tlb_invalidation.h"
> > > +#include "xe_gt_tlb_inval_job.h"
> > >  #include "xe_migrate.h"
> > >  #include "xe_pt_types.h"
> > >  #include "xe_pt_walk.h"
> > > @@ -1261,6 +1261,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
> > >  }
> > >  
> > >  static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > > +                                struct xe_gt_tlb_inval_job *ijob,
> > > +                                struct xe_gt_tlb_inval_job *mjob,
> > >                                  struct xe_vm *vm,
> > >                                  struct xe_vma_ops *vops,
> > >                                  struct xe_vm_pgtable_update_ops *pt_update_ops,
> > > @@ -1328,6 +1330,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > >         for (i = 0; job && !err && i < vops->num_syncs; i++)
> > >                 err = xe_sync_entry_add_deps(&vops->syncs[i], job);
> > >  
> > > +       if (job) {
> > > +               if (ijob) {
> > > +                       err = xe_gt_tlb_inval_job_alloc_dep(ijob);
> > > +                       if (err)
> > > +                               return err;
> > > +               }
> > > +
> > > +               if (mjob) {
> > > +                       err = xe_gt_tlb_inval_job_alloc_dep(mjob);
> > > +                       if (err)
> > > +                               return err;
> > > +               }
> > > +       }
> > > +
> > >         return err;
> > >  }
> > >  
> > > @@ -1339,7 +1355,8 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
> > >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> > >                 &vops->pt_update_ops[pt_update->tile_id];
> > > 
> > > -       return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
> > > +       return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
> > > +                                    pt_update->mjob, vm, pt_update->vops,
> > >                                      pt_update_ops, rftree);
> > >  }
> > >  
> > > @@ -1509,75 +1526,6 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> > >  }
> > >  #endif
> > >  
> > > -struct invalidation_fence {
> > > -       struct xe_gt_tlb_invalidation_fence base;
> > > -       struct xe_gt *gt;
> > > -       struct dma_fence *fence;
> > > -       struct dma_fence_cb cb;
> > > -       struct work_struct work;
> > > -       u64 start;
> > > -       u64 end;
> > > -       u32 asid;
> > > -};
> > > -
> > > -static void invalidation_fence_cb(struct dma_fence *fence,
> > > -                                 struct dma_fence_cb *cb)
> > > -{
> > > -       struct invalidation_fence *ifence =
> > > -               container_of(cb, struct invalidation_fence, cb);
> > > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > > -
> > > -       trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
> > > -       if (!ifence->fence->error) {
> > > -               queue_work(system_wq, &ifence->work);
> > > -       } else {
> > > -               ifence->base.base.error = ifence->fence->error;
> > > -               xe_gt_tlb_invalidation_fence_signal(&ifence->base);
> > > -       }
> > > -       dma_fence_put(ifence->fence);
> > > -}
> > > -
> > > -static void invalidation_fence_work_func(struct work_struct *w)
> > > -{
> > > -       struct invalidation_fence *ifence =
> > > -               container_of(w, struct invalidation_fence, work);
> > > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > > -
> > > -       trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
> > > -       xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
> > > -                                    ifence->end, ifence->asid);
> > > -}
> > > -
> > > -static void invalidation_fence_init(struct xe_gt *gt,
> > > -                                   struct invalidation_fence *ifence,
> > > -                                   struct dma_fence *fence,
> > > -                                   u64 start, u64 end, u32 asid)
> > > -{
> > > -       int ret;
> > > -
> > > -       trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
> > > -
> > > -       xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
> > > -
> > > -       ifence->fence = fence;
> > > -       ifence->gt = gt;
> > > -       ifence->start = start;
> > > -       ifence->end = end;
> > > -       ifence->asid = asid;
> > > -
> > > -       INIT_WORK(&ifence->work, invalidation_fence_work_func);
> > > -       ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
> > > -       if (ret == -ENOENT) {
> > > -               dma_fence_put(ifence->fence);   /* Usually dropped in CB */
> > > -               invalidation_fence_work_func(&ifence->work);
> > > -       } else if (ret) {
> > > -               dma_fence_put(&ifence->base.base);      /* Caller ref */
> > > -               dma_fence_put(&ifence->base.base);      /* Creation ref */
> > > -       }
> > > -
> > > -       xe_gt_assert(gt, !ret || ret == -ENOENT);
> > > -}
> > > -
> > >  struct xe_pt_stage_unbind_walk {
> > >         /** @base: The pagewalk base-class. */
> > >         struct xe_pt_walk base;
> > > @@ -2407,8 +2355,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > >         struct xe_vm *vm = vops->vm;
> > >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> > >                 &vops->pt_update_ops[tile->id];
> > > -       struct dma_fence *fence;
> > > -       struct invalidation_fence *ifence = NULL, *mfence = NULL;
> > > +       struct dma_fence *fence, *ifence, *mfence;
> > > +       struct xe_gt_tlb_inval_job *ijob = NULL, *mjob = NULL;
> > >         struct dma_fence **fences = NULL;
> > >         struct dma_fence_array *cf = NULL;
> > >         struct xe_range_fence *rfence;
> > > @@ -2440,34 +2388,47 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > >  #endif
> > >  
> > >         if (pt_update_ops->needs_invalidation) {
> > > -               ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > > -               if (!ifence) {
> > > -                       err = -ENOMEM;
> > > +               ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > > +                                                 tile->primary_gt,
> > > +                                                 pt_update_ops->start,
> > > +                                                 pt_update_ops->last,
> > > +                                                 vm->usm.asid);
> > > +
> > 
> > Remove extra line.
> > 
> 
> Sure.
> 
> > > +               if (IS_ERR(ijob)) {
> > > +                       err = PTR_ERR(ijob);
> > >                         goto kill_vm_tile1;
> > >                 }
> > > +
> > >                 if (tile->media_gt) {
> > > -                       mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > 
> > I realize it's the same, but this should probably be sizeof(*mfence).
> > 
> 
> Well it is getting removed.

Heh, somehow I misread that...

> 
> 
> > > -                       if (!mfence) {
> > > -                               err = -ENOMEM;
> > > -                               goto free_ifence;
> > > +                       mjob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > > +                                                         tile->media_gt,
> > > +                                                         pt_update_ops->start,
> > > +                                                         pt_update_ops->last,
> > > +                                                         vm->usm.asid);
> > > +                       if (IS_ERR(mjob)) {
> > > +                               err = PTR_ERR(mjob);
> > > +                               goto free_ijob;
> > 
> > I think this needs a little more granularity. In free_ijob below,
> > we're also doing a kfree for fences and cf, both of which at this
> > point aren't yet allocated. I realize you aren't changing anything
> > really here, but that looks wrong to me. Maybe we just haven't hit
> > an issue here because we've never tested the ENOMEM case?
> > 
> 
> fences & cf are initialized to NULL, and kfree() on NULL is a no-op.
> 
> Same with xe_gt_tlb_inval_job_put(), which skips NULL or IS_ERR
> pointers.
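> 
> The put is roughly the following shape (a sketch, modulo the exact
> field and destroy-function names):
> 
> 	void xe_gt_tlb_inval_job_put(struct xe_gt_tlb_inval_job *job)
> 	{
> 		/* Tolerates NULL and ERR_PTR() so unwind paths can call blindly */
> 		if (!IS_ERR_OR_NULL(job))
> 			kref_put(&job->refcount, xe_gt_tlb_inval_job_destroy);
> 	}
> 
> so calling it on the error paths before the jobs exist is fine.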

Yeah, I saw the job_put, and true on the kfree... I guess we're saving
a couple of labels at the bottom of the function, but it still feels
wrong since we're depending on the underlying implementation, which
would break if we changed it or wrapped it somehow.

Anyway, not a deal breaker; it's the same as it was before anyway.
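
To spell out what I mean, the more granular version would be dedicated
unwind labels so each failure only cleans up what actually exists at
that point, e.g. (untested sketch, label names made up):

	cf = dma_fence_array_alloc(2);
	if (!cf) {
		err = -ENOMEM;
		goto free_fences;	/* fences exists, cf does not */
	}
	...
	return fence;

free_cf:
	kfree(cf);
free_fences:
	kfree(fences);
put_mjob:
	xe_gt_tlb_inval_job_put(mjob);
put_ijob:
	xe_gt_tlb_inval_job_put(ijob);
kill_vm_tile1:
	...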

> 
> > >                         }
> > >                         fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
> > >                         if (!fences) {
> > >                                 err = -ENOMEM;
> > > -                               goto free_ifence;
> > > +                               goto free_ijob;
> > >                         }
> > >                         cf = dma_fence_array_alloc(2);
> > >                         if (!cf) {
> > >                                 err = -ENOMEM;
> > > -                               goto free_ifence;
> > > +                               goto free_ijob;
> > >                         }
> > >                 }
> > > +
> > > +               update.ijob = ijob;
> > > +               update.mjob = mjob;
> > 
> > Is there a reason not to put these inline above where the ijob and
> > mjob are allocated? That way if we moved this to a loop eventually
> > (not here, in a future patch) we could more easily reduce the number
> > of indentations here by doing:
> > 
> > if (!media_gt)
> >     continue;
> 
> Sure, can move.
> 
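Just to make the loop idea concrete, something like this is what I was
picturing (very rough, for_each_tlb_gt() is an invented iterator):

	for_each_tlb_gt(gt, tile, id) {	/* primary then media */
		if (!gt)
			continue;	/* e.g. no media GT on this tile */

		job = xe_gt_tlb_inval_job_create(pt_update_ops->q, gt,
						 pt_update_ops->start,
						 pt_update_ops->last,
						 vm->usm.asid);
		if (IS_ERR(job)) {
			err = PTR_ERR(job);
			goto unwind;
		}
		update.jobs[id] = job;
	}
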
> > >         }
> > >  
> > >         rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> > >         if (!rfence) {
> > >                 err = -ENOMEM;
> > > -               goto free_ifence;
> > > +               goto free_ijob;
> > >         }
> > >  
> > >         fence = xe_migrate_update_pgtables(tile->migrate, &update);
> > > @@ -2491,30 +2452,30 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > >                                   pt_update_ops->last, fence))
> > >                 dma_fence_wait(fence, false);
> > >  
> > > -       /* tlb invalidation must be done before signaling rebind */
> > 
> > Why drop the comment?
> > 
> 
> Let me pull that back in.

Yeah, so other than those two (the comment here and the line break
above), the rest looks good to me. I like the job-based approach
rather than the embedding we used to have. Thanks for the change,
Matt!
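
For anyone following along: the squashing the commit message refers to
works because fences that share a dma-fence context can replace each
other in a reservation object instead of piling up. From memory,
dma_resv_add_fence() does roughly this when adding a fence (a sketch,
not the literal implementation; replace_fence_slot() is invented):

	if (old->context == fence->context &&
	    dma_fence_is_later(fence, old)) {
		/* Reuse the slot, dropping the older fence */
		replace_fence_slot(obj, i, fence);
		return;
	}

So with one invalidation-job fence context per GT, repeated binds keep
the VM's resv bounded instead of growing by a fence per invalidation.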

Reviewed-by: Stuart Summers <stuart.summers at intel.com>

> 
> Matt
> 
> > Thanks,
> > Stuart
> > 
> > > -       if (ifence) {
> > > -               if (mfence)
> > > -                       dma_fence_get(fence);
> > > -               invalidation_fence_init(tile->primary_gt, ifence, fence,
> > > -                                       pt_update_ops->start,
> > > -                                       pt_update_ops->last, vm->usm.asid);
> > > -               if (mfence) {
> > > -                       invalidation_fence_init(tile->media_gt, mfence, fence,
> > > -                                               pt_update_ops->start,
> > > -                                               pt_update_ops->last, vm->usm.asid);
> > > -                       fences[0] = &ifence->base.base;
> > > -                       fences[1] = &mfence->base.base;
> > > +       if (ijob) {
> > > +               struct dma_fence *__fence;
> > > +
> > > +               ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
> > > +               __fence = ifence;
> > > +
> > > +               if (mjob) {
> > > +                       fences[0] = ifence;
> > > +                       mfence = xe_gt_tlb_inval_job_push(mjob, tile->migrate,
> > > +                                                         fence);
> > > +                       fences[1] = mfence;
> > > +
> > >                         dma_fence_array_init(cf, 2, fences,
> > >                                              vm->composite_fence_ctx,
> > >                                              vm->composite_fence_seqno++,
> > >                                              false);
> > > -                       fence = &cf->base;
> > > -               } else {
> > > -                       fence = &ifence->base.base;
> > > +                       __fence = &cf->base;
> > >                 }
> > > +
> > > +               dma_fence_put(fence);
> > > +               fence = __fence;
> > >         }
> > >  
> > > -       if (!mfence) {
> > > +       if (!mjob) {
> > >                 dma_resv_add_fence(xe_vm_resv(vm), fence,
> > >                                    pt_update_ops->wait_vm_bookkeep ?
> > >                                    DMA_RESV_USAGE_KERNEL :
> > > @@ -2523,19 +2484,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > >                 list_for_each_entry(op, &vops->list, link)
> > >                         op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
> > >         } else {
> > > -               dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
> > > +               dma_resv_add_fence(xe_vm_resv(vm), ifence,
> > >                                    pt_update_ops->wait_vm_bookkeep ?
> > >                                    DMA_RESV_USAGE_KERNEL :
> > >                                    DMA_RESV_USAGE_BOOKKEEP);
> > > 
> > > -               dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
> > > +               dma_resv_add_fence(xe_vm_resv(vm), mfence,
> > >                                    pt_update_ops->wait_vm_bookkeep ?
> > >                                    DMA_RESV_USAGE_KERNEL :
> > >                                    DMA_RESV_USAGE_BOOKKEEP);
> > > 
> > >                 list_for_each_entry(op, &vops->list, link)
> > > -                       op_commit(vops->vm, tile, pt_update_ops, op,
> > > -                                 &ifence->base.base, &mfence->base.base);
> > > +                       op_commit(vops->vm, tile, pt_update_ops, op, ifence,
> > > +                                 mfence);
> > >         }
> > >  
> > >         if (pt_update_ops->needs_svm_lock)
> > > @@ -2543,15 +2504,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > >         if (pt_update_ops->needs_userptr_lock)
> > >                 up_read(&vm->userptr.notifier_lock);
> > >  
> > > +       xe_gt_tlb_inval_job_put(mjob);
> > > +       xe_gt_tlb_inval_job_put(ijob);
> > > +
> > >         return fence;
> > >  
> > >  free_rfence:
> > >         kfree(rfence);
> > > -free_ifence:
> > > +free_ijob:
> > >         kfree(cf);
> > >         kfree(fences);
> > > -       kfree(mfence);
> > > -       kfree(ifence);
> > > +       xe_gt_tlb_inval_job_put(mjob);
> > > +       xe_gt_tlb_inval_job_put(ijob);
> > >  kill_vm_tile1:
> > >         if (err != -EAGAIN && err != -ENODATA && tile->id)
> > >                 xe_vm_kill(vops->vm, false);
> > 


