[PATCH v2 8/9] drm/xe: Use GT TLB invalidation jobs in PT layer

Matthew Brost matthew.brost at intel.com
Thu Jul 17 21:07:03 UTC 2025


On Thu, Jul 17, 2025 at 03:00:14PM -0600, Summers, Stuart wrote:
> On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > Rather than open-coding GT TLB invalidations in the PT layer, use GT
> > TLB invalidation jobs. The real benefit is that GT TLB invalidation
> > jobs use a single dma-fence context, allowing the generated fences to
> > be squashed in dma-resv/DRM scheduler.
> > 
> > v2:
> >  - s/;;/;/ (checkpatch)
> >  - Move ijob/mjob job push after range fence install
> > 
> > Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_migrate.h |   9 ++
> >  drivers/gpu/drm/xe/xe_pt.c      | 178 +++++++++++++-------------------
> >  2 files changed, 80 insertions(+), 107 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> > index e9d83d320f8c..605398ea773e 100644
> > --- a/drivers/gpu/drm/xe/xe_migrate.h
> > +++ b/drivers/gpu/drm/xe/xe_migrate.h
> > @@ -14,6 +14,7 @@ struct ttm_resource;
> >  
> >  struct xe_bo;
> >  struct xe_gt;
> > +struct xe_gt_tlb_inval_job;
> >  struct xe_exec_queue;
> >  struct xe_migrate;
> >  struct xe_migrate_pt_update;
> > @@ -89,6 +90,14 @@ struct xe_migrate_pt_update {
> >         struct xe_vma_ops *vops;
> >         /** @job: The job if a GPU page-table update. NULL otherwise */
> >         struct xe_sched_job *job;
> > +       /**
> > +        * @ijob: The GT TLB invalidation job for primary tile. NULL otherwise
> > +        */
> > +       struct xe_gt_tlb_inval_job *ijob;
> > +       /**
> > +        * @mjob: The GT TLB invalidation job for media tile. NULL otherwise
> > +        */
> > +       struct xe_gt_tlb_inval_job *mjob;
> >         /** @tile_id: Tile ID of the update */
> >         u8 tile_id;
> >  };
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index c8e63bd23300..67d02307779b 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -13,7 +13,7 @@
> >  #include "xe_drm_client.h"
> >  #include "xe_exec_queue.h"
> >  #include "xe_gt.h"
> > -#include "xe_gt_tlb_invalidation.h"
> > +#include "xe_gt_tlb_inval_job.h"
> >  #include "xe_migrate.h"
> >  #include "xe_pt_types.h"
> >  #include "xe_pt_walk.h"
> > @@ -1261,6 +1261,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
> >  }
> >  
> >  static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > +                                struct xe_gt_tlb_inval_job *ijob,
> > +                                struct xe_gt_tlb_inval_job *mjob,
> >                                  struct xe_vm *vm,
> >                                  struct xe_vma_ops *vops,
> >                                  struct xe_vm_pgtable_update_ops *pt_update_ops,
> > @@ -1328,6 +1330,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> >         for (i = 0; job && !err && i < vops->num_syncs; i++)
> >                 err = xe_sync_entry_add_deps(&vops->syncs[i], job);
> >  
> > +       if (job) {
> > +               if (ijob) {
> > +                       err = xe_gt_tlb_inval_job_alloc_dep(ijob);
> > +                       if (err)
> > +                               return err;
> > +               }
> > +
> > +               if (mjob) {
> > +                       err = xe_gt_tlb_inval_job_alloc_dep(mjob);
> > +                       if (err)
> > +                               return err;
> > +               }
> > +       }
> > +
> >         return err;
> >  }
> >  
> > @@ -1339,7 +1355,8 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
> >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> >                 &vops->pt_update_ops[pt_update->tile_id];
> >  
> > -       return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
> > +       return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
> > +                                    pt_update->mjob, vm, pt_update->vops,
> >                                      pt_update_ops, rftree);
> >  }
> >  
> > @@ -1509,75 +1526,6 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> >  }
> >  #endif
> >  
> > -struct invalidation_fence {
> > -       struct xe_gt_tlb_invalidation_fence base;
> > -       struct xe_gt *gt;
> > -       struct dma_fence *fence;
> > -       struct dma_fence_cb cb;
> > -       struct work_struct work;
> > -       u64 start;
> > -       u64 end;
> > -       u32 asid;
> > -};
> > -
> > -static void invalidation_fence_cb(struct dma_fence *fence,
> > -                                 struct dma_fence_cb *cb)
> > -{
> > -       struct invalidation_fence *ifence =
> > -               container_of(cb, struct invalidation_fence, cb);
> > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > -
> > -       trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
> > -       if (!ifence->fence->error) {
> > -               queue_work(system_wq, &ifence->work);
> > -       } else {
> > -               ifence->base.base.error = ifence->fence->error;
> > -               xe_gt_tlb_invalidation_fence_signal(&ifence->base);
> > -       }
> > -       dma_fence_put(ifence->fence);
> > -}
> > -
> > -static void invalidation_fence_work_func(struct work_struct *w)
> > -{
> > -       struct invalidation_fence *ifence =
> > -               container_of(w, struct invalidation_fence, work);
> > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > -
> > -       trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
> > -       xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
> > -                                    ifence->end, ifence->asid);
> > -}
> > -
> > -static void invalidation_fence_init(struct xe_gt *gt,
> > -                                   struct invalidation_fence *ifence,
> > -                                   struct dma_fence *fence,
> > -                                   u64 start, u64 end, u32 asid)
> > -{
> > -       int ret;
> > -
> > -       trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
> > -
> > -       xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
> > -
> > -       ifence->fence = fence;
> > -       ifence->gt = gt;
> > -       ifence->start = start;
> > -       ifence->end = end;
> > -       ifence->asid = asid;
> > -
> > -       INIT_WORK(&ifence->work, invalidation_fence_work_func);
> > -       ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
> > -       if (ret == -ENOENT) {
> > -               dma_fence_put(ifence->fence);   /* Usually dropped in CB */
> > -               invalidation_fence_work_func(&ifence->work);
> > -       } else if (ret) {
> > -               dma_fence_put(&ifence->base.base);      /* Caller ref */
> > -               dma_fence_put(&ifence->base.base);      /* Creation ref */
> > -       }
> > -
> > -       xe_gt_assert(gt, !ret || ret == -ENOENT);
> > -}
> > -
> >  struct xe_pt_stage_unbind_walk {
> >         /** @base: The pagewalk base-class. */
> >         struct xe_pt_walk base;
> > @@ -2407,8 +2355,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >         struct xe_vm *vm = vops->vm;
> >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> >                 &vops->pt_update_ops[tile->id];
> > -       struct dma_fence *fence;
> > -       struct invalidation_fence *ifence = NULL, *mfence = NULL;
> > +       struct dma_fence *fence, *ifence, *mfence;
> > +       struct xe_gt_tlb_inval_job *ijob = NULL, *mjob = NULL;
> >         struct dma_fence **fences = NULL;
> >         struct dma_fence_array *cf = NULL;
> >         struct xe_range_fence *rfence;
> > @@ -2440,34 +2388,47 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >  #endif
> >  
> >         if (pt_update_ops->needs_invalidation) {
> > -               ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > -               if (!ifence) {
> > -                       err = -ENOMEM;
> > +               ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > +                                                 tile->primary_gt,
> > +                                                 pt_update_ops->start,
> > +                                                 pt_update_ops->last,
> > +                                                 vm->usm.asid);
> > +
> 
> Remove extra line.
> 

Sure.

> > +               if (IS_ERR(ijob)) {
> > +                       err = PTR_ERR(ijob);
> >                         goto kill_vm_tile1;
> >                 }
> > +
> >                 if (tile->media_gt) {
> > -                       mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> 
> I realize it's the same, but this should probably be sizeof(*mfence).
> 

Well it is getting removed.
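
(FWIW, the idiom Stuart is pointing at -- sizing the allocation from the
pointer being assigned rather than from a type name -- looks like this
as a generic sketch, 'struct foo' being purely hypothetical:

	struct foo *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);	/* stays correct if m's type changes */
	if (!m)
		return -ENOMEM;

But as noted, the kzalloc goes away entirely in this patch.)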


> > -                       if (!mfence) {
> > -                               err = -ENOMEM;
> > -                               goto free_ifence;
> > +                       mjob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > +                                                         tile->media_gt,
> > +                                                         pt_update_ops->start,
> > +                                                         pt_update_ops->last,
> > +                                                         vm->usm.asid);
> > +                       if (IS_ERR(mjob)) {
> > +                               err = PTR_ERR(mjob);
> > +                               goto free_ijob;
> 
> I think this needs a little more granularity. In free_ijob below, we're
> also doing a kfree for fences and cf, both of which at this point
> aren't yet allocated. I realize you aren't changing anything really
> here, but that looks wrong to me. Maybe we just haven't hit an issue
> here because we've never tested the ENOMEM case?
> 

fences & cf are initialized to NULL, and kfree() on a NULL pointer is a no-op.

Same with xe_gt_tlb_inval_job_put(), which skips NULL or IS_ERR() pointers.
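
For illustration, the unwind path as it stands in this patch, with the
NULL/IS_ERR() behavior called out (comments mine):

free_rfence:
	kfree(rfence);
free_ijob:
	kfree(cf);	/* still NULL if we bailed before dma_fence_array_alloc() */
	kfree(fences);	/* still NULL if we bailed before kmalloc_array() */
	xe_gt_tlb_inval_job_put(mjob);	/* no-op on NULL or IS_ERR() */
	xe_gt_tlb_inval_job_put(ijob);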

> >                         }
> >                         fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
> >                         if (!fences) {
> >                                 err = -ENOMEM;
> > -                               goto free_ifence;
> > +                               goto free_ijob;
> >                         }
> >                         cf = dma_fence_array_alloc(2);
> >                         if (!cf) {
> >                                 err = -ENOMEM;
> > -                               goto free_ifence;
> > +                               goto free_ijob;
> >                         }
> >                 }
> > +
> > +               update.ijob = ijob;
> > +               update.mjob = mjob;
> 
> Is there a reason not to put these inline above where the ijob and mjob
> are allocated? That way if we moved this to a loop eventually (not
> here, in a future patch) we could more easily reduce the number of
> indentations here by doing:
> 
> if (!media_gt)
>     continue;

Sure, can move.
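
Something like this, I'd imagine -- rough sketch only, the gts[] array
and update.jobs[] field are hypothetical and not part of this patch:

	struct xe_gt *gts[] = { tile->primary_gt, tile->media_gt };
	struct xe_gt_tlb_inval_job *job;
	int i;

	for (i = 0; i < ARRAY_SIZE(gts); i++) {
		if (!gts[i])
			continue;	/* e.g. no media GT on this tile */

		job = xe_gt_tlb_inval_job_create(pt_update_ops->q, gts[i],
						 pt_update_ops->start,
						 pt_update_ops->last,
						 vm->usm.asid);
		if (IS_ERR(job)) {
			err = PTR_ERR(job);
			goto free_ijob;
		}
		update.jobs[i] = job;	/* would replace update.ijob/update.mjob */
	}
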
> >         }
> >  
> >         rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> >         if (!rfence) {
> >                 err = -ENOMEM;
> > -               goto free_ifence;
> > +               goto free_ijob;
> >         }
> >  
> >         fence = xe_migrate_update_pgtables(tile->migrate, &update);
> > @@ -2491,30 +2452,30 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >                                   pt_update_ops->last, fence))
> >                 dma_fence_wait(fence, false);
> >  
> > -       /* tlb invalidation must be done before signaling rebind */
> 
> Why drop the comment?
> 

Let me pull that back in.
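
i.e. restored right above the new ijob block (sketch against this patch,
comment text verbatim from the old code):

	/* tlb invalidation must be done before signaling rebind */
	if (ijob) {
		struct dma_fence *__fence;

		ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
		...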

Matt

> Thanks,
> Stuart
> 
> > -       if (ifence) {
> > -               if (mfence)
> > -                       dma_fence_get(fence);
> > -               invalidation_fence_init(tile->primary_gt, ifence, fence,
> > -                                       pt_update_ops->start,
> > -                                       pt_update_ops->last, vm->usm.asid);
> > -               if (mfence) {
> > -                       invalidation_fence_init(tile->media_gt, mfence, fence,
> > -                                               pt_update_ops->start,
> > -                                               pt_update_ops->last, vm->usm.asid);
> > -                       fences[0] = &ifence->base.base;
> > -                       fences[1] = &mfence->base.base;
> > +       if (ijob) {
> > +               struct dma_fence *__fence;
> > +
> > +               ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
> > +               __fence = ifence;
> > +
> > +               if (mjob) {
> > +                       fences[0] = ifence;
> > +                       mfence = xe_gt_tlb_inval_job_push(mjob, tile->migrate,
> > +                                                         fence);
> > +                       fences[1] = mfence;
> > +
> >                         dma_fence_array_init(cf, 2, fences,
> >                                              vm->composite_fence_ctx,
> >                                              vm->composite_fence_seqno++,
> >                                              false);
> > -                       fence = &cf->base;
> > -               } else {
> > -                       fence = &ifence->base.base;
> > +                       __fence = &cf->base;
> >                 }
> > +
> > +               dma_fence_put(fence);
> > +               fence = __fence;
> >         }
> >  
> > -       if (!mfence) {
> > +       if (!mjob) {
> >                 dma_resv_add_fence(xe_vm_resv(vm), fence,
> >                                    pt_update_ops->wait_vm_bookkeep ?
> >                                    DMA_RESV_USAGE_KERNEL :
> > @@ -2523,19 +2484,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >                 list_for_each_entry(op, &vops->list, link)
> >                         op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
> >         } else {
> > -               dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
> > +               dma_resv_add_fence(xe_vm_resv(vm), ifence,
> >                                    pt_update_ops->wait_vm_bookkeep ?
> >                                    DMA_RESV_USAGE_KERNEL :
> >                                    DMA_RESV_USAGE_BOOKKEEP);
> >  
> > -               dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
> > +               dma_resv_add_fence(xe_vm_resv(vm), mfence,
> >                                    pt_update_ops->wait_vm_bookkeep ?
> >                                    DMA_RESV_USAGE_KERNEL :
> >                                    DMA_RESV_USAGE_BOOKKEEP);
> >  
> >                 list_for_each_entry(op, &vops->list, link)
> > -                       op_commit(vops->vm, tile, pt_update_ops, op,
> > -                                 &ifence->base.base, &mfence->base.base);
> > +                       op_commit(vops->vm, tile, pt_update_ops, op, ifence,
> > +                                 mfence);
> >         }
> >  
> >         if (pt_update_ops->needs_svm_lock)
> > @@ -2543,15 +2504,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >         if (pt_update_ops->needs_userptr_lock)
> >                 up_read(&vm->userptr.notifier_lock);
> >  
> > +       xe_gt_tlb_inval_job_put(mjob);
> > +       xe_gt_tlb_inval_job_put(ijob);
> > +
> >         return fence;
> >  
> >  free_rfence:
> >         kfree(rfence);
> > -free_ifence:
> > +free_ijob:
> >         kfree(cf);
> >         kfree(fences);
> > -       kfree(mfence);
> > -       kfree(ifence);
> > +       xe_gt_tlb_inval_job_put(mjob);
> > +       xe_gt_tlb_inval_job_put(ijob);
> >  kill_vm_tile1:
> >         if (err != -EAGAIN && err != -ENODATA && tile->id)
> >                 xe_vm_kill(vops->vm, false);
> 