[PATCH v2 8/9] drm/xe: Use GT TLB invalidation jobs in PT layer

Summers, Stuart stuart.summers at intel.com
Thu Jul 17 22:36:07 UTC 2025


On Thu, 2025-07-17 at 15:35 -0700, Matthew Brost wrote:
> On Thu, Jul 17, 2025 at 04:26:13PM -0600, Summers, Stuart wrote:
> > On Thu, 2025-07-17 at 14:07 -0700, Matthew Brost wrote:
> > > On Thu, Jul 17, 2025 at 03:00:14PM -0600, Summers, Stuart wrote:
> > > > On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > > > > Rather than open-coding GT TLB invalidations in the PT layer,
> > > > > use
> > > > > GT
> > > > > TLB
> > > > > invalidation jobs. The real benefit is that GT TLB
> > > > > invalidation
> > > > > jobs
> > > > > use
> > > > > a single dma-fence context, allowing the generated fences to
> > > > > be
> > > > > squashed
> > > > > in dma-resv/DRM scheduler.
> > > > > 
> > > > > v2:
> > > > >  - s/;;/; (checkpatch)
> > > > >  - Move ijob/mjob job push after range fence install
> > > > > 
> > > > > Suggested-by: Thomas Hellström
> > > > > <thomas.hellstrom at linux.intel.com>
> > > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/xe/xe_migrate.h |   9 ++
> > > > >  drivers/gpu/drm/xe/xe_pt.c      | 178 +++++++++++++---------
> > > > > ----
> > > > > ----
> > > > > --
> > > > >  2 files changed, 80 insertions(+), 107 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/xe/xe_migrate.h
> > > > > b/drivers/gpu/drm/xe/xe_migrate.h
> > > > > index e9d83d320f8c..605398ea773e 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_migrate.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_migrate.h
> > > > > @@ -14,6 +14,7 @@ struct ttm_resource;
> > > > >  
> > > > >  struct xe_bo;
> > > > >  struct xe_gt;
> > > > > +struct xe_gt_tlb_inval_job;
> > > > >  struct xe_exec_queue;
> > > > >  struct xe_migrate;
> > > > >  struct xe_migrate_pt_update;
> > > > > @@ -89,6 +90,14 @@ struct xe_migrate_pt_update {
> > > > >         struct xe_vma_ops *vops;
> > > > >         /** @job: The job if a GPU page-table update. NULL
> > > > > otherwise
> > > > > */
> > > > >         struct xe_sched_job *job;
> > > > > +       /**
> > > > > +        * @ijob: The GT TLB invalidation job for primary
> > > > > tile.
> > > > > NULL
> > > > > otherwise
> > > > > +        */
> > > > > +       struct xe_gt_tlb_inval_job *ijob;
> > > > > +       /**
> > > > > +        * @mjob: The GT TLB invalidation job for media tile.
> > > > > NULL
> > > > > otherwise
> > > > > +        */
> > > > > +       struct xe_gt_tlb_inval_job *mjob;
> > > > >         /** @tile_id: Tile ID of the update */
> > > > >         u8 tile_id;
> > > > >  };
> > > > > diff --git a/drivers/gpu/drm/xe/xe_pt.c
> > > > > b/drivers/gpu/drm/xe/xe_pt.c
> > > > > index c8e63bd23300..67d02307779b 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_pt.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > > > > @@ -13,7 +13,7 @@
> > > > >  #include "xe_drm_client.h"
> > > > >  #include "xe_exec_queue.h"
> > > > >  #include "xe_gt.h"
> > > > > -#include "xe_gt_tlb_invalidation.h"
> > > > > +#include "xe_gt_tlb_inval_job.h"
> > > > >  #include "xe_migrate.h"
> > > > >  #include "xe_pt_types.h"
> > > > >  #include "xe_pt_walk.h"
> > > > > @@ -1261,6 +1261,8 @@ static int op_add_deps(struct xe_vm
> > > > > *vm,
> > > > > struct
> > > > > xe_vma_op *op,
> > > > >  }
> > > > >  
> > > > >  static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > > > > +                                struct xe_gt_tlb_inval_job
> > > > > *ijob,
> > > > > +                                struct xe_gt_tlb_inval_job
> > > > > *mjob,
> > > > >                                  struct xe_vm *vm,
> > > > >                                  struct xe_vma_ops *vops,
> > > > >                                  struct
> > > > > xe_vm_pgtable_update_ops
> > > > > *pt_update_ops,
> > > > > @@ -1328,6 +1330,20 @@ static int
> > > > > xe_pt_vm_dependencies(struct
> > > > > xe_sched_job *job,
> > > > >         for (i = 0; job && !err && i < vops->num_syncs; i++)
> > > > >                 err = xe_sync_entry_add_deps(&vops->syncs[i],
> > > > > job);
> > > > >  
> > > > > +       if (job) {
> > > > > +               if (ijob) {
> > > > > +                       err =
> > > > > xe_gt_tlb_inval_job_alloc_dep(ijob);
> > > > > +                       if (err)
> > > > > +                               return err;
> > > > > +               }
> > > > > +
> > > > > +               if (mjob) {
> > > > > +                       err =
> > > > > xe_gt_tlb_inval_job_alloc_dep(mjob);
> > > > > +                       if (err)
> > > > > +                               return err;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >         return err;
> > > > >  }
> > > > >  
> > > > > @@ -1339,7 +1355,8 @@ static int xe_pt_pre_commit(struct
> > > > > xe_migrate_pt_update *pt_update)
> > > > >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> > > > >                 &vops->pt_update_ops[pt_update->tile_id];
> > > > >  
> > > > > -       return xe_pt_vm_dependencies(pt_update->job, vm,
> > > > > pt_update-
> > > > > > vops,
> > > > > +       return xe_pt_vm_dependencies(pt_update->job,
> > > > > pt_update-
> > > > > > ijob,
> > > > > +                                    pt_update->mjob, vm,
> > > > > pt_update-
> > > > > > vops,
> > > > >                                      pt_update_ops, rftree);
> > > > >  }
> > > > >  
> > > > > @@ -1509,75 +1526,6 @@ static int xe_pt_svm_pre_commit(struct
> > > > > xe_migrate_pt_update *pt_update)
> > > > >  }
> > > > >  #endif
> > > > >  
> > > > > -struct invalidation_fence {
> > > > > -       struct xe_gt_tlb_invalidation_fence base;
> > > > > -       struct xe_gt *gt;
> > > > > -       struct dma_fence *fence;
> > > > > -       struct dma_fence_cb cb;
> > > > > -       struct work_struct work;
> > > > > -       u64 start;
> > > > > -       u64 end;
> > > > > -       u32 asid;
> > > > > -};
> > > > > -
> > > > > -static void invalidation_fence_cb(struct dma_fence *fence,
> > > > > -                                 struct dma_fence_cb *cb)
> > > > > -{
> > > > > -       struct invalidation_fence *ifence =
> > > > > -               container_of(cb, struct invalidation_fence,
> > > > > cb);
> > > > > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > > > > -
> > > > > -       trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence-
> > > > > >base);
> > > > > -       if (!ifence->fence->error) {
> > > > > -               queue_work(system_wq, &ifence->work);
> > > > > -       } else {
> > > > > -               ifence->base.base.error = ifence->fence-
> > > > > >error;
> > > > > -               xe_gt_tlb_invalidation_fence_signal(&ifence-
> > > > > > base);
> > > > > -       }
> > > > > -       dma_fence_put(ifence->fence);
> > > > > -}
> > > > > -
> > > > > -static void invalidation_fence_work_func(struct work_struct
> > > > > *w)
> > > > > -{
> > > > > -       struct invalidation_fence *ifence =
> > > > > -               container_of(w, struct invalidation_fence,
> > > > > work);
> > > > > -       struct xe_device *xe = gt_to_xe(ifence->gt);
> > > > > -
> > > > > -       trace_xe_gt_tlb_invalidation_fence_work_func(xe,
> > > > > &ifence-
> > > > > > base);
> > > > > -       xe_gt_tlb_invalidation_range(ifence->gt, &ifence-
> > > > > >base,
> > > > > ifence->start,
> > > > > -                                    ifence->end, ifence-
> > > > > >asid);
> > > > > -}
> > > > > -
> > > > > -static void invalidation_fence_init(struct xe_gt *gt,
> > > > > -                                   struct invalidation_fence
> > > > > *ifence,
> > > > > -                                   struct dma_fence *fence,
> > > > > -                                   u64 start, u64 end, u32
> > > > > asid)
> > > > > -{
> > > > > -       int ret;
> > > > > -
> > > > > -
> > > > >        trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt),
> > > > > &ifence->base);
> > > > > -
> > > > > -       xe_gt_tlb_invalidation_fence_init(gt, &ifence->base,
> > > > > false);
> > > > > -
> > > > > -       ifence->fence = fence;
> > > > > -       ifence->gt = gt;
> > > > > -       ifence->start = start;
> > > > > -       ifence->end = end;
> > > > > -       ifence->asid = asid;
> > > > > -
> > > > > -       INIT_WORK(&ifence->work,
> > > > > invalidation_fence_work_func);
> > > > > -       ret = dma_fence_add_callback(fence, &ifence->cb,
> > > > > invalidation_fence_cb);
> > > > > -       if (ret == -ENOENT) {
> > > > > -               dma_fence_put(ifence->fence);   /* Usually
> > > > > dropped in
> > > > > CB */
> > > > > -               invalidation_fence_work_func(&ifence->work);
> > > > > -       } else if (ret) {
> > > > > -               dma_fence_put(&ifence->base.base);      /*
> > > > > Caller
> > > > > ref
> > > > > */
> > > > > -               dma_fence_put(&ifence->base.base);      /*
> > > > > Creation
> > > > > ref */
> > > > > -       }
> > > > > -
> > > > > -       xe_gt_assert(gt, !ret || ret == -ENOENT);
> > > > > -}
> > > > > -
> > > > >  struct xe_pt_stage_unbind_walk {
> > > > >         /** @base: The pagewalk base-class. */
> > > > >         struct xe_pt_walk base;
> > > > > @@ -2407,8 +2355,8 @@ xe_pt_update_ops_run(struct xe_tile
> > > > > *tile,
> > > > > struct xe_vma_ops *vops)
> > > > >         struct xe_vm *vm = vops->vm;
> > > > >         struct xe_vm_pgtable_update_ops *pt_update_ops =
> > > > >                 &vops->pt_update_ops[tile->id];
> > > > > -       struct dma_fence *fence;
> > > > > -       struct invalidation_fence *ifence = NULL, *mfence =
> > > > > NULL;
> > > > > +       struct dma_fence *fence, *ifence, *mfence;
> > > > > +       struct xe_gt_tlb_inval_job *ijob = NULL, *mjob =
> > > > > NULL;
> > > > >         struct dma_fence **fences = NULL;
> > > > >         struct dma_fence_array *cf = NULL;
> > > > >         struct xe_range_fence *rfence;
> > > > > @@ -2440,34 +2388,47 @@ xe_pt_update_ops_run(struct xe_tile
> > > > > *tile,
> > > > > struct xe_vma_ops *vops)
> > > > >  #endif
> > > > >  
> > > > >         if (pt_update_ops->needs_invalidation) {
> > > > > -               ifence = kzalloc(sizeof(*ifence),
> > > > > GFP_KERNEL);
> > > > > -               if (!ifence) {
> > > > > -                       err = -ENOMEM;
> > > > > +               ijob =
> > > > > xe_gt_tlb_inval_job_create(pt_update_ops-
> > > > > > q,
> > > > > +                                                 tile-
> > > > > > primary_gt,
> > > > > +                                                
> > > > > pt_update_ops-
> > > > > > start,
> > > > > +                                                
> > > > > pt_update_ops-
> > > > > > last,
> > > > > +                                                 vm-
> > > > > >usm.asid);
> > > > > +
> > > > 
> > > > Remove extra line.
> > > > 
> > > 
> > > Sure.
> > > 
> > > > > +               if (IS_ERR(ijob)) {
> > > > > +                       err = PTR_ERR(ijob);
> > > > >                         goto kill_vm_tile1;
> > > > >                 }
> > > > > +
> > > > >                 if (tile->media_gt) {
> > > > > -                       mfence = kzalloc(sizeof(*ifence),
> > > > 
> > > > I realize it's the same, but this should probably be
> > > > sizeof(*mfence).
> > > > 
> > > 
> > > Well it is getting removed.
> > 
> > Heh, somehow I misread that...
> > 
> > > 
> > > 
> > > > > GFP_KERNEL);
> > > > > -                       if (!mfence) {
> > > > > -                               err = -ENOMEM;
> > > > > -                               goto free_ifence;
> > > > > +                       mjob =
> > > > > xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > > > > +                                                        
> > > > > tile-
> > > > > > media_gt,
> > > > > +                                                        
> > > > > pt_update_ops->start,
> > > > > +                                                        
> > > > > pt_update_ops->last,
> > > > > +                                                         vm-
> > > > > > usm.asid);
> > > > > +                       if (IS_ERR(mjob)) {
> > > > > +                               err = PTR_ERR(mjob);
> > > > > +                               goto free_ijob;
> > > > 
> > > > I think this needs a little more granularity. In free_ijob
> > > > below,
> > > > we're
> > > > also doing a kfree for fences and cf, both of which at this
> > > > point
> > > > aren't yet allocated. I realize you aren't changing anything
> > > > really
> > > > here, but that looks wrong to me. Maybe we just haven't hit an
> > > > issue
> > > > here because we've never tested the ENOMEM case?
> > > > 
> > > 
> > > fences & cf are initialized to NULL, kfree on a NULL is a nop.
> > > 
> > > Same with xe_gt_tlb_inval_job_put, skips on NULL or IS_ERR.
> > 
> > Yeah I saw the job_put.. and true on the kfree... I guess we're
> > saving
> > a couple of labels at the bottom of the function, but it still
> > feels
> > wrong as we're depending on the underlying implementation, which
> > would
> > break if we changed that or wrapped it somehow.
> > 
> > Anyway not a deal breaker, it's the same as it was before anyway.
> > 
> 
> I get what you are saying, but we do things like this all over the
> driver, in particular with kfree and dma_fence_get/put, which rely on
> NULL being an acceptable argument. I'm not saying this is the best
> practice, just saying it is done everywhere in Xe / Linux. In either
> case, if kfree or dma_fence_get/put changed to not accept NULL, the
> entire Linux kernel would explode, so it is pretty safe to assume this
> will not be changing.

Yeah I get that and it makes sense. I do think in general we should aim
to do things right regardless of the rest of the kernel implementation
(within reason of course). But as I said, this was already implemented
this way; you're just changing the pointers around a bit. No worries
here for now. Maybe we can look at refactoring some of this some time
down the road when there's time.

Thanks,
Stuart

> 
> Matt
> 
> > > 
> > > > >                         }
> > > > >                         fences = kmalloc_array(2,
> > > > > sizeof(*fences),
> > > > > GFP_KERNEL);
> > > > >                         if (!fences) {
> > > > >                                 err = -ENOMEM;
> > > > > -                               goto free_ifence;
> > > > > +                               goto free_ijob;
> > > > >                         }
> > > > >                         cf = dma_fence_array_alloc(2);
> > > > >                         if (!cf) {
> > > > >                                 err = -ENOMEM;
> > > > > -                               goto free_ifence;
> > > > > +                               goto free_ijob;
> > > > >                         }
> > > > >                 }
> > > > > +
> > > > > +               update.ijob = ijob;
> > > > > +               update.mjob = mjob;
> > > > 
> > > > Is there a reason not to put these inline above where the ijob
> > > > and
> > > > mjob
> > > > are allocated? That way if we moved this to a loop eventually
> > > > (not
> > > > here, in a future patch) we could more easily reduce the number
> > > > of
> > > > indentations here by doing:
> > > 
> > > Sure, can move.
> > > 
> > > > if (!media_gt)
> > > >     continue;
> > > > 
> > > > >         }
> > > > >  
> > > > >         rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> > > > >         if (!rfence) {
> > > > >                 err = -ENOMEM;
> > > > > -               goto free_ifence;
> > > > > +               goto free_ijob;
> > > > >         }
> > > > >  
> > > > >         fence = xe_migrate_update_pgtables(tile->migrate,
> > > > > &update);
> > > > > @@ -2491,30 +2452,30 @@ xe_pt_update_ops_run(struct xe_tile
> > > > > *tile,
> > > > > struct xe_vma_ops *vops)
> > > > >                                   pt_update_ops->last,
> > > > > fence))
> > > > >                 dma_fence_wait(fence, false);
> > > > >  
> > > > > -       /* tlb invalidation must be done before signaling
> > > > > rebind
> > > > > */
> > > > 
> > > > Why drop the comment?
> > > > 
> > > 
> > > Let me pull that back in.
> > 
> > Yeah so other than those two (comment here and line break above),
> > the
> > rest looks good to me. I like the job based approach rather than
> > embedding like we used to have, thanks for the change Matt!
> > 
> > Reviewed-by: Stuart Summers <stuart.summers at intel.com>
> > 
> > > 
> > > Matt
> > > 
> > > > Thanks,
> > > > Stuart
> > > > 
> > > > > -       if (ifence) {
> > > > > -               if (mfence)
> > > > > -                       dma_fence_get(fence);
> > > > > -               invalidation_fence_init(tile->primary_gt,
> > > > > ifence,
> > > > > fence,
> > > > > -                                       pt_update_ops->start,
> > > > > -                                       pt_update_ops->last,
> > > > > vm-
> > > > > > usm.asid);
> > > > > -               if (mfence) {
> > > > > -                       invalidation_fence_init(tile-
> > > > > >media_gt,
> > > > > mfence, fence,
> > > > > -
> > > > >                                                pt_update_ops-
> > > > > > start,
> > > > > -
> > > > >                                                pt_update_ops-
> > > > > > last,
> > > > > vm->usm.asid);
> > > > > -                       fences[0] = &ifence->base.base;
> > > > > -                       fences[1] = &mfence->base.base;
> > > > > +       if (ijob) {
> > > > > +               struct dma_fence *__fence;
> > > > > +
> > > > > +               ifence = xe_gt_tlb_inval_job_push(ijob, tile-
> > > > > > migrate, fence);
> > > > > +               __fence = ifence;
> > > > > +
> > > > > +               if (mjob) {
> > > > > +                       fences[0] = ifence;
> > > > > +                       mfence =
> > > > > xe_gt_tlb_inval_job_push(mjob,
> > > > > tile-
> > > > > > migrate,
> > > > > +                                                        
> > > > > fence);
> > > > > +                       fences[1] = mfence;
> > > > > +
> > > > >                         dma_fence_array_init(cf, 2, fences,
> > > > >                                              vm-
> > > > > > composite_fence_ctx,
> > > > >                                              vm-
> > > > > > composite_fence_seqno++,
> > > > >                                              false);
> > > > > -                       fence = &cf->base;
> > > > > -               } else {
> > > > > -                       fence = &ifence->base.base;
> > > > > +                       __fence = &cf->base;
> > > > >                 }
> > > > > +
> > > > > +               dma_fence_put(fence);
> > > > > +               fence = __fence;
> > > > >         }
> > > > >  
> > > > > -       if (!mfence) {
> > > > > +       if (!mjob) {
> > > > >                 dma_resv_add_fence(xe_vm_resv(vm), fence,
> > > > >                                    pt_update_ops-
> > > > > > wait_vm_bookkeep ?
> > > > >                                    DMA_RESV_USAGE_KERNEL :
> > > > > @@ -2523,19 +2484,19 @@ xe_pt_update_ops_run(struct xe_tile
> > > > > *tile,
> > > > > struct xe_vma_ops *vops)
> > > > >                 list_for_each_entry(op, &vops->list, link)
> > > > >                         op_commit(vops->vm, tile,
> > > > > pt_update_ops,
> > > > > op,
> > > > > fence, NULL);
> > > > >         } else {
> > > > > -               dma_resv_add_fence(xe_vm_resv(vm), &ifence-
> > > > > > base.base,
> > > > > +               dma_resv_add_fence(xe_vm_resv(vm), ifence,
> > > > >                                    pt_update_ops-
> > > > > > wait_vm_bookkeep ?
> > > > >                                    DMA_RESV_USAGE_KERNEL :
> > > > >                                    DMA_RESV_USAGE_BOOKKEEP);
> > > > >  
> > > > > -               dma_resv_add_fence(xe_vm_resv(vm), &mfence-
> > > > > > base.base,
> > > > > +               dma_resv_add_fence(xe_vm_resv(vm), mfence,
> > > > >                                    pt_update_ops-
> > > > > > wait_vm_bookkeep ?
> > > > >                                    DMA_RESV_USAGE_KERNEL :
> > > > >                                    DMA_RESV_USAGE_BOOKKEEP);
> > > > >  
> > > > >                 list_for_each_entry(op, &vops->list, link)
> > > > > -                       op_commit(vops->vm, tile,
> > > > > pt_update_ops,
> > > > > op,
> > > > > -                                 &ifence->base.base,
> > > > > &mfence-
> > > > > > base.base);
> > > > > +                       op_commit(vops->vm, tile,
> > > > > pt_update_ops,
> > > > > op,
> > > > > ifence,
> > > > > +                                 mfence);
> > > > >         }
> > > > >  
> > > > >         if (pt_update_ops->needs_svm_lock)
> > > > > @@ -2543,15 +2504,18 @@ xe_pt_update_ops_run(struct xe_tile
> > > > > *tile,
> > > > > struct xe_vma_ops *vops)
> > > > >         if (pt_update_ops->needs_userptr_lock)
> > > > >                 up_read(&vm->userptr.notifier_lock);
> > > > >  
> > > > > +       xe_gt_tlb_inval_job_put(mjob);
> > > > > +       xe_gt_tlb_inval_job_put(ijob);
> > > > > +
> > > > >         return fence;
> > > > >  
> > > > >  free_rfence:
> > > > >         kfree(rfence);
> > > > > -free_ifence:
> > > > > +free_ijob:
> > > > >         kfree(cf);
> > > > >         kfree(fences);
> > > > > -       kfree(mfence);
> > > > > -       kfree(ifence);
> > > > > +       xe_gt_tlb_inval_job_put(mjob);
> > > > > +       xe_gt_tlb_inval_job_put(ijob);
> > > > >  kill_vm_tile1:
> > > > >         if (err != -EAGAIN && err != -ENODATA && tile->id)
> > > > >                 xe_vm_kill(vops->vm, false);
> > > > 
> > 



More information about the Intel-xe mailing list