[PATCH v2 8/9] drm/xe: Use GT TLB invalidation jobs in PT layer
Matthew Brost
matthew.brost at intel.com
Thu Jul 17 22:35:02 UTC 2025
On Thu, Jul 17, 2025 at 04:26:13PM -0600, Summers, Stuart wrote:
> On Thu, 2025-07-17 at 14:07 -0700, Matthew Brost wrote:
> > On Thu, Jul 17, 2025 at 03:00:14PM -0600, Summers, Stuart wrote:
> > > On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > > > Rather than open-coding GT TLB invalidations in the PT layer, use
> > > > GT TLB invalidation jobs. The real benefit is that GT TLB
> > > > invalidation jobs use a single dma-fence context, allowing the
> > > > generated fences to be squashed in dma-resv/DRM scheduler.
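To illustrate the fence-context point with a rough sketch (f1, f2,
inval_fence_ops and lock here are illustrative placeholders, not the actual
job code): fences that share one context with increasing seqnos supersede
one another, so dma-resv / the DRM scheduler only need to keep the newest
one rather than tracking every invalidation fence individually.

	/* Illustrative only -- why a single shared fence context helps */
	static struct dma_fence f1, f2;
	u64 ctx = dma_fence_context_alloc(1);

	dma_fence_init(&f1, &inval_fence_ops, &lock, ctx, 1);
	dma_fence_init(&f2, &inval_fence_ops, &lock, ctx, 2);

	/* same context + higher seqno => f2 supersedes f1 */
	WARN_ON(!dma_fence_is_later(&f2, &f1));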
> > > >
> > > > v2:
> > > > - s/;;/; (checkpatch)
> > > > - Move ijob/mjob job push after range fence install
> > > >
> > > > Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > ---
> > > >  drivers/gpu/drm/xe/xe_migrate.h |   9 ++
> > > >  drivers/gpu/drm/xe/xe_pt.c      | 178 +++++++++++++-------------------
> > > >  2 files changed, 80 insertions(+), 107 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> > > > index e9d83d320f8c..605398ea773e 100644
> > > > --- a/drivers/gpu/drm/xe/xe_migrate.h
> > > > +++ b/drivers/gpu/drm/xe/xe_migrate.h
> > > > @@ -14,6 +14,7 @@ struct ttm_resource;
> > > >
> > > > struct xe_bo;
> > > > struct xe_gt;
> > > > +struct xe_gt_tlb_inval_job;
> > > > struct xe_exec_queue;
> > > > struct xe_migrate;
> > > > struct xe_migrate_pt_update;
> > > > @@ -89,6 +90,14 @@ struct xe_migrate_pt_update {
> > > >          struct xe_vma_ops *vops;
> > > >          /** @job: The job if a GPU page-table update. NULL otherwise */
> > > >          struct xe_sched_job *job;
> > > > +        /**
> > > > +         * @ijob: The GT TLB invalidation job for primary tile. NULL otherwise
> > > > +         */
> > > > +        struct xe_gt_tlb_inval_job *ijob;
> > > > +        /**
> > > > +         * @mjob: The GT TLB invalidation job for media tile. NULL otherwise
> > > > +         */
> > > > +        struct xe_gt_tlb_inval_job *mjob;
> > > >          /** @tile_id: Tile ID of the update */
> > > >          u8 tile_id;
> > > >  };
> > > > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > > > index c8e63bd23300..67d02307779b 100644
> > > > --- a/drivers/gpu/drm/xe/xe_pt.c
> > > > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > > > @@ -13,7 +13,7 @@
> > > > #include "xe_drm_client.h"
> > > > #include "xe_exec_queue.h"
> > > > #include "xe_gt.h"
> > > > -#include "xe_gt_tlb_invalidation.h"
> > > > +#include "xe_gt_tlb_inval_job.h"
> > > > #include "xe_migrate.h"
> > > > #include "xe_pt_types.h"
> > > > #include "xe_pt_walk.h"
> > > > @@ -1261,6 +1261,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
> > > >  }
> > > >
> > > >  static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > > > +                                 struct xe_gt_tlb_inval_job *ijob,
> > > > +                                 struct xe_gt_tlb_inval_job *mjob,
> > > >                                   struct xe_vm *vm,
> > > >                                   struct xe_vma_ops *vops,
> > > >                                   struct xe_vm_pgtable_update_ops *pt_update_ops,
> > > > @@ -1328,6 +1330,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
> > > >          for (i = 0; job && !err && i < vops->num_syncs; i++)
> > > >                  err = xe_sync_entry_add_deps(&vops->syncs[i], job);
> > > >
> > > > +        if (job) {
> > > > +                if (ijob) {
> > > > +                        err = xe_gt_tlb_inval_job_alloc_dep(ijob);
> > > > +                        if (err)
> > > > +                                return err;
> > > > +                }
> > > > +
> > > > +                if (mjob) {
> > > > +                        err = xe_gt_tlb_inval_job_alloc_dep(mjob);
> > > > +                        if (err)
> > > > +                                return err;
> > > > +                }
> > > > +        }
> > > > +
> > > >          return err;
> > > >  }
> > > >
> > > > @@ -1339,7 +1355,8 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
> > > >          struct xe_vm_pgtable_update_ops *pt_update_ops =
> > > >                  &vops->pt_update_ops[pt_update->tile_id];
> > > >
> > > > -        return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
> > > > +        return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
> > > > +                                     pt_update->mjob, vm, pt_update->vops,
> > > >                                       pt_update_ops, rftree);
> > > >  }
> > > >
> > > > @@ -1509,75 +1526,6 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> > > >  }
> > > >  #endif
> > > >
> > > > -struct invalidation_fence {
> > > > -        struct xe_gt_tlb_invalidation_fence base;
> > > > -        struct xe_gt *gt;
> > > > -        struct dma_fence *fence;
> > > > -        struct dma_fence_cb cb;
> > > > -        struct work_struct work;
> > > > -        u64 start;
> > > > -        u64 end;
> > > > -        u32 asid;
> > > > -};
> > > > -
> > > > -static void invalidation_fence_cb(struct dma_fence *fence,
> > > > -                                  struct dma_fence_cb *cb)
> > > > -{
> > > > -        struct invalidation_fence *ifence =
> > > > -                container_of(cb, struct invalidation_fence, cb);
> > > > -        struct xe_device *xe = gt_to_xe(ifence->gt);
> > > > -
> > > > -        trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
> > > > -        if (!ifence->fence->error) {
> > > > -                queue_work(system_wq, &ifence->work);
> > > > -        } else {
> > > > -                ifence->base.base.error = ifence->fence->error;
> > > > -                xe_gt_tlb_invalidation_fence_signal(&ifence->base);
> > > > -        }
> > > > -        dma_fence_put(ifence->fence);
> > > > -}
> > > > -
> > > > -static void invalidation_fence_work_func(struct work_struct *w)
> > > > -{
> > > > -        struct invalidation_fence *ifence =
> > > > -                container_of(w, struct invalidation_fence, work);
> > > > -        struct xe_device *xe = gt_to_xe(ifence->gt);
> > > > -
> > > > -        trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
> > > > -        xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
> > > > -                                     ifence->end, ifence->asid);
> > > > -}
> > > > -
> > > > -static void invalidation_fence_init(struct xe_gt *gt,
> > > > -                                    struct invalidation_fence *ifence,
> > > > -                                    struct dma_fence *fence,
> > > > -                                    u64 start, u64 end, u32 asid)
> > > > -{
> > > > -        int ret;
> > > > -
> > > > -        trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
> > > > -
> > > > -        xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
> > > > -
> > > > -        ifence->fence = fence;
> > > > -        ifence->gt = gt;
> > > > -        ifence->start = start;
> > > > -        ifence->end = end;
> > > > -        ifence->asid = asid;
> > > > -
> > > > -        INIT_WORK(&ifence->work, invalidation_fence_work_func);
> > > > -        ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
> > > > -        if (ret == -ENOENT) {
> > > > -                dma_fence_put(ifence->fence);   /* Usually dropped in CB */
> > > > -                invalidation_fence_work_func(&ifence->work);
> > > > -        } else if (ret) {
> > > > -                dma_fence_put(&ifence->base.base);      /* Caller ref */
> > > > -                dma_fence_put(&ifence->base.base);      /* Creation ref */
> > > > -        }
> > > > -
> > > > -        xe_gt_assert(gt, !ret || ret == -ENOENT);
> > > > -}
> > > > -
> > > >  struct xe_pt_stage_unbind_walk {
> > > >          /** @base: The pagewalk base-class. */
> > > >          struct xe_pt_walk base;
> > > > @@ -2407,8 +2355,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > > >          struct xe_vm *vm = vops->vm;
> > > >          struct xe_vm_pgtable_update_ops *pt_update_ops =
> > > >                  &vops->pt_update_ops[tile->id];
> > > > -        struct dma_fence *fence;
> > > > -        struct invalidation_fence *ifence = NULL, *mfence = NULL;
> > > > +        struct dma_fence *fence, *ifence, *mfence;
> > > > +        struct xe_gt_tlb_inval_job *ijob = NULL, *mjob = NULL;
> > > >          struct dma_fence **fences = NULL;
> > > >          struct dma_fence_array *cf = NULL;
> > > >          struct xe_range_fence *rfence;
> > > > @@ -2440,34 +2388,47 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > > >  #endif
> > > >
> > > >          if (pt_update_ops->needs_invalidation) {
> > > > -                ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > > > -                if (!ifence) {
> > > > -                        err = -ENOMEM;
> > > > +                ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > > > +                                                  tile->primary_gt,
> > > > +                                                  pt_update_ops->start,
> > > > +                                                  pt_update_ops->last,
> > > > +                                                  vm->usm.asid);
> > > > +
> > >
> > > Remove extra line.
> > >
> >
> > Sure.
> >
> > > > +                if (IS_ERR(ijob)) {
> > > > +                        err = PTR_ERR(ijob);
> > > >                          goto kill_vm_tile1;
> > > >                  }
> > > > +
> > > >                  if (tile->media_gt) {
> > > > -                        mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > > 
> > > I realize it's the same, but this should probably be sizeof(*mfence).
> > > 
> > 
> > Well it is getting removed.
> 
> Heh, somehow I misread that...
> 
> > 
> > 
> > > > -                        if (!mfence) {
> > > > -                                err = -ENOMEM;
> > > > -                                goto free_ifence;
> > > > +                        mjob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
> > > > +                                                          tile->media_gt,
> > > > +                                                          pt_update_ops->start,
> > > > +                                                          pt_update_ops->last,
> > > > +                                                          vm->usm.asid);
> > > > +                        if (IS_ERR(mjob)) {
> > > > +                                err = PTR_ERR(mjob);
> > > > +                                goto free_ijob;
> > >
> > > I think this needs a little more granularity. In free_ijob below,
> > > we're also doing a kfree for fences and cf, both of which at this
> > > point aren't yet allocated. I realize you aren't changing anything
> > > really here, but that looks wrong to me. Maybe we just haven't hit
> > > an issue here because we've never tested the ENOMEM case?
> > >
> >
> > fences & cf are initialized to NULL, kfree on a NULL is a nop.
> >
> > Same with xe_gt_tlb_inval_job_put, skips on NULL or IS_ERR.
>
> Yeah I saw the job_put.. and true on the kfree... I guess we're saving
> a couple of labels at the bottom of the function, but it still feels
> wrong as we're depending on the underlying implementation, which would
> break if we changed that or wrapped it somehow.
>
> Anyway not a deal breaker, it's the same as it was before anyway.
>
I get what you are saying, but we do things like this all over the driver,
in particular with kfree and dma_fence_get/put, which rely on NULL being an
acceptable argument. I'm not saying this is best practice, just that it is
done everywhere in Xe / Linux. In either case, if kfree or
dma_fence_get/put changed to no longer accept NULL, the entire Linux kernel
would explode, so it is pretty safe to assume this will not be changing.

Matt
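
To make the pattern concrete, here is a minimal sketch of the single-label
unwind being discussed (an illustrative function, not the Xe code itself):

	/*
	 * Sketch of the single-label unwind pattern. It relies on
	 * kfree(NULL) being a no-op, so one label can free everything
	 * that *might* have been allocated so far.
	 */
	static int example_unwind(void)
	{
		struct dma_fence **fences = NULL;
		struct dma_fence_array *cf = NULL;
		int err;

		fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			err = -ENOMEM;
			goto free_all;	/* cf still NULL, kfree(NULL) is fine */
		}

		cf = dma_fence_array_alloc(2);
		if (!cf) {
			err = -ENOMEM;
			goto free_all;	/* one label covers both allocations */
		}

		/* on success, fences and cf would be handed off elsewhere */
		return 0;

	free_all:
		kfree(cf);
		kfree(fences);
		return err;
	}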
> >
> > > >                          }
> > > >                          fences = kmalloc_array(2, sizeof(*fences),
> > > >                                                 GFP_KERNEL);
> > > >                          if (!fences) {
> > > >                                  err = -ENOMEM;
> > > > -                                goto free_ifence;
> > > > +                                goto free_ijob;
> > > >                          }
> > > >                          cf = dma_fence_array_alloc(2);
> > > >                          if (!cf) {
> > > >                                  err = -ENOMEM;
> > > > -                                goto free_ifence;
> > > > +                                goto free_ijob;
> > > >                          }
> > > >                  }
> > > > +
> > > > +                update.ijob = ijob;
> > > > +                update.mjob = mjob;
> > >
> > > Is there a reason not to put these inline above where the ijob and
> > > mjob are allocated? That way if we moved this to a loop eventually
> > > (not here, in a future patch) we could more easily reduce the number
> > > of indentations here by doing:
> >
> > Sure, can move.
> >
> > > if (!media_gt)
> > > continue;
> > >
> > > >          }
> > > >
> > > >          rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> > > >          if (!rfence) {
> > > >                  err = -ENOMEM;
> > > > -                goto free_ifence;
> > > > +                goto free_ijob;
> > > >          }
> > > >
> > > >          fence = xe_migrate_update_pgtables(tile->migrate, &update);
> > > > @@ -2491,30 +2452,30 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > > >                                    pt_update_ops->last, fence))
> > > >                  dma_fence_wait(fence, false);
> > > >
> > > > -        /* tlb invalidation must be done before signaling rebind */
> > >
> > > Why drop the comment?
> > >
> >
> > Let me pull that back in.
>
> Yeah so other than those two (comment here and line break above), the
> rest looks good to me. I like the job based approach rather than
> embedding like we used to have, thanks for the change Matt!
>
> Reviewed-by: Stuart Summers <stuart.summers at intel.com>
>
> >
> > Matt
> >
> > > Thanks,
> > > Stuart
> > >
> > > > -        if (ifence) {
> > > > -                if (mfence)
> > > > -                        dma_fence_get(fence);
> > > > -                invalidation_fence_init(tile->primary_gt, ifence, fence,
> > > > -                                        pt_update_ops->start,
> > > > -                                        pt_update_ops->last, vm->usm.asid);
> > > > -                if (mfence) {
> > > > -                        invalidation_fence_init(tile->media_gt, mfence, fence,
> > > > -                                                pt_update_ops->start,
> > > > -                                                pt_update_ops->last, vm->usm.asid);
> > > > -                        fences[0] = &ifence->base.base;
> > > > -                        fences[1] = &mfence->base.base;
> > > > +        if (ijob) {
> > > > +                struct dma_fence *__fence;
> > > > +
> > > > +                ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
> > > > +                __fence = ifence;
> > > > +
> > > > +                if (mjob) {
> > > > +                        fences[0] = ifence;
> > > > +                        mfence = xe_gt_tlb_inval_job_push(mjob, tile->migrate,
> > > > +                                                          fence);
> > > > +                        fences[1] = mfence;
> > > > +
> > > >                          dma_fence_array_init(cf, 2, fences,
> > > >                                               vm->composite_fence_ctx,
> > > >                                               vm->composite_fence_seqno++,
> > > >                                               false);
> > > > -                        fence = &cf->base;
> > > > -                } else {
> > > > -                        fence = &ifence->base.base;
> > > > +                        __fence = &cf->base;
> > > >                  }
> > > > +
> > > > +                dma_fence_put(fence);
> > > > +                fence = __fence;
> > > >          }
> > > >
> > > > -        if (!mfence) {
> > > > +        if (!mjob) {
> > > >                  dma_resv_add_fence(xe_vm_resv(vm), fence,
> > > >                                     pt_update_ops->wait_vm_bookkeep ?
> > > >                                     DMA_RESV_USAGE_KERNEL :
> > > > @@ -2523,19 +2484,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > > >                  list_for_each_entry(op, &vops->list, link)
> > > >                          op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
> > > >          } else {
> > > > -                dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
> > > > +                dma_resv_add_fence(xe_vm_resv(vm), ifence,
> > > >                                     pt_update_ops->wait_vm_bookkeep ?
> > > >                                     DMA_RESV_USAGE_KERNEL :
> > > >                                     DMA_RESV_USAGE_BOOKKEEP);
> > > >
> > > > -                dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
> > > > +                dma_resv_add_fence(xe_vm_resv(vm), mfence,
> > > >                                     pt_update_ops->wait_vm_bookkeep ?
> > > >                                     DMA_RESV_USAGE_KERNEL :
> > > >                                     DMA_RESV_USAGE_BOOKKEEP);
> > > >
> > > >                  list_for_each_entry(op, &vops->list, link)
> > > > -                        op_commit(vops->vm, tile, pt_update_ops, op,
> > > > -                                  &ifence->base.base, &mfence->base.base);
> > > > +                        op_commit(vops->vm, tile, pt_update_ops, op, ifence,
> > > > +                                  mfence);
> > > >          }
> > > >
> > > >          if (pt_update_ops->needs_svm_lock)
> > > > @@ -2543,15 +2504,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > > >          if (pt_update_ops->needs_userptr_lock)
> > > >                  up_read(&vm->userptr.notifier_lock);
> > > >
> > > > +        xe_gt_tlb_inval_job_put(mjob);
> > > > +        xe_gt_tlb_inval_job_put(ijob);
> > > > +
> > > >          return fence;
> > > >
> > > >  free_rfence:
> > > >          kfree(rfence);
> > > > -free_ifence:
> > > > +free_ijob:
> > > >          kfree(cf);
> > > >          kfree(fences);
> > > > -        kfree(mfence);
> > > > -        kfree(ifence);
> > > > +        xe_gt_tlb_inval_job_put(mjob);
> > > > +        xe_gt_tlb_inval_job_put(ijob);
> > > >  kill_vm_tile1:
> > > >          if (err != -EAGAIN && err != -ENODATA && tile->id)
> > > >                  xe_vm_kill(vops->vm, false);
> > >
>
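Condensed from the diff above, the lifecycle of the new API looks roughly
like the following (a sketch based only on the calls shown in this patch;
error handling and the media-GT job are omitted):

	ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q, tile->primary_gt,
					  pt_update_ops->start,
					  pt_update_ops->last, vm->usm.asid);

	/* pre-commit: reserve dependency slots for the job */
	err = xe_gt_tlb_inval_job_alloc_dep(ijob);

	/* once the PT update fence exists, queue the invalidation behind it */
	ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);

	/* drop the creation reference, as the success path above does */
	xe_gt_tlb_inval_job_put(ijob);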
More information about the Intel-xe mailing list