[PATCH v2 7/9] drm/xe: Add GT TLB invalidation jobs
Matthew Brost
matthew.brost at intel.com
Wed Jul 16 01:08:24 UTC 2025
On Tue, Jul 15, 2025 at 05:09:01PM -0600, Summers, Stuart wrote:
> On Wed, 2025-07-02 at 16:42 -0700, Matthew Brost wrote:
> > Add GT TLB invalidation jobs which issue GT TLB invalidations, built on
> > top of the Xe generic dependency scheduler.
> >
> > v2:
> > - Fix checkpatch
> >
> > Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/Makefile | 1 +
> > drivers/gpu/drm/xe/xe_gt_tlb_inval_job.c | 271 +++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_gt_tlb_inval_job.h | 34 +++
> > 3 files changed, 306 insertions(+)
> > create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_inval_job.c
> > create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_inval_job.h
> >
> > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > index 0edcfc770c0d..5aad44a3b5fd 100644
> > --- a/drivers/gpu/drm/xe/Makefile
> > +++ b/drivers/gpu/drm/xe/Makefile
> > @@ -55,6 +55,7 @@ xe-y += xe_bb.o \
> > xe_gt_sysfs.o \
> > xe_gt_throttle.o \
> > xe_gt_tlb_invalidation.o \
> > + xe_gt_tlb_inval_job.o \
> > xe_gt_topology.o \
> > xe_guc.o \
> > xe_guc_ads.o \
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.c
> > new file mode 100644
> > index 000000000000..428d20f16ec2
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.c
> > @@ -0,0 +1,271 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2025 Intel Corporation
> > + */
> > +
> > +#include "xe_dep_job_types.h"
> > +#include "xe_dep_scheduler.h"
> > +#include "xe_exec_queue.h"
> > +#include "xe_gt.h"
> > +#include "xe_gt_tlb_invalidation.h"
> > +#include "xe_gt_tlb_inval_job.h"
> > +#include "xe_migrate.h"
> > +#include "xe_pm.h"
> > +
> > +/** struct xe_gt_tlb_inval_job - GT TLB invalidation job */
> > +struct xe_gt_tlb_inval_job {
> > + /** @dep: base generic dependency Xe job */
> > + struct xe_dep_job dep;
> > + /** @gt: GT to invalidate */
> > + struct xe_gt *gt;
> > + /** @q: exec queue issuing the invalidate */
> > + struct xe_exec_queue *q;
> > + /** @refcount: ref count of this job */
> > + struct kref refcount;
> > + /**
> > + * @fence: dma fence to indicate completion. One-way relationship - the
> > + * job can safely reference the fence; the fence cannot safely reference
> > + * the job.
> > + */
> > + struct dma_fence *fence;
> > + /** @start: Start address to invalidate */
> > + u64 start;
> > + /** @end: End address to invalidate */
> > + u64 end;
> > + /** @asid: Address space ID to invalidate */
> > + u32 asid;
> > + /** @fence_armed: Fence has been armed */
> > + bool fence_armed;
> > +};
> > +
> > +static struct dma_fence *xe_gt_tlb_inval_job_run(struct xe_dep_job *dep_job)
> > +{
> > + struct xe_gt_tlb_inval_job *job =
> > + container_of(dep_job, typeof(*job), dep);
> > + struct xe_gt_tlb_invalidation_fence *ifence =
> > + container_of(job->fence, typeof(*ifence), base);
> > +
> > + xe_gt_tlb_invalidation_range(job->gt, ifence, job->start,
> > + job->end, job->asid);
> > +
> > + return job->fence;
> > +}
> > +
> > +static void xe_gt_tlb_inval_job_free(struct xe_dep_job *dep_job)
> > +{
> > + struct xe_gt_tlb_inval_job *job =
> > + container_of(dep_job, typeof(*job), dep);
> > +
> > + /* Pairs with get in xe_gt_tlb_inval_job_push */
> > + xe_gt_tlb_inval_job_put(job);
> > +}
> > +
> > +static const struct xe_dep_job_ops dep_job_ops = {
> > + .run_job = xe_gt_tlb_inval_job_run,
> > + .free_job = xe_gt_tlb_inval_job_free,
> > +};
> > +
> > +static int xe_gt_tlb_inval_context(struct xe_gt *gt)
> > +{
> > + return xe_gt_is_media_type(gt) ? XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT :
> > + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT;
> > +}
> > +
> > +/**
> > + * xe_gt_tlb_inval_job_create() - GT TLB invalidation job create
> > + * @q: exec queue issuing the invalidate
> > + * @gt: GT to invalidate
> > + * @start: Start address to invalidate
> > + * @end: End address to invalidate
> > + * @asid: Address space ID to invalidate
> > + *
> > + * Create a GT TLB invalidation job and initialize internal fields. The
> > + * caller is responsible for releasing the creation reference.
> > + *
> > + * Return: GT TLB invalidation job object or ERR_PTR
> > + */
> > +struct xe_gt_tlb_inval_job *xe_gt_tlb_inval_job_create(struct xe_exec_queue *q,
> > + struct xe_gt *gt,
> > + u64 start, u64 end,
> > + u32 asid)
> > +{
> > + struct xe_gt_tlb_inval_job *job;
> > + struct xe_dep_scheduler *dep_scheduler =
> > + q->tlb_inval[xe_gt_tlb_inval_context(gt)].dep_scheduler;
> > + struct drm_sched_entity *entity =
> > + xe_dep_scheduler_entity(dep_scheduler);
> > + struct xe_gt_tlb_invalidation_fence *ifence;
> > + int err;
> > +
> > + job = kmalloc(sizeof(*job), GFP_KERNEL);
> > + if (!job)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + job->q = q;
> > + job->gt = gt;
> > + job->start = start;
> > + job->end = end;
> > + job->asid = asid;
> > + job->fence_armed = false;
> > + job->dep.ops = &dep_job_ops;
> > + kref_init(&job->refcount);
> > + xe_exec_queue_get(q);
> > +
> > + ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
> > + if (!ifence) {
> > + err = -ENOMEM;
> > + goto err_job;
> > + }
> > + job->fence = &ifence->base;
> > +
> > + err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL,
> > + q->xef ? q->xef->drm->client_id : 0);
> > + if (err)
> > + goto err_fence;
> > +
> > + xe_pm_runtime_get_noresume(gt_to_xe(job->gt));
> > + return job;
> > +
> > +err_fence:
> > + kfree(ifence);
> > +err_job:
> > + xe_exec_queue_put(q);
> > + kfree(job);
> > +
> > + return ERR_PTR(err);
> > +}
> > +
> > +static void xe_gt_tlb_inval_job_destroy(struct kref *ref)
> > +{
> > + struct xe_gt_tlb_inval_job *job = container_of(ref, typeof(*job),
> > + refcount);
> > + struct xe_gt_tlb_invalidation_fence *ifence =
> > + container_of(job->fence, typeof(*ifence), base);
> > + struct xe_device *xe = gt_to_xe(job->gt);
> > + struct xe_exec_queue *q = job->q;
> > +
> > + if (!job->fence_armed)
> > + kfree(ifence);
> > + else
> > + /* Ref from xe_gt_tlb_invalidation_fence_init */
> > + dma_fence_put(job->fence);
> > +
> > + drm_sched_job_cleanup(&job->dep.drm);
> > + kfree(job);
> > + xe_exec_queue_put(q); /* Pairs with get from xe_gt_tlb_inval_job_create */
> > + xe_pm_runtime_put(xe); /* Pairs with get from xe_gt_tlb_inval_job_create */
>
> This patch also looks great to me. My only concern is that it feels
> weird reordering these puts and kfrees relative to how they were
> allocated in xe_gt_tlb_inval_job_create. It does look functional, since
> the kfree of the job and the exec queue/runtime puts are really
> independent, but if that ever changed (however unlikely) and we wanted
> to link those, it seems safer to do the cleanup in the reverse of the
> allocation order. Let me know what you think here.
They are independent. The ordering just mirrors what xe_sched_job_destroy
does, but in both cases it could be reordered if desired. I'd rather leave
it as is for now so the functions that create / destroy jobs look more or
less the same across Xe.
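
For illustration, a strict reverse-order teardown would look roughly like
the below (untested sketch, same calls as the patch, just reordered to undo
xe_gt_tlb_inval_job_create back to front):

static void xe_gt_tlb_inval_job_destroy(struct kref *ref)
{
	struct xe_gt_tlb_inval_job *job = container_of(ref, typeof(*job),
						       refcount);
	struct xe_gt_tlb_invalidation_fence *ifence =
		container_of(job->fence, typeof(*ifence), base);
	struct xe_device *xe = gt_to_xe(job->gt);
	struct xe_exec_queue *q = job->q;

	/* Reverse of create: runtime PM ref was taken last, drop it first */
	xe_pm_runtime_put(xe);
	/* Undo drm_sched_job_init */
	drm_sched_job_cleanup(&job->dep.drm);
	/* Undo the ifence allocation, or the ref from fence init if armed */
	if (!job->fence_armed)
		kfree(ifence);
	else
		dma_fence_put(job->fence);
	/* Undo the exec queue get and the job allocation */
	xe_exec_queue_put(q);
	kfree(job);
}

Functionally equivalent either way, so I'd still prefer keeping the
xe_sched_job_destroy ordering.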
Matt
>
> Thanks,
> Stuart
>
> > +}
> > +
> > +/**
> > + * xe_gt_tlb_inval_job_alloc_dep() - GT TLB invalidation job alloc dependency
> > + * @job: GT TLB invalidation job to alloc dependency for
> > + *
> > + * Allocate storage for a dependency in the GT TLB invalidation job. This
> > + * function should be called at most once per job and must be paired with
> > + * xe_gt_tlb_inval_job_push being called with a real (non-signaled) fence.
> > + *
> > + * Return: 0 on success, -errno on failure
> > + */
> > +int xe_gt_tlb_inval_job_alloc_dep(struct xe_gt_tlb_inval_job *job)
> > +{
> > + xe_assert(gt_to_xe(job->gt), !xa_load(&job->dep.drm.dependencies, 0));
> > +
> > + return drm_sched_job_add_dependency(&job->dep.drm,
> > + dma_fence_get_stub());
> > +}
> > +
> > +/**
> > + * xe_gt_tlb_inval_job_push() - GT TLB invalidation job push
> > + * @job: GT TLB invalidation job to push
> > + * @m: The migration object being used
> > + * @fence: Dependency for GT TLB invalidation job
> > + *
> > + * Pushes a GT TLB invalidation job for execution, using @fence as a
> > + * dependency. Storage for @fence must be preallocated with
> > + * xe_gt_tlb_inval_job_alloc_dep prior to this call if @fence is not
> > + * signaled. Takes a reference to the job's finished fence, which the
> > + * caller is responsible for releasing, and returns it to the caller.
> > + * This function is safe to call in the path of reclaim.
> > + *
> > + * Return: Job's finished fence
> > + */
> > +struct dma_fence *xe_gt_tlb_inval_job_push(struct xe_gt_tlb_inval_job *job,
> > + struct xe_migrate *m,
> > + struct dma_fence *fence)
> > +{
> > + struct xe_gt_tlb_invalidation_fence *ifence =
> > + container_of(job->fence, typeof(*ifence), base);
> > +
> > + if (!dma_fence_is_signaled(fence)) {
> > + void *ptr;
> > +
> > + /*
> > + * Can be in path of reclaim, hence the preallocation of fence
> > + * storage in xe_gt_tlb_inval_job_alloc_dep. Verify caller did
> > + * this correctly.
> > + */
> > + xe_assert(gt_to_xe(job->gt),
> > + xa_load(&job->dep.drm.dependencies, 0) ==
> > + dma_fence_get_stub());
> > +
> > + dma_fence_get(fence); /* ref released once dependency processed by scheduler */
> > + ptr = xa_store(&job->dep.drm.dependencies, 0, fence,
> > + GFP_ATOMIC);
> > + xe_assert(gt_to_xe(job->gt), !xa_is_err(ptr));
> > + }
> > +
> > + xe_gt_tlb_inval_job_get(job); /* Pairs with put in free_job */
> > + job->fence_armed = true;
> > +
> > + /*
> > + * We need the migration lock to protect the seqnos (job and
> > + * invalidation fence) and the spsc queue. It is only taken on the
> > + * migration queue; user queues are protected by the dma-resv VM lock.
> > + */
> > + xe_migrate_job_lock(m, job->q);
> > +
> > + /* Creation ref pairs with put in xe_gt_tlb_inval_job_destroy
> > */
> > + xe_gt_tlb_invalidation_fence_init(job->gt, ifence, false);
> > + dma_fence_get(job->fence); /* Pairs with put in DRM
> > scheduler */
> > +
> > + drm_sched_job_arm(&job->dep.drm);
> > + /*
> > + * Caller ref, get must be done before job push as it could
> > + * immediately signal and free.
> > + */
> > + dma_fence_get(&job->dep.drm.s_fence->finished);
> > + drm_sched_entity_push_job(&job->dep.drm);
> > +
> > + xe_migrate_job_unlock(m, job->q);
> > +
> > + /*
> > + * Not using job->fence, as it has its own dma-fence context, which
> > + * does not allow GT TLB invalidation fences on the same (queue, GT)
> > + * tuple to be squashed in dma-resv/DRM scheduler. Instead, we use the
> > + * DRM scheduler context and the job's finished fence, which enables
> > + * squashing.
> > + */
> > + return &job->dep.drm.s_fence->finished;
> > +}
> > +
> > +/**
> > + * xe_gt_tlb_inval_job_get() - Get a reference to GT TLB invalidation job
> > + * @job: GT TLB invalidation job object
> > + *
> > + * Increment the GT TLB invalidation job's reference count
> > + */
> > +void xe_gt_tlb_inval_job_get(struct xe_gt_tlb_inval_job *job)
> > +{
> > + kref_get(&job->refcount);
> > +}
> > +
> > +/**
> > + * xe_gt_tlb_inval_job_put() - Put a reference to GT TLB invalidation job
> > + * @job: GT TLB invalidation job object
> > + *
> > + * Decrement the GT TLB invalidation job's reference count, calling
> > + * xe_gt_tlb_inval_job_destroy when the reference count reaches 0. Skips
> > + * the decrement if the input @job is NULL or IS_ERR.
> > + */
> > +void xe_gt_tlb_inval_job_put(struct xe_gt_tlb_inval_job *job)
> > +{
> > + if (job && !IS_ERR(job))
> > + kref_put(&job->refcount, xe_gt_tlb_inval_job_destroy);
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.h
> > new file mode 100644
> > index 000000000000..883896194a34
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_inval_job.h
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2025 Intel Corporation
> > + */
> > +
> > +#ifndef _XE_GT_TLB_INVAL_JOB_H_
> > +#define _XE_GT_TLB_INVAL_JOB_H_
> > +
> > +#include <linux/types.h>
> > +
> > +struct dma_fence;
> > +struct drm_sched_job;
> > +struct kref;
> > +struct xe_exec_queue;
> > +struct xe_gt;
> > +struct xe_gt_tlb_inval_job;
> > +struct xe_migrate;
> > +
> > +struct xe_gt_tlb_inval_job *xe_gt_tlb_inval_job_create(struct xe_exec_queue *q,
> > + struct xe_gt *gt,
> > + u64 start, u64 end,
> > + u32 asid);
> > +
> > +int xe_gt_tlb_inval_job_alloc_dep(struct xe_gt_tlb_inval_job *job);
> > +
> > +struct dma_fence *xe_gt_tlb_inval_job_push(struct xe_gt_tlb_inval_job *job,
> > + struct xe_migrate *m,
> > + struct dma_fence *fence);
> > +
> > +void xe_gt_tlb_inval_job_get(struct xe_gt_tlb_inval_job *job);
> > +
> > +void xe_gt_tlb_inval_job_put(struct xe_gt_tlb_inval_job *job);
> > +
> > +#endif
>
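
FWIW, the expected caller flow for these helpers is roughly the below
(sketch only; q, gt, m, start, end, asid, and dep_fence are placeholders
from the caller's context, not part of this patch):

	struct xe_gt_tlb_inval_job *job;
	struct dma_fence *fence;
	int err;

	job = xe_gt_tlb_inval_job_create(q, gt, start, end, asid);
	if (IS_ERR(job))
		return PTR_ERR(job);

	/* Only needed if dep_fence may be unsignaled (reclaim-safe path) */
	err = xe_gt_tlb_inval_job_alloc_dep(job);
	if (err) {
		xe_gt_tlb_inval_job_put(job);
		return err;
	}

	/* Returns the job's finished fence with a ref for the caller */
	fence = xe_gt_tlb_inval_job_push(job, m, dep_fence);

	/* ... install fence in dma-resv or hand it to the caller ... */
	dma_fence_put(fence);
	xe_gt_tlb_inval_job_put(job); /* Drop creation ref */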