[PATCH] drm/xe: Use TLB invalidation context for invalidations issued from bind queues
Thomas Hellström
thomas.hellstrom at linux.intel.com
Fri Jun 13 08:40:19 UTC 2025
On Thu, 2025-06-12 at 14:40 -0700, Matthew Brost wrote:
> In order to avoid adding tons of invalidation fences to dma-resv
> BOOKKEEP slots, and thus jobs dependencies, when stream of unbinds
> arrives (e.g., many user frees or unmaps), use a dma fence tlb
> invalidation context associated with the queue issuing the bind
> operation.
>
> Two fence contexts are needed - one for each GT as TLB invalidations
> are only ordered on a GT. A per GT ordered wq is needed to queue the
> invalidations to maintain dma fence ordering as well.
>
> This fixes the below splat when the number of invalidations gets out
> of
> hand:
>
> [ 1661.638258] watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> [kworker/u65:8:75257]
> [ 1661.638262] Modules linked in: xe drm_gpusvm drm_gpuvm
> drm_ttm_helper ttm drm_exec gpu_sched drm_suballoc_helper drm_buddy
> drm_kms_helper x86_pkg_temp_thermal coretemp snd_hda_cod
> ec_realtek
> snd_hda_codec_generic snd_hda_scodec_component mei_pxp mei_hdcp
> wmi_bmof
> snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep i2c_i801
> snd_hda_core i2c_mux snd_pcm i2c_s mbus video wmi mei_me mei
> fuse
> igb e1000e i2c_algo_bit ptp ghash_clmulni_intel pps_core
> intel_lpss_pci
> [last unloaded: xe]
> [ 1661.638278] CPU: 2 UID: 0 PID: 75257 Comm: kworker/u65:8
> Tainted: G S 6.16.0-rc1-xe+ #397 PREEMPT(undef)
> [ 1661.638280] Tainted: [S]=CPU_OUT_OF_SPEC
> [ 1661.638280] Hardware name: Intel Corporation Raptor Lake Client
> Platform/RPL-S ADP-S DDR5 UDIMM CRB, BIOS
> RPLSFWI1.R00.3492.A00.2211291114 11/29/2022
> [ 1661.638281] Workqueue: xe_gt_page_fault_work_queue
> xe_svm_garbage_collector_work_func [xe]
> [ 1661.638311] RIP: 0010:xas_start+0x47/0xd0
> [ 1661.638317] Code: 07 48 8b 57 08 48 8b 40 08 48 89 c1 83 e1 03
> 48 83 f9 02 75 08 48 3d 00 10 00 00 77 21 48 85 d2 75 29 48 c7 47 18
> 00
> 00 00 00 <c3> cc cc cc cc 48 c1 fa 02 85 d2 74 c7 31 c0 c3 cc
> cc
> cc cc 0f b6
> [ 1661.638317] RSP: 0018:ffffc90003d9b968 EFLAGS: 00000297
> [ 1661.638318] RAX: ffff88810459b232 RBX: ffffc90003d9b9a0 RCX:
> 0000000000000006
> [ 1661.638319] RDX: 0000000000000009 RSI: 0000000000000003 RDI:
> ffffc90003d9b9a0
> [ 1661.638320] RBP: ffffffffffffffff R08: ffff888197a0a600 R09:
> 0000000000000228
> [ 1661.638320] R10: ffffffffffffffff R11: ffffffffffffffc0 R12:
> 0000000000000241
> [ 1661.638320] R13: ffffffffffffffff R14: 0000000000000040 R15:
> ffff8881014db000
> [ 1661.638321] FS: 0000000000000000(0000)
> GS:ffff88890aee8000(0000) knlGS:0000000000000000
> [ 1661.638322] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1661.638322] CR2: 00007fc07287fff8 CR3: 000000000242c003 CR4:
> 0000000000f70ef0
> [ 1661.638323] PKRU: 55555554
> [ 1661.638323] Call Trace:
> [ 1661.638325] <TASK>
> [ 1661.638326] xas_load+0xd/0xb0
> [ 1661.638328] xas_find+0x187/0x1d0
> [ 1661.638330] xa_find_after+0x10f/0x130
> [ 1661.638332] drm_sched_job_add_dependency+0x80/0x1e0
> [gpu_sched]
> [ 1661.638335] drm_sched_job_add_resv_dependencies+0x62/0x120
> [gpu_sched]
> [ 1661.638337] xe_pt_vm_dependencies+0x5b/0x2f0 [xe]
> [ 1661.638359] xe_pt_svm_pre_commit+0x59/0x1a0 [xe]
> [ 1661.638376] xe_migrate_update_pgtables+0x67f/0x910 [xe]
> [ 1661.638397] ? xe_pt_stage_unbind+0x92/0xd0 [xe]
> [ 1661.638416] xe_pt_update_ops_run+0x12e/0x7f0 [xe]
> [ 1661.638433] ops_execute+0x1b1/0x430 [xe]
> [ 1661.638449] xe_vm_range_unbind+0x260/0x2a0 [xe]
> [ 1661.638465] xe_svm_garbage_collector+0xfe/0x1c0 [xe]
> [ 1661.638478] xe_svm_garbage_collector_work_func+0x25/0x30 [xe]
> [ 1661.638491] process_one_work+0x16b/0x2e0
> [ 1661.638495] worker_thread+0x284/0x410
> [ 1661.638496] ? __pfx_worker_thread+0x10/0x10
> [ 1661.638496] kthread+0xe9/0x210
> [ 1661.638498] ? __pfx_kthread+0x10/0x10
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 8 +++++++
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 17 +++++++++++++++
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 23 ++++++++++++++++---
> --
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 4 +++-
> drivers/gpu/drm/xe/xe_pt.c | 10 +++++++--
> drivers/gpu/drm/xe/xe_svm.c | 9 +++++---
> drivers/gpu/drm/xe/xe_vm.c | 6 ++++--
> 7 files changed, 64 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> b/drivers/gpu/drm/xe/xe_exec_queue.c
> index fee22358cc09..71e354c56ad9 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -94,6 +94,14 @@ static struct xe_exec_queue
> *__xe_exec_queue_alloc(struct xe_device *xe,
> else
> q->sched_props.priority =
> XE_EXEC_QUEUE_PRIORITY_NORMAL;
>
> +	if (q->flags & (EXEC_QUEUE_FLAG_PERMANENT | EXEC_QUEUE_FLAG_VM)) {
> +		int i;
> +
> +		for (i = 0; i < XE_EXEC_QUEUE_TLB_CONTEXT_COUNT; ++i)
> +			q->tlb_invalidation.context[i] =
> +				dma_fence_context_alloc(1);
> +	}
Hmm. If invalidations are ordered per GT, why don't we just allocate
one invalidation context per GT, rather than one per GT per exec_queue?
Also, moving forward, this seems like a fit for a one-per-gt
invalidation drm_scheduler?
Thanks,
Thomas
> +
> if (vm)
> q->vm = xe_vm_get(vm);
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index cc1cffb5c87f..81d240e561ee 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -132,6 +132,23 @@ struct xe_exec_queue {
> struct list_head link;
> } lr;
>
> + /** @tlb_invalidation: TLB invalidations exec queue state */
> + struct {
> + /**
> + * @tlb_invalidation.context: The TLB invalidation
> context
> + * for the queue (VM and MIGRATION queues only)
> + */
> +#define XE_EXEC_QUEUE_TLB_CONTEXT_PRIMARY_GT 0
> +#define XE_EXEC_QUEUE_TLB_CONTEXT_MEDIA_GT 1
> +#define XE_EXEC_QUEUE_TLB_CONTEXT_COUNT	(XE_EXEC_QUEUE_TLB_CONTEXT_MEDIA_GT + 1)
> + u64 context[XE_EXEC_QUEUE_TLB_CONTEXT_COUNT];
> + /**
> + * @tlb_invalidation.seqno: The TLB invalidation
> seqno for the
> + * queue (VM and MIGRATION queues only)
> + */
> + u32 seqno[XE_EXEC_QUEUE_TLB_CONTEXT_COUNT];
> + } tlb_invalidation;
> +
> /** @pxp: PXP info tracking */
> struct {
> /** @pxp.type: PXP session type used by this queue
> */
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 084cbdeba8ea..0a2fcaaf04fc 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -7,6 +7,7 @@
>
> #include "abi/guc_actions_abi.h"
> #include "xe_device.h"
> +#include "xe_exec_queue_types.h"
> #include "xe_force_wake.h"
> #include "xe_gt.h"
> #include "xe_gt_printk.h"
> @@ -294,7 +295,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> struct xe_gt_tlb_invalidation_fence fence;
> int ret;
>
> - xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
> +	xe_gt_tlb_invalidation_fence_init(gt, NULL, &fence, 0, true);
> ret = xe_gt_tlb_invalidation_guc(gt, &fence);
> if (ret)
> return ret;
> @@ -431,7 +432,7 @@ void xe_gt_tlb_invalidation_vm(struct xe_gt *gt,
> struct xe_vm *vm)
> u64 range = 1ull << vm->xe->info.va_bits;
> int ret;
>
> - xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
> +	xe_gt_tlb_invalidation_fence_init(gt, NULL, &fence, 0, true);
>
> ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm-
> >usm.asid);
> if (ret < 0)
> @@ -551,7 +552,9 @@ static const struct dma_fence_ops
> invalidation_fence_ops = {
> /**
> * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation
> fence
> * @gt: GT
> + * @q: exec queue issuing TLB invalidation, if NULL no queue
> associated
> * @fence: TLB invalidation fence to initialize
> + * @tlb_context: TLB invalidation context for exec_queue
> * @stack: fence is stack variable
> *
> * Initialize TLB invalidation fence for use.
> xe_gt_tlb_invalidation_fence_fini
> @@ -559,15 +562,25 @@ static const struct dma_fence_ops
> invalidation_fence_ops = {
> * even on error.
> */
> void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> + struct xe_exec_queue *q,
> 				       struct xe_gt_tlb_invalidation_fence *fence,
> + int tlb_context,
> bool stack)
> {
> +	xe_gt_assert(gt, tlb_context < XE_EXEC_QUEUE_TLB_CONTEXT_COUNT);
> +
> xe_pm_runtime_get_noresume(gt_to_xe(gt));
>
> 	spin_lock_irq(&gt->tlb_invalidation.lock);
> -	dma_fence_init(&fence->base, &invalidation_fence_ops,
> -		       &gt->tlb_invalidation.lock,
> -		       dma_fence_context_alloc(1), 1);
> +	if (q)
> +		dma_fence_init(&fence->base, &invalidation_fence_ops,
> +			       &gt->tlb_invalidation.lock,
> +			       q->tlb_invalidation.context[tlb_context],
> +			       ++q->tlb_invalidation.seqno[tlb_context]);
> +	else
> +		dma_fence_init(&fence->base, &invalidation_fence_ops,
> +			       &gt->tlb_invalidation.lock,
> +			       dma_fence_context_alloc(1), 1);
> 	spin_unlock_irq(&gt->tlb_invalidation.lock);
> INIT_LIST_HEAD(&fence->link);
> if (stack)
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index abe9b03d543e..8440c608a0ec 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -10,6 +10,7 @@
>
> #include "xe_gt_tlb_invalidation_types.h"
>
> +struct xe_exec_queue;
> struct xe_gt;
> struct xe_guc;
> struct xe_vm;
> @@ -29,8 +30,9 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32
> *msg, u32 len);
>
> void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> + struct xe_exec_queue *q,
> 				       struct xe_gt_tlb_invalidation_fence *fence,
> - bool stack);
> + int tlb_context, bool stack);
> void xe_gt_tlb_invalidation_fence_signal(struct
> xe_gt_tlb_invalidation_fence *fence);
>
> static inline void
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index f39d5cc9f411..feab4b7c7e70 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1529,7 +1529,7 @@ static void invalidation_fence_cb(struct
> dma_fence *fence,
>
> trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
> if (!ifence->fence->error) {
> - queue_work(system_wq, &ifence->work);
> + queue_work(ifence->gt->ordered_wq, &ifence->work);
> } else {
> ifence->base.base.error = ifence->fence->error;
> xe_gt_tlb_invalidation_fence_signal(&ifence->base);
> @@ -1551,13 +1551,15 @@ static void
> invalidation_fence_work_func(struct work_struct *w)
> static void invalidation_fence_init(struct xe_gt *gt,
> struct invalidation_fence
> *ifence,
> struct dma_fence *fence,
> +				    struct xe_exec_queue *q, int tlb_context,
> u64 start, u64 end, u32 asid)
> {
> int ret;
>
> trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt),
> &ifence->base);
>
> - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
> +	xe_gt_tlb_invalidation_fence_init(gt, q, &ifence->base, tlb_context,
> +					  false);
>
> ifence->fence = fence;
> ifence->gt = gt;
> @@ -2467,10 +2469,14 @@ xe_pt_update_ops_run(struct xe_tile *tile,
> struct xe_vma_ops *vops)
> if (mfence)
> dma_fence_get(fence);
> 		invalidation_fence_init(tile->primary_gt, ifence, fence,
> +					pt_update_ops->q,
> +					XE_EXEC_QUEUE_TLB_CONTEXT_PRIMARY_GT,
> 					pt_update_ops->start,
> 					pt_update_ops->last, vm->usm.asid);
> 		if (mfence) {
> 			invalidation_fence_init(tile->media_gt, mfence, fence,
> +						pt_update_ops->q,
> +						XE_EXEC_QUEUE_TLB_CONTEXT_MEDIA_GT,
> +						pt_update_ops->start,
> +						pt_update_ops->last,
> 						vm->usm.asid);
> fences[0] = &ifence->base.base;
> diff --git a/drivers/gpu/drm/xe/xe_svm.c
> b/drivers/gpu/drm/xe/xe_svm.c
> index 13abc6049041..2edd1c52150e 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -227,7 +227,9 @@ static void xe_svm_invalidate(struct drm_gpusvm
> *gpusvm,
> int err;
>
> 		xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
> -						  &fence[fence_id], true);
> +						  NULL,
> +						  &fence[fence_id], 0,
> +						  true);
>
> err = xe_gt_tlb_invalidation_range(tile-
> >primary_gt,
>
> &fence[fence_id],
> @@ -241,8 +243,9 @@ static void xe_svm_invalidate(struct drm_gpusvm
> *gpusvm,
> if (!tile->media_gt)
> continue;
>
> -		xe_gt_tlb_invalidation_fence_init(tile->media_gt,
> -						  &fence[fence_id], true);
> +		xe_gt_tlb_invalidation_fence_init(tile->media_gt, NULL,
> +						  &fence[fence_id], 0,
> +						  true);
>
> err = xe_gt_tlb_invalidation_range(tile-
> >media_gt,
>
> &fence[fence_id],
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index d18807b92b18..730319b78a0a 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3896,8 +3896,9 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
> if (xe_pt_zap_ptes(tile, vma)) {
> xe_device_wmb(xe);
> 		xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
> +						  NULL,
> 						  &fence[fence_id],
> -						  true);
> +						  0, true);
>
> ret = xe_gt_tlb_invalidation_vma(tile-
> >primary_gt,
>
> &fence[fence_id], vma);
> @@ -3909,8 +3910,9 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
> continue;
>
> 		xe_gt_tlb_invalidation_fence_init(tile->media_gt,
> +						  NULL,
> 						  &fence[fence_id],
> -						  true);
> +						  0, true);
>
> ret = xe_gt_tlb_invalidation_vma(tile-
> >media_gt,
>
> &fence[fence_id], vma);
More information about the Intel-xe
mailing list