[PATCH 3/5] drm/xe: Prep TLB invalidation fence before sending
Stuart Summers
stuart.summers at intel.com
Wed Jul 23 18:22:20 UTC 2025
From: Matthew Brost <matthew.brost at intel.com>
It is a bit backwards to add a TLB invalidation fence to the pending
list only after the invalidation has been issued. Move this step into a
helper function and perform it before sending the TLB invalidation.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_tlb_inval.c | 116 ++++++++++++++++--------------
1 file changed, 62 insertions(+), 54 deletions(-)
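For reviewers skimming the diff, the resulting caller flow is roughly the
sketch below (an illustration only, not part of the patch; all names are
the ones introduced or kept in the diff that follows):

	mutex_lock(&gt->uc.guc.ct.lock);

	/* Assign the seqno, add the fence to the pending list and arm the
	 * timeout worker, all before the H2G is sent. */
	xe_tlb_inval_fence_prep(fence);

	ret = send_tlb_inval(&gt->uc.guc, fence, action, len);
	if (ret < 0)
		/* Back out: drop the fence from the pending list and signal it. */
		inval_fence_signal_unlocked(gt_to_xe(gt), fence);

	mutex_unlock(&gt->uc.guc.ct.lock);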
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index a25c35005689..61bc16410228 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -65,19 +65,19 @@ __inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
static void
inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
{
+ lockdep_assert_held(&fence->tlb_inval->pending_lock);
+
list_del(&fence->link);
__inval_fence_signal(xe, fence);
}
-void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
+static void
+inval_fence_signal_unlocked(struct xe_device *xe,
+ struct xe_tlb_inval_fence *fence)
{
- struct xe_gt *gt;
-
- if (WARN_ON_ONCE(!fence->tlb_inval))
- return;
-
- gt = fence->tlb_inval->private;
- __inval_fence_signal(gt_to_xe(gt), fence);
+ spin_lock_irq(&fence->tlb_inval->pending_lock);
+ inval_fence_signal(xe, fence);
+ spin_unlock_irq(&fence->tlb_inval->pending_lock);
}
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
@@ -201,16 +201,13 @@ static bool tlb_inval_seqno_past(struct xe_gt *gt, int seqno)
return seqno_recv >= seqno;
}
-static int send_tlb_inval(struct xe_guc *guc,
- struct xe_tlb_inval_fence *fence,
+static int send_tlb_inval(struct xe_guc *guc, struct xe_tlb_inval_fence *fence,
u32 *action, int len)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = gt_to_xe(gt);
- int seqno;
- int ret;
xe_gt_assert(gt, fence);
+ lockdep_assert_held(&guc->ct.lock);
/*
* XXX: The seqno algorithm relies on TLB invalidation being processed
@@ -218,47 +215,38 @@ static int send_tlb_inval(struct xe_guc *guc,
* need to be updated.
*/
- mutex_lock(&guc->ct.lock);
- seqno = gt->tlb_inval.seqno;
- fence->seqno = seqno;
- trace_xe_tlb_inval_fence_send(xe, fence);
- action[1] = seqno;
- ret = xe_guc_ct_send_locked(&guc->ct, action, len,
- G2H_LEN_DW_TLB_INVALIDATE, 1);
- if (!ret) {
- spin_lock_irq(&gt->tlb_inval.pending_lock);
- /*
- * We haven't actually published the TLB fence as per
- * pending_fences, but in theory our seqno could have already
- * been written as we acquired the pending_lock. In such a case
- * we can just go ahead and signal the fence here.
- */
- if (tlb_inval_seqno_past(gt, seqno)) {
- __inval_fence_signal(xe, fence);
- } else {
- fence->inval_time = ktime_get();
- list_add_tail(&fence->link,
- &gt->tlb_inval.pending_fences);
-
- if (list_is_singular(&gt->tlb_inval.pending_fences))
- queue_delayed_work(system_wq,
- &gt->tlb_inval.fence_tdr,
- tlb_timeout_jiffies(gt));
- }
- spin_unlock_irq(&gt->tlb_inval.pending_lock);
- } else {
- __inval_fence_signal(xe, fence);
- }
- if (!ret) {
- gt->tlb_inval.seqno = (gt->tlb_inval.seqno + 1) %
- TLB_INVALIDATION_SEQNO_MAX;
- if (!gt->tlb_inval.seqno)
- gt->tlb_inval.seqno = 1;
- }
- mutex_unlock(&guc->ct.lock);
xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
+ action[1] = fence->seqno;
- return ret;
+ return xe_guc_ct_send(&guc->ct, action, len,
+ G2H_LEN_DW_TLB_INVALIDATE, 1);
+}
+
+static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
+{
+ struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
+ struct xe_gt *gt = tlb_inval->private;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ lockdep_assert_held(&gt->uc.guc.ct.lock);
+
+ fence->seqno = tlb_inval->seqno;
+ trace_xe_tlb_inval_fence_send(xe, fence);
+
+ spin_lock_irq(&tlb_inval->pending_lock);
+ fence->inval_time = ktime_get();
+ list_add_tail(&fence->link, &tlb_inval->pending_fences);
+
+ if (list_is_singular(&tlb_inval->pending_fences))
+ queue_delayed_work(system_wq,
+ &tlb_inval->fence_tdr,
+ tlb_timeout_jiffies(gt));
+ spin_unlock_irq(&tlb_inval->pending_lock);
+
+ tlb_inval->seqno = (tlb_inval->seqno + 1) %
+ TLB_INVALIDATION_SEQNO_MAX;
+ if (!tlb_inval->seqno)
+ tlb_inval->seqno = 1;
}
#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
@@ -286,7 +274,16 @@ static int xe_tlb_inval_guc(struct xe_gt *gt,
};
int ret;
+ mutex_lock(&gt->uc.guc.ct.lock);
+
+ xe_tlb_inval_fence_prep(fence);
+
ret = send_tlb_inval(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
+ if (ret < 0)
+ inval_fence_signal_unlocked(gt_to_xe(gt), fence);
+
+ mutex_unlock(&gt->uc.guc.ct.lock);
+
/*
* -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
* should be nuked on a GT reset so this error can be ignored.
@@ -413,7 +410,7 @@ int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
#define MAX_TLB_INVALIDATION_LEN 7
u32 action[MAX_TLB_INVALIDATION_LEN];
u64 length = end - start;
- int len = 0;
+ int len = 0, ret;
xe_gt_assert(gt, fence);
@@ -474,7 +471,18 @@ int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
- return send_tlb_inval(&gt->uc.guc, fence, action, len);
+ mutex_lock(&gt->uc.guc.ct.lock);
+
+ xe_tlb_inval_fence_prep(fence);
+
+ ret = send_tlb_inval(&gt->uc.guc, fence, action, len);
+ if (ret < 0)
+ inval_fence_signal_unlocked(xe, fence);
+
+ mutex_unlock(&gt->uc.guc.ct.lock);
+
+ return ret;
}
/**
--
2.34.1