[PATCH 6/8] drm/xe: Prep TLB invalidation fence before sending

Thu Aug 7 20:17:36 UTC 2025

On Thu, Aug 07, 2025 at 07:36:47PM +0000, stuartsummers wrote:
> From: Matthew Brost <matthew.brost at intel.com>
> 
> It is a bit backwards to add a TLB invalidation fence to the pending
> list after issuing the invalidation. Perform this step before issuing
> the TLB invalidation in a helper function.
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> Reviewed-by: Stuart Summers <stuart.summers at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_tlb_inval.c | 105 +++++++++++++++---------------
>  1 file changed, 52 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
> index 995699108bcb..967e5a261bb6 100644
> --- a/drivers/gpu/drm/xe/xe_tlb_inval.c
> +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
> @@ -65,19 +65,19 @@ __inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
>  static void
>  inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence)
>  {
> +	lockdep_assert_held(&fence->tlb_inval->pending_lock);
> +
>  	list_del(&fence->link);
>  	__inval_fence_signal(xe, fence);
>  }
>  
> -void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
> +static void
> +inval_fence_signal_unlocked(struct xe_device *xe,
> +			    struct xe_tlb_inval_fence *fence)
>  {
> -	struct xe_gt *gt;
> -
> -	if (WARN_ON_ONCE(!fence->tlb_inval))
> -		return;
> -
> -	gt = fence->tlb_inval->private;
> -	__inval_fence_signal(gt_to_xe(gt), fence);
> +	spin_lock_irq(&fence->tlb_inval->pending_lock);
> +	inval_fence_signal(xe, fence);
> +	spin_unlock_irq(&fence->tlb_inval->pending_lock);
>  }
>  
>  static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> @@ -208,14 +208,10 @@ static bool tlb_inval_seqno_past(struct xe_gt *gt, int seqno)
>  	return seqno_recv >= seqno;
>  }
>  
> -static int send_tlb_inval(struct xe_guc *guc,
> -			  struct xe_tlb_inval_fence *fence,
> +static int send_tlb_inval(struct xe_guc *guc, struct xe_tlb_inval_fence *fence,
>  			  u32 *action, int len)
>  {
>  	struct xe_gt *gt = guc_to_gt(guc);
> -	struct xe_device *xe = gt_to_xe(gt);
> -	int seqno;
> -	int ret;
>  
>  	xe_gt_assert(gt, fence);
>  
> @@ -225,47 +221,38 @@ static int send_tlb_inval(struct xe_guc *guc,
>  	 * need to be updated.
>  	 */
>  
> +	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
> +	action[1] = fence->seqno;
> +
> +	return xe_guc_ct_send(&guc->ct, action, len,
> +			      G2H_LEN_DW_TLB_INVALIDATE, 1);
> +}
> +
> +static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
> +{
> +	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
> +	struct xe_gt *gt = tlb_inval->private;
> +	struct xe_device *xe = gt_to_xe(gt);
> +
>  	mutex_lock(&gt->tlb_inval.seqno_lock);

The seqno lock should be asserted here, with the caller taking this and
holding until send_tlb_inval is called.

Matt

> -	seqno = gt->tlb_inval.seqno;
> -	fence->seqno = seqno;
> +	fence->seqno = tlb_inval->seqno;
>  	trace_xe_tlb_inval_fence_send(xe, fence);
> -	action[1] = seqno;
> -	ret = xe_guc_ct_send(&guc->ct, action, len,
> -			     G2H_LEN_DW_TLB_INVALIDATE, 1);
> -	if (!ret) {
> -		spin_lock_irq(&gt->tlb_inval.pending_lock);
> -		/*
> -		 * We haven't actually published the TLB fence as per
> -		 * pending_fences, but in theory our seqno could have already
> -		 * been written as we acquired the pending_lock. In such a case
> -		 * we can just go ahead and signal the fence here.
> -		 */
> -		if (tlb_inval_seqno_past(gt, seqno)) {
> -			__inval_fence_signal(xe, fence);
> -		} else {
> -			fence->inval_time = ktime_get();
> -			list_add_tail(&fence->link,
> -				      &gt->tlb_inval.pending_fences);
> -
> -			if (list_is_singular(&gt->tlb_inval.pending_fences))
> -				queue_delayed_work(system_wq,
> -						   &gt->tlb_inval.fence_tdr,
> -						   tlb_timeout_jiffies(gt));
> -		}
> -		spin_unlock_irq(&gt->tlb_inval.pending_lock);
> -	} else {
> -		__inval_fence_signal(xe, fence);
> -	}
> -	if (!ret) {
> -		gt->tlb_inval.seqno = (gt->tlb_inval.seqno + 1) %
> -			TLB_INVALIDATION_SEQNO_MAX;
> -		if (!gt->tlb_inval.seqno)
> -			gt->tlb_inval.seqno = 1;
> -	}
> -	mutex_unlock(&gt->tlb_inval.seqno_lock);
> -	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
>  
> -	return ret;
> +	spin_lock_irq(&tlb_inval->pending_lock);
> +	fence->inval_time = ktime_get();
> +	list_add_tail(&fence->link, &tlb_inval->pending_fences);
> +
> +	if (list_is_singular(&tlb_inval->pending_fences))
> +		queue_delayed_work(system_wq,
> +				   &tlb_inval->fence_tdr,
> +				   tlb_timeout_jiffies(gt));
> +	spin_unlock_irq(&tlb_inval->pending_lock);
> +
> +	tlb_inval->seqno = (tlb_inval->seqno + 1) %
> +		TLB_INVALIDATION_SEQNO_MAX;
> +	if (!tlb_inval->seqno)
> +		tlb_inval->seqno = 1;
> +	mutex_unlock(&gt->tlb_inval.seqno_lock);
>  }
>  
>  #define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
> @@ -293,7 +280,12 @@ static int xe_tlb_inval_guc(struct xe_gt *gt,
>  	};
>  	int ret;
>  
> +	xe_tlb_inval_fence_prep(fence);
> +
>  	ret = send_tlb_inval(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
> +	if (ret < 0)
> +		inval_fence_signal_unlocked(gt_to_xe(gt), fence);
> +
>  	/*
>  	 * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
>  	 *  should be nuked on a GT reset so this error can be ignored.
> @@ -420,7 +412,7 @@ int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
>  #define MAX_TLB_INVALIDATION_LEN	7
>  	u32 action[MAX_TLB_INVALIDATION_LEN];
>  	u64 length = end - start;
> -	int len = 0;
> +	int len = 0, ret;
>  
>  	xe_gt_assert(gt, fence);
>  
> @@ -481,7 +473,14 @@ int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
>  
>  	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
>  
> -	return send_tlb_inval(&gt->uc.guc, fence, action, len);
> +	xe_tlb_inval_fence_prep(fence);
> +
> +	ret = send_tlb_inval(&gt->uc.guc, fence, action,
> +			     ARRAY_SIZE(action));
> +	if (ret < 0)
> +		inval_fence_signal_unlocked(xe, fence);
> +
> +	return ret;
>  }
>  
>  /**
> -- 
> 2.34.1
>