[Intel-xe] [PATCH 09/22] drm/xe: Add TDR for invalidation fence timeout cleanup

Rodrigo Vivi rodrigo.vivi at intel.com
Fri Feb 3 20:23:56 UTC 2023


From: Matthew Brost <matthew.brost at intel.com>

Endless fences are not good; add a TDR to clean up any invalidation
fences which have not received an invalidation message within a timeout
period.

Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c   | 58 +++++++++++++++++--
 .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h |  2 +
 drivers/gpu/drm/xe/xe_gt_types.h              |  5 ++
 drivers/gpu/drm/xe/xe_trace.h                 |  5 ++
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 4d179357ce65..9e026fd0a45d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -9,12 +9,45 @@
 #include "xe_guc_ct.h"
 #include "xe_trace.h"
 
+#define TLB_TIMEOUT	(HZ / 4)
+
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
 	return container_of(guc, struct xe_gt, uc.guc);
 }
 
+/* TDR worker: fail (-ETIME) timed-out pending fences, re-arm if any remain. */
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+	struct xe_gt *gt = container_of(work, struct xe_gt,
+					tlb_invalidation.fence_tdr.work);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		s64 since_inval_ms = ktime_ms_delta(ktime_get(), fence->invalidation_time);
+
+		/* List is in send order: stop at the first unexpired fence. */
+		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+			break;
+
+		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+		drm_err(&gt_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d\n",
+			fence->seqno);
+		/* Unlink, signal with -ETIME, then drop a reference. */
+		list_del(&fence->link);
+		fence->base.error = -ETIME;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		queue_delayed_work(system_wq, &gt->tlb_invalidation.fence_tdr,
+				   TLB_TIMEOUT);
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
 /**
  * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
  * @gt: graphics tile
@@ -30,6 +63,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
 	spin_lock_init(&gt->tlb_invalidation.lock);
 	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
+	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
+			  xe_gt_tlb_fence_timeout);
 
 	return 0;
 }
@@ -44,6 +79,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
 
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
 	mutex_lock(&gt->uc.guc.ct.lock);
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
@@ -67,6 +104,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	};
 	int seqno;
 	int ret;
+	bool queue_work;
 
 	/*
 	 * XXX: The seqno algorithm relies on TLB invalidation being processed
@@ -76,10 +114,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
 	if (fence) {
-		/*
-		 * FIXME: How to deal TLB invalidation timeout, right now we
-		 * just have an endless fence which isn't ideal.
-		 */
+		queue_work = list_empty(&gt->tlb_invalidation.pending_fences);
 		fence->seqno = seqno;
 		list_add_tail(&fence->link,
 			      &gt->tlb_invalidation.pending_fences);
@@ -92,6 +127,13 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 		gt->tlb_invalidation.seqno = 1;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret && fence) {
+		fence->invalidation_time = ktime_get();
+		if (queue_work)
+			queue_delayed_work(system_wq,
+					   &gt->tlb_invalidation.fence_tdr,
+					   TLB_TIMEOUT);
+	}
 	if (!ret)
 		ret = seqno;
 	mutex_unlock(&guc->ct.lock);
@@ -152,7 +194,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 	 */
 	ret = wait_event_timeout(guc->ct.wq,
 				 tlb_invalidation_seqno_past(gt, seqno),
-				 HZ / 5);
+				 TLB_TIMEOUT);
 	if (!ret) {
 		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
 			seqno, gt->tlb_invalidation.seqno_recv);
@@ -201,6 +243,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
 		trace_xe_gt_tlb_invalidation_fence_signal(fence);
 		list_del(&fence->link);
+		if (!list_empty(&gt->tlb_invalidation.pending_fences))
+			mod_delayed_work(system_wq,
+					 &gt->tlb_invalidation.fence_tdr,
+					 TLB_TIMEOUT);
+		else
+			cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
 		dma_fence_signal(&fence->base);
 		dma_fence_put(&fence->base);
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
index ab57c14c6d14..934c828efe31 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -21,6 +21,8 @@ struct xe_gt_tlb_invalidation_fence {
 	struct list_head link;
 	/** @seqno: seqno of TLB invalidation to signal fence one */
 	int seqno;
+	/** @invalidation_time: time of TLB invalidation */
+	ktime_t invalidation_time;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3b2d9842add7..a40fab262ac9 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -174,6 +174,11 @@ struct xe_gt {
 		 * invaliations, protected by CT lock
 		 */
 		struct list_head pending_fences;
+		/**
+		 * @fence_tdr: schedules a delayed call to
+		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
+		 */
+		struct delayed_work fence_tdr;
 		/** @fence_context: context for TLB invalidation fences */
 		u64 fence_context;
 		/**
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 373b0825ec79..1774658b18b7 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -69,6 +69,11 @@ DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
 	     TP_ARGS(fence)
 );
 
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
 DECLARE_EVENT_CLASS(xe_bo,
 		    TP_PROTO(struct xe_bo *bo),
 		    TP_ARGS(bo),
-- 
2.39.1



More information about the Intel-xe mailing list