[PATCH v9 6/8] drm/xe/eustall: Add EU stall sampling support for Xe2

Harish Chegondi harish.chegondi at intel.com
Mon Feb 10 13:46:47 UTC 2025


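Add EU stall sampling support for Xe2 architecture GPUs - LNL and BMG.
The EU stall data format for LNL and BMG is different from that of PVC.
Also, unlike PVC where the overflow (dropped eviction line) bit is cleared
by writing 1 to it, on Xe2 the bit has to be cleared by writing 0 to it.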

v9: Use GRAPHICS_VER() check instead of platform

v8: Renamed struct drm_xe_eu_stall_data_xe2 to struct xe_eu_stall_data_xe2
    since it is a local structure.

Signed-off-by: Harish Chegondi <harish.chegondi at intel.com>
---
 drivers/gpu/drm/xe/xe_eu_stall.c | 51 ++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index 428267010805..52f8f40ba9a6 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -113,12 +113,51 @@ struct xe_eu_stall_data_pvc {
 	__u64 unused[6];
 } __packed;
 
+/**
+ * struct xe_eu_stall_data_xe2 - EU stall data format for Xe2 (LNL, BMG)
+ *			Bits		Field
+ * @ip_addr:		0  to 28	IP (addr)
+ * @tdr_count:		29 to 36	Tdr count
+ * @other_count:	37 to 44	other count
+ * @control_count:	45 to 52	control count
+ * @pipestall_count:	53 to 60	pipestall count
+ * @send_count:		61 to 68	send count
+ * @dist_acc_count:	69 to 76	dist_acc count
+ * @sbid_count:		77 to 84	sbid count
+ * @sync_count:		85 to 92	sync count
+ * @inst_fetch_count:	93 to 100	inst_fetch count
+ * @active_count:	101 to 108	Active count
+ * @ex_id:		109 to 111	Exid
+ * @end_flag:		112		EndFlag (is always 1)
+ * @unused_bits:	113 to 127	unused bits
+ * @unused:		remaining	unused bytes (6 x 8 bytes)
+ */
+struct xe_eu_stall_data_xe2 {
+	__u64 ip_addr:29;
+	__u64 tdr_count:8;
+	__u64 other_count:8;
+	__u64 control_count:8;
+	__u64 pipestall_count:8;
+	__u64 send_count:8;
+	__u64 dist_acc_count:8;
+	__u64 sbid_count:8;
+	__u64 sync_count:8;
+	__u64 inst_fetch_count:8;
+	__u64 active_count:8;
+	__u64 ex_id:3;
+	__u64 end_flag:1;
+	__u64 unused_bits:15;
+	__u64 unused[6];
+} __packed;
+
 static size_t xe_eu_stall_data_record_size(struct xe_device *xe)
 {
 	unsigned long record_size = 0;
 
 	if (xe->info.platform == XE_PVC)
 		record_size = sizeof(struct xe_eu_stall_data_pvc);
+	else if (GRAPHICS_VER(xe) >= 20) /* Xe2 and newer */
+		record_size = sizeof(struct xe_eu_stall_data_xe2);
 
 	return record_size;
 }
@@ -345,10 +384,16 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
 
 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 write_ptr_reg;
 
-	/* On PVC, the overflow bit has to be cleared by writing 1 to it. */
-	write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+	/*
+	 * PVC: write 1 to clear the overflow bit. Graphics ver >= 20 (Xe2): write 0.
+	 */
+	if (GRAPHICS_VER(xe) >= 20)
+		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+	else
+		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
 
 	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
 }
@@ -821,7 +866,7 @@ static const struct file_operations fops_eu_stall = {
 
 static inline bool has_eu_stall_sampling_support(struct xe_device *xe)
 {
-	return ((xe->info.platform == XE_PVC) ? true : false);
+	return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20;
 }
 
 /**
-- 
2.48.1



More information about the Intel-xe mailing list