[PATCH v5 5/7] drm/xe/eustall: Add EU stall sampling support for Xe2
Harish Chegondi
harish.chegondi at intel.com
Mon Nov 18 09:07:17 UTC 2024
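Add EU stall sampling support for Xe2 architecture GPUs - LNL and BMG.
The EU stall data format for LNL and BMG is different from that of PVC.
The overflow bit in the EU stall report register is also cleared differently:
graphics version 20 and later clear it by writing 0, whereas PVC clears it by writing 1.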
Signed-off-by: Harish Chegondi <harish.chegondi at intel.com>
---
drivers/gpu/drm/xe/xe_eu_stall.c | 52 ++++++++++++++++++++++++++++++--
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index 96ac08cc7bf2..9169a31f620c 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -73,6 +73,42 @@ struct drm_xe_eu_stall_data_pvc {
__u64 unused[6];
} __packed;
+/**
+ * struct drm_xe_eu_stall_data_xe2 - EU stall data format for LNL, BMG
+ *
+ * Bits Field
+ * 0 to 28 IP (addr)
+ * 29 to 36 Tdr count
+ * 37 to 44 other count
+ * 45 to 52 control count
+ * 53 to 60 pipestall count
+ * 61 to 68 send count
+ * 69 to 76 dist_acc count
+ * 77 to 84 sbid count
+ * 85 to 92 sync count
+ * 93 to 100 inst_fetch count
+ * 101 to 108 Active count
+ * 109 to 111 Exid
+ * 112 EndFlag (is always 1)
+ */
+struct drm_xe_eu_stall_data_xe2 {
+ __u64 ip_addr:29;
+ __u64 tdr_count:8;
+ __u64 other_count:8;
+ __u64 control_count:8;
+ __u64 pipestall_count:8;
+ __u64 send_count:8; /* bits 61 to 68: straddles the first 64-bit word boundary */
+ __u64 dist_acc_count:8;
+ __u64 sbid_count:8;
+ __u64 sync_count:8;
+ __u64 inst_fetch_count:8;
+ __u64 active_count:8;
+ __u64 ex_id:3;
+ __u64 end_flag:1;
+ __u64 unused_bits:15; /* bits 113 to 127: pads the fields above to 128 bits */
+ __u64 unused[6]; /* 48 more bytes; 64 bytes total if the bit-fields pack into 16 */
+} __packed;
+
static unsigned long
xe_eu_stall_data_record_size(struct xe_device *xe)
{
@@ -81,6 +117,8 @@ xe_eu_stall_data_record_size(struct xe_device *xe)
if (platform == XE_PVC)
record_size = sizeof(struct drm_xe_eu_stall_data_pvc);
+ else if ((platform == XE_LUNARLAKE) || (platform == XE_BATTLEMAGE))
+ record_size = sizeof(struct drm_xe_eu_stall_data_xe2);
return record_size;
}
@@ -308,10 +346,16 @@ eu_stall_data_buf_check(struct xe_eu_stall_data_stream *stream)
static void
clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
+ struct xe_device *xe = gt_to_xe(gt);
u32 write_ptr_reg;
- /* On PVC, the overflow bit has to be cleared by writing 1 to it. */
- write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+ /*
+ * Clear the overflow bit: write 0 on graphics version >= 20, write 1 otherwise (PVC).
+ */
+ if (GRAPHICS_VER(xe) >= 20)
+ write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+ else
+ write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}
@@ -872,7 +916,9 @@ static const struct file_operations fops_eu_stall = {
static inline bool has_eu_stall_sampling_support(struct xe_device *xe)
{
- return ((xe->info.platform == XE_PVC) ? true : false);
+ return xe->info.platform == XE_PVC ||
+ xe->info.platform == XE_LUNARLAKE ||
+ xe->info.platform == XE_BATTLEMAGE; /* i.e. only PVC, LNL and BMG */
}
/**
--
2.45.1
More information about the Intel-xe
mailing list