[PATCH 1/2] drm/xe: Convert xe_device_snapshot to a standalone snapshot

Rodrigo Vivi rodrigo.vivi at intel.com
Tue Jan 30 22:37:08 UTC 2024


devcoredump direction is that the snapshot could be taken
even after the driver unbind. At that point the xe device
will be gone. So, let's convert to a proper snapshot.

Cc: José Roberto de Souza <jose.souza at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c       |  5 +-
 drivers/gpu/drm/xe/xe_devcoredump_types.h |  3 +-
 drivers/gpu/drm/xe/xe_device.c            | 80 ++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_device.h            |  6 +-
 drivers/gpu/drm/xe/xe_device_types.h      | 13 ++++
 5 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index e701f0d07b67..30e7edbb8b6f 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -63,7 +63,6 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 				   size_t count, void *data, size_t datalen)
 {
 	struct xe_devcoredump *coredump = data;
-	struct xe_device *xe = coredump_to_xe(coredump);
 	struct xe_devcoredump_snapshot *ss;
 	struct drm_printer p;
 	struct drm_print_iterator iter;
@@ -90,7 +89,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
 	ts = ktime_to_timespec64(ss->boot_time);
 	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
-	xe_device_snapshot_print(xe, &p);
+	xe_device_snapshot_print(ss->xe, &p);
 
 	drm_printf(&p, "\n**** GuC CT ****\n");
 	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
@@ -114,6 +113,7 @@ static void xe_devcoredump_free(void *data)
 	if (!data || !coredump_to_xe(coredump))
 		return;
 
+	xe_device_snapshot_free(coredump->snapshot.xe);
 	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
 	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
 	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
@@ -153,6 +153,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 
 	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
 
+	coredump->snapshot.xe = xe_device_snapshot_capture(gt_to_xe(q->gt));
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
 	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
 
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 50106efcbc29..8950b1ca7456 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -26,7 +26,8 @@ struct xe_devcoredump_snapshot {
 	/** @boot_time:  Relative boot time so the uptime can be calculated. */
 	ktime_t boot_time;
 
-	/* GuC snapshots */
+	/** @xe: Xe Device Info snapshot */
+	struct xe_device_snapshot *xe;
 	/** @ct: GuC CT snapshot */
 	struct xe_guc_ct_snapshot *ct;
 	/** @ge: Guc Engine snapshot */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6faa7865b1aa..6999bb7aea42 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -728,22 +728,84 @@ void xe_device_mem_access_put(struct xe_device *xe)
 	xe_assert(xe, ref >= 0);
 }
 
-void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+/**
+ * xe_device_snapshot_capture - Take a quick snapshot of the Xe Device info.
+ * @xe: faulty Xe device.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a Xe device snapshot that must be freed by the
+ * caller, using `xe_device_snapshot_free`.
+ */
+struct xe_device_snapshot *
+xe_device_snapshot_capture(struct xe_device *xe)
 {
+	struct xe_device_snapshot *snapshot;
 	struct xe_gt *gt;
 	u8 id;
 
-	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
-	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping Xe Device snapshot.\n");
+		return NULL;
+	}
 
+	snapshot->devid = xe->info.devid;
+	snapshot->revid = xe->info.revid;
+	snapshot->gt_count = xe->info.gt_count;
+
+	snapshot->gt = kmalloc_array(xe->info.gt_count,
+				    sizeof(struct gt_info_snapshot), GFP_ATOMIC);
 	for_each_gt(gt, xe, id) {
-		drm_printf(p, "GT id: %u\n", id);
+		snapshot->gt[id].type = gt->info.type;
+		snapshot->gt[id].gmdid = gt->info.gmdid;
+		snapshot->gt[id].reference_clock = gt->info.reference_clock;
+	}
+
+	return snapshot;
+}
+
+/**
+ * xe_decice_snapshot_print - Print out a given Xe device snapshot.
+ * @snapshot: Xe device snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC Submit Engine snapshot object.
+ */
+void xe_device_snapshot_print(struct xe_device_snapshot *ss,
+			      struct drm_printer *p)
+{
+	int i;
+
+	drm_printf(p, "PCI ID: 0x%04x\n", ss->devid);
+	drm_printf(p, "PCI revision: 0x%02x\n", ss->revid);
+
+	for(i = 0; i < ss->gt_count; i++) {
+		drm_printf(p, "GT id: %u\n", i);
 		drm_printf(p, "\tType: %s\n",
-			   gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+			   ss->gt[i].type == XE_GT_TYPE_MAIN ? "main" : "media");
 		drm_printf(p, "\tIP ver: %u.%u.%u\n",
-			   REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
-			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
-			   REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
-		drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+			   REG_FIELD_GET(GMD_ID_ARCH_MASK, ss->gt[i].gmdid),
+			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, ss->gt[i].gmdid),
+			   REG_FIELD_GET(GMD_ID_REVID, ss->gt[i].gmdid));
+		drm_printf(p, "\tCS reference clock: %u\n", ss->gt[i].reference_clock);
 	}
 }
+
+/**
+ * xe_device_snapshot_free - Free all allocated objects for a given xe device snapshot.
+ * @snapshot: Xe device snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_device_snapshot_free(struct xe_device_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->gt);
+	kfree(snapshot);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 270124da1e00..409bfbcffb4a 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -175,6 +175,10 @@ static inline bool xe_device_has_memirq(struct xe_device *xe)
 
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
 
-void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
+struct xe_device_snapshot *
+xe_device_snapshot_capture(struct xe_device *xe);
+void xe_device_snapshot_print(struct xe_device_snapshot *snapshot,
+			      struct drm_printer *p);
+void xe_device_snapshot_free(struct xe_device_snapshot *snapshot);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index eb2b806a1d23..50dac1a5b053 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -540,6 +540,19 @@ struct xe_device {
 #endif
 };
 
+struct gt_info_snapshot {
+	enum xe_gt_type type;
+	u32 gmdid;
+	u32 reference_clock;
+};
+
+struct xe_device_snapshot {
+	u16 devid;
+	u8 revid;
+	u8 gt_count;
+	struct gt_info_snapshot *gt;
+};
+
 /**
  * struct xe_file - file handle for XE driver
  */
-- 
2.43.0



More information about the Intel-xe mailing list