[PATCH 1/2] drm/xe: Convert xe_device_snapshot to a standalone snapshot
Rodrigo Vivi
rodrigo.vivi at intel.com
Tue Jan 30 22:37:08 UTC 2024
The direction for devcoredump is that the snapshot could be taken
even after the driver is unbound. At that point the xe device
will be gone, so let's convert this to a proper standalone snapshot.
Cc: José Roberto de Souza <jose.souza at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
drivers/gpu/drm/xe/xe_devcoredump.c | 5 +-
drivers/gpu/drm/xe/xe_devcoredump_types.h | 3 +-
drivers/gpu/drm/xe/xe_device.c | 80 ++++++++++++++++++++---
drivers/gpu/drm/xe/xe_device.h | 6 +-
drivers/gpu/drm/xe/xe_device_types.h | 13 ++++
5 files changed, 94 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index e701f0d07b67..30e7edbb8b6f 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -63,7 +63,6 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
- struct xe_device *xe = coredump_to_xe(coredump);
struct xe_devcoredump_snapshot *ss;
struct drm_printer p;
struct drm_print_iterator iter;
@@ -90,7 +89,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
ts = ktime_to_timespec64(ss->boot_time);
drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
- xe_device_snapshot_print(xe, &p);
+ xe_device_snapshot_print(ss->xe, &p);
drm_printf(&p, "\n**** GuC CT ****\n");
xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
@@ -114,6 +113,7 @@ static void xe_devcoredump_free(void *data)
if (!data || !coredump_to_xe(coredump))
return;
+ xe_device_snapshot_free(coredump->snapshot.xe);
xe_guc_ct_snapshot_free(coredump->snapshot.ct);
xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
@@ -153,6 +153,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ coredump->snapshot.xe = xe_device_snapshot_capture(gt_to_xe(q->gt));
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 50106efcbc29..8950b1ca7456 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -26,7 +26,8 @@ struct xe_devcoredump_snapshot {
/** @boot_time: Relative boot time so the uptime can be calculated. */
ktime_t boot_time;
- /* GuC snapshots */
+ /** @xe: Xe Device Info snapshot */
+ struct xe_device_snapshot *xe;
/** @ct: GuC CT snapshot */
struct xe_guc_ct_snapshot *ct;
/** @ge: Guc Engine snapshot */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6faa7865b1aa..6999bb7aea42 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -728,22 +728,84 @@ void xe_device_mem_access_put(struct xe_device *xe)
xe_assert(xe, ref >= 0);
}
-void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+/**
+ * xe_device_snapshot_capture - Take a quick snapshot of the Xe Device info.
+ * @xe: faulty Xe device.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: an Xe device snapshot, or NULL if its allocation fails. It must
+ * be freed by the caller, using `xe_device_snapshot_free`.
+ */
+struct xe_device_snapshot *
+xe_device_snapshot_capture(struct xe_device *xe)
{
+ struct xe_device_snapshot *snapshot;
struct xe_gt *gt;
u8 id;
- drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
- drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+ snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+ if (!snapshot) {
+ drm_err(&xe->drm, "Skipping Xe Device snapshot.\n");
+ return NULL;
+ }
+ snapshot->devid = xe->info.devid;
+ snapshot->revid = xe->info.revid;
+ snapshot->gt_count = xe->info.gt_count;
+
+ snapshot->gt = kmalloc_array(xe->info.gt_count,
+ sizeof(struct gt_info_snapshot), GFP_ATOMIC);
for_each_gt(gt, xe, id) {
- drm_printf(p, "GT id: %u\n", id);
+ snapshot->gt[id].type = gt->info.type;
+ snapshot->gt[id].gmdid = gt->info.gmdid;
+ snapshot->gt[id].reference_clock = gt->info.reference_clock;
+ }
+
+ return snapshot;
+}
+
+/**
+ * xe_device_snapshot_print - Print out a given Xe device snapshot.
+ * @ss: Xe device snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe device snapshot object.
+ */
+void xe_device_snapshot_print(struct xe_device_snapshot *ss,
+ struct drm_printer *p)
+{
+ int i;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", ss->devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", ss->revid);
+
+ for(i = 0; i < ss->gt_count; i++) {
+ drm_printf(p, "GT id: %u\n", i);
drm_printf(p, "\tType: %s\n",
- gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ ss->gt[i].type == XE_GT_TYPE_MAIN ? "main" : "media");
drm_printf(p, "\tIP ver: %u.%u.%u\n",
- REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
- REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
- REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
- drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, ss->gt[i].gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, ss->gt[i].gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, ss->gt[i].gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", ss->gt[i].reference_clock);
}
}
+
+/**
+ * xe_device_snapshot_free - Free all allocated objects for a given xe device snapshot.
+ * @snapshot: Xe device snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture
+ * time.
+ */
+void xe_device_snapshot_free(struct xe_device_snapshot *snapshot)
+{
+ if (!snapshot)
+ return;
+
+ kfree(snapshot->gt);
+ kfree(snapshot);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 270124da1e00..409bfbcffb4a 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -175,6 +175,10 @@ static inline bool xe_device_has_memirq(struct xe_device *xe)
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
-void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
+struct xe_device_snapshot *
+xe_device_snapshot_capture(struct xe_device *xe);
+void xe_device_snapshot_print(struct xe_device_snapshot *snapshot,
+ struct drm_printer *p);
+void xe_device_snapshot_free(struct xe_device_snapshot *snapshot);
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index eb2b806a1d23..50dac1a5b053 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -540,6 +540,19 @@ struct xe_device {
#endif
};
+struct gt_info_snapshot {
+ enum xe_gt_type type;
+ u32 gmdid;
+ u32 reference_clock;
+};
+
+struct xe_device_snapshot {
+ u16 devid;
+ u8 revid;
+ u8 gt_count;
+ struct gt_info_snapshot *gt;
+};
+
/**
* struct xe_file - file handle for XE driver
*/
--
2.43.0
More information about the Intel-xe
mailing list