[Intel-xe] [PATCH 11/14] drm/xe: Add HW Engine snapshot to xe_devcoredump.
Rodrigo Vivi
rodrigo.vivi at intel.com
Wed Apr 26 20:57:10 UTC 2023
Let's continue to add our existent simple logs to devcoredump one
by one. Any format change should come on follow-up work.
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
drivers/gpu/drm/xe/xe_devcoredump.c | 45 +++++++++++++++++++++++
drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 ++
2 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 0e7ec654a9f2..1ffd12646a99 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -9,10 +9,13 @@
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
+#include "xe_device.h"
#include "xe_engine.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
+#include "xe_hw_engine.h"
/**
* DOC: Xe device coredump
@@ -62,6 +65,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
struct drm_printer p;
struct drm_print_iterator iter;
struct timespec64 ts;
+ struct xe_engine *e;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
/* Our device is gone already... */
if (!data || !coredump_to_xe(coredump))
@@ -75,6 +81,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
mutex_lock(&coredump->lock);
ss = &coredump->snapshot;
+ e = coredump->faulty_engine;
p = drm_coredump_printer(&iter);
drm_printf(&p, "**** Xe Device Coredump ****\n");
@@ -92,6 +99,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
+ drm_printf(&p, "\n**** HW Engines ****\n");
+ for_each_hw_engine(hwe, e->gt, id)
+ xe_hw_engine_snapshot_print(coredump->snapshot.hwe[id], &p);
+
mutex_unlock(&coredump->lock);
return count - iter.remain;
@@ -100,6 +111,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
static void xe_devcoredump_free(void *data)
{
struct xe_devcoredump *coredump = data;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
/* Our device is gone. Nothing to do... */
if (!data || !coredump_to_xe(coredump))
@@ -109,6 +122,8 @@ static void xe_devcoredump_free(void *data)
xe_guc_ct_snapshot_free(coredump->snapshot.ct);
xe_guc_engine_snapshot_free(coredump->snapshot.ge);
+ for_each_hw_engine(hwe, coredump->faulty_engine->gt, id)
+ xe_hw_engine_snapshot_free(coredump->snapshot.hwe[id]);
coredump->faulty_engine = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
@@ -122,13 +137,43 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump)
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_engine *e = coredump->faulty_engine;
struct xe_guc *guc = engine_to_guc(e);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 adj_logical_mask = e->logical_mask;
+ u32 width_mask = (0x1 << e->width) - 1;
+ int i;
+ bool cookie;
lockdep_assert_held(&coredump->lock);
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
+ cookie = dma_fence_begin_signalling();
+ for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+ if (adj_logical_mask & BIT(i)) {
+ adj_logical_mask |= width_mask << i;
+ i += e->width;
+ } else {
+ ++i;
+ }
+ }
+
+ xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct);
coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e);
+
+ for_each_hw_engine(hwe, e->gt, id) {
+ if (hwe->class != e->hwe->class ||
+ !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+ coredump->snapshot.hwe[id] = NULL;
+ continue;
+ }
+ coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
+ }
+
+ xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+ dma_fence_end_signalling(cookie);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index e055b266af70..8b17ecf1b6e6 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -9,6 +9,8 @@
#include <linux/ktime.h>
#include <linux/mutex.h>
+#include "xe_hw_engine_types.h"
+
struct xe_device;
/**
@@ -29,6 +31,8 @@ struct xe_devcoredump_snapshot {
struct xe_guc_ct_snapshot *ct;
/** @ge: Guc Engine snapshot */
struct xe_guc_submit_engine_snapshot *ge;
+ /** @hwe: HW Engine snapshot array */
+ struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
};
/**
--
2.39.2
More information about the Intel-xe
mailing list