[PATCH v9 09/11] drm/xe/guc: Dump entire CTB on errors
John.C.Harrison at Intel.com
John.C.Harrison at Intel.com
Wed Oct 2 21:14:20 UTC 2024
From: John Harrison <John.C.Harrison at Intel.com>
The dump of the CT buffers was only showing the unprocessed data which
is not generally useful for saying why a hang occurred - because it
was probably caused by the commands that were just processed. So save
and dump the entire buffer but in a more compact dump format. Also
zero fill it on allocation to avoid confusion over uninitialised data
in the dump.
v2: Add kerneldoc - review feedback from Michal W.
v3: Fix kerneldoc.
v4: Use ascii85 instead of hexdump (review feedback from Matthew B).
v5: Dump the entire CTB object rather than separately dumping just the
H2G and G2H sections. That way it includes the full header info.
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Reviewed-by: Julia Filipchuk <julia.filipchuk at intel.com>
---
drivers/gpu/drm/xe/xe_guc_ct.c | 94 ++++++++++------------------
drivers/gpu/drm/xe/xe_guc_ct.h | 8 +--
drivers/gpu/drm/xe/xe_guc_ct_types.h | 6 +-
3 files changed, 41 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 46320b94bceb..39abf3b3a043 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -17,6 +17,7 @@
#include "abi/guc_actions_sriov_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_bo.h"
+#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
@@ -435,6 +436,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -1575,48 +1577,33 @@ static void g2h_worker_func(struct work_struct *w)
receive_g2h(ct);
}
-static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
- struct guc_ctb_snapshot *snapshot,
- bool atomic)
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
{
- u32 head, tail;
+ struct xe_guc_ct_snapshot *snapshot;
- xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
- sizeof(struct guc_ct_buffer_desc));
- memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
- snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
- if (!snapshot->cmds) {
- drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CT info will be available.\n");
- return;
+ if (ct->bo) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
}
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
-
- if (head != tail) {
- struct iosys_map map =
- IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
-
- while (head != tail) {
- snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
- ++head;
- if (head == ctb->info.size) {
- head = 0;
- map = ctb->cmds;
- } else {
- iosys_map_incr(&map, sizeof(u32));
- }
- }
- }
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
}
static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
struct drm_printer *p)
{
- u32 head, tail;
-
drm_printf(p, "\tsize: %d\n", snapshot->info.size);
drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
drm_printf(p, "\thead: %d\n", snapshot->info.head);
@@ -1626,25 +1613,6 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
-
- if (!snapshot->cmds)
- return;
-
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
-
- while (head != tail) {
- drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
- snapshot->cmds[head]);
- ++head;
- if (head == snapshot->info.size)
- head = 0;
- }
-}
-
-static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
-{
- kfree(snapshot->cmds);
}
/**
@@ -1665,9 +1633,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
struct xe_device *xe = ct_to_xe(ct);
struct xe_guc_ct_snapshot *snapshot;
- snapshot = kzalloc(sizeof(*snapshot),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
-
+ snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
if (!snapshot) {
xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
return NULL;
@@ -1676,12 +1642,13 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
snapshot->ct_enabled = true;
snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
- &snapshot->h2g, atomic);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
- &snapshot->g2h, atomic);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
}
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
return snapshot;
}
@@ -1704,9 +1671,15 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
drm_puts(p, "G2H CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->g2h, p);
-
drm_printf(p, "\tg2h outstanding: %d\n",
snapshot->g2h_outstanding);
+
+ if (snapshot->ctb) {
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ } else {
+ drm_printf(p, "CTB snapshot missing!\n");
+ return;
+ }
} else {
drm_puts(p, "CT disabled\n");
}
@@ -1724,8 +1697,7 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
if (!snapshot)
return;
- guc_ctb_snapshot_free(&snapshot->h2g);
- guc_ctb_snapshot_free(&snapshot->g2h);
+ kfree(snapshot->ctb);
kfree(snapshot);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 293041bed7ed..338f0b75d29f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -9,6 +9,7 @@
#include "xe_guc_ct_types.h"
struct drm_printer;
+struct xe_device;
int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
@@ -16,10 +17,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
-struct xe_guc_ct_snapshot *
-xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
-void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
- struct drm_printer *p);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 85e127ec91d7..8e1b9d981d61 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -52,8 +52,6 @@ struct guc_ctb {
struct guc_ctb_snapshot {
/** @desc: snapshot of the CTB descriptor */
struct guc_ct_buffer_desc desc;
- /** @cmds: snapshot of the CTB commands */
- u32 *cmds;
/** @info: snapshot of the CTB info */
struct guc_ctb_info info;
};
@@ -70,6 +68,10 @@ struct xe_guc_ct_snapshot {
struct guc_ctb_snapshot g2h;
/** @h2g: H2G CTB snapshot */
struct guc_ctb_snapshot h2g;
+ /** @ctb_size: size of the snapshot of the CTB */
+ size_t ctb_size;
+ /** @ctb: snapshot of the entire CTB */
+ u32 *ctb;
};
/**
--
2.46.0
More information about the Intel-xe
mailing list