[PATCH v9 4/4] drm/xe/guc: Extract GuC capture lists to register snapshot
Zhanjun Dong
zhanjun.dong at intel.com
Fri Jun 7 00:07:19 UTC 2024
Upon the G2H Notify-Err-Capture event, parse through the
GuC Log Buffer (error-capture-subregion) and generate one or
more capture-nodes. A single node represents a single "engine-
instance-capture-dump" and contains at least 3 register lists:
global, engine-class and engine-instance. An internal link
list is maintained to store one or more nodes.
Because the link-list node generation happens before the call
to devcoredump, duplicate the global and engine-class register
lists for each engine-instance register dump if we find
dependent-engine resets in an engine-capture-group.
When xe_devcoredump calls into snapshot_from_capture_engine,
we detach the matching node (guc-id, LRCA, etc.) from the link
list above and attach it to the snapshot_regs structure when we
have a matching LRCA/guc-id/engine-instance.
To avoid dynamically allocating the output nodes during GT
reset, pre-allocate a fixed number of empty nodes up front (at
the time of ADS registration) that we can consume from or
return to an internal cached list of nodes.
Add guc capture data structure definition.
Add xe_hw_engine_snapshot_from_capture to take snapshot from capture
node list.
Move snapshot register struct out of engine snapshot struct.
Add the snapshot register offset to each entry of the register
definition list in xe_guc_capture.c.
The snapshot is split into global, engine-class, engine-instance
and steering register zones; a few macros are defined to separate
the zones.
Support combining two 32-bit registers into one 64-bit register
in the snapshot, performing endianness conversion if needed.
Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
---
drivers/gpu/drm/xe/abi/guc_actions_abi.h | 7 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +
drivers/gpu/drm/xe/xe_devcoredump.c | 4 +
drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +
drivers/gpu/drm/xe/xe_guc.h | 23 +
drivers/gpu/drm/xe/xe_guc_capture.c | 876 +++++++++++++++++++++-
drivers/gpu/drm/xe/xe_guc_capture.h | 9 +
drivers/gpu/drm/xe/xe_guc_capture_fwif.h | 45 ++
drivers/gpu/drm/xe/xe_guc_ct.c | 2 +
drivers/gpu/drm/xe/xe_guc_fwif.h | 6 +
drivers/gpu/drm/xe/xe_guc_submit.c | 63 +-
drivers/gpu/drm/xe/xe_guc_submit.h | 2 +
drivers/gpu/drm/xe/xe_hw_engine.c | 218 ++++--
drivers/gpu/drm/xe/xe_hw_engine_types.h | 159 ++--
drivers/gpu/drm/xe/xe_lrc.h | 1 +
15 files changed, 1244 insertions(+), 175 deletions(-)
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 79ba98a169f9..ed1eeea34e8e 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -182,6 +182,13 @@ enum xe_guc_sleep_state_status {
#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+enum xe_guc_state_capture_event_status {
+	XE_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+	XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+
#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
/* Flush PPC or SMRO caches along with TLB invalidation request */
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d09b2473259f..c6bd50738e2b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -574,4 +574,6 @@
#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index d7f2d19a77c1..5e80710d3cc8 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -16,6 +16,7 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
@@ -149,10 +150,12 @@ static void xe_devcoredump_free(void *data)
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
xe_vm_snapshot_free(coredump->snapshot.vm);
+ xe_guc_capture_free(&coredump->snapshot.gt->uc.guc);
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
+ coredump->job = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
}
@@ -186,6 +189,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
put_task_struct(task);
ss->gt = q->gt;
+ coredump->job = job;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 923cdf72a816..c39ab73a9f6a 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -61,6 +61,8 @@ struct xe_devcoredump {
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
struct xe_devcoredump_snapshot snapshot;
+ /** @job: Point to the issue job */
+ struct xe_sched_job *job;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index ddfa855458ab..e1afda9070f4 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -59,6 +59,29 @@ static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
}
}
+static inline u16 xe_guc_class_to_capture_class(uint class)
+{
+ switch (class) {
+ case GUC_RENDER_CLASS:
+ case GUC_COMPUTE_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE;
+ case GUC_GSC_OTHER_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_GSC_OTHER;
+ case GUC_VIDEO_CLASS:
+ case GUC_VIDEOENHANCE_CLASS:
+ case GUC_BLITTER_CLASS:
+ return class;
+ default:
+ XE_WARN_ON(class);
+ return -1;
+ }
+}
+
+static inline u16 xe_engine_class_to_guc_capture_class(enum xe_engine_class class)
+{
+	return xe_guc_class_to_capture_class(xe_engine_class_to_guc_class(class));
+}
+
static inline struct xe_gt *guc_to_gt(struct xe_guc *guc)
{
return container_of(guc, struct xe_gt, uc.guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 0c90def290de..f18933503672 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -26,10 +26,13 @@
#include "xe_guc_capture_fwif.h"
#include "xe_guc_ct.h"
#include "xe_guc_log.h"
+#include "xe_guc_submit_types.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine_types.h"
+#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
+#include "xe_sched_job.h"
/*
* Define all device tables of GuC error capture register lists
@@ -37,28 +40,81 @@
* from the engine-mmio-base
*/
#define COMMON_XELP_BASE_GLOBAL \
- { FORCEWAKE_GT, 0, 0}
+ { FORCEWAKE_GT, 0, 0, "FORCEWAKE_GT",\
+ offsetof(struct snapshot_regs, forcewake_gt) }
#define COMMON_BASE_ENGINE_INSTANCE \
- { RING_ESR(0), 0, 0}, \
- { RING_EMR(0), 0, 0}, \
- { RING_EIR(0), 0, 0}, \
- { RING_EXECLIST_STATUS_HI(0), 0, 0}, \
- { RING_EXECLIST_STATUS_LO(0), 0, 0}, \
- { RING_DMA_FADD(0), 0, 0}, \
- { RING_DMA_FADD_UDW(0), 0, 0}, \
- { RING_IPEHR(0), 0, 0}, \
- { RING_BBADDR(0), 0, 0}, \
- { RING_BBADDR_UDW(0), 0, 0}, \
- { RING_ACTHD(0), 0, 0}, \
- { RING_ACTHD_UDW(0), 0, 0}, \
- { RING_START(0), 0, 0}, \
- { RING_HEAD(0), 0, 0}, \
- { RING_TAIL(0), 0, 0}, \
- { RING_CTL(0), 0, 0}, \
- { RING_MI_MODE(0), 0, 0}, \
- { RING_HWS_PGA(0), 0, 0}, \
- { RING_MODE(0), 0, 0}
+ { RING_HWSTAM(0), 0, 0, "HWSTAM",\
+ offsetof(struct snapshot_regs, ring_hwstam) }, \
+ { RING_HWS_PGA(0), 0, 0, "RING_HWS_PGA",\
+ offsetof(struct snapshot_regs, ring_hws_pga) }, \
+ { RING_HEAD(0), 0, 0, "RING_HEAD",\
+ offsetof(struct snapshot_regs, ring_head) }, \
+ { RING_TAIL(0), 0, 0, "RING_TAIL",\
+ offsetof(struct snapshot_regs, ring_tail) }, \
+ { RING_CTL(0), 0, 0, "RING_CTL",\
+ offsetof(struct snapshot_regs, ring_ctl) }, \
+ { RING_MI_MODE(0), 0, 0, "RING_MI_MODE",\
+ offsetof(struct snapshot_regs, ring_mi_mode) }, \
+ { RING_MODE(0), 0, 0, "RING_MODE",\
+ offsetof(struct snapshot_regs, ring_mode) }, \
+ { RING_ESR(0), 0, 0, "RING_ESR",\
+ offsetof(struct snapshot_regs, ring_esr) }, \
+ { RING_EMR(0), 0, 0, "RING_EMR",\
+ offsetof(struct snapshot_regs, ring_emr) }, \
+ { RING_EIR(0), 0, 0, "RING_EIR",\
+ offsetof(struct snapshot_regs, ring_eir) }, \
+ { RING_IMR(0), 0, 0, "RING_IMR",\
+ offsetof(struct snapshot_regs, ring_imr) }, \
+ { RING_IPEHR(0), 0, 0, "IPEHR",\
+ offsetof(struct snapshot_regs, ipehr) }, \
+ /* 64 bit register - Start */ \
+ /* defined XE_GUC_SNAPSHOT_REGS_U64_START_REG_ADDR to the address of 1st register below */ \
+ /* into xe_hw_engine_types.h */ \
+ { RING_ACTHD(0), 0, 0, "ACTHD",\
+ offsetof(struct snapshot_regs, ring_acthd) }, \
+ { RING_ACTHD_UDW(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_acthd) + 4}, \
+ { RING_BBADDR(0), 0, 0, "RING_BBADDR",\
+ offsetof(struct snapshot_regs, ring_bbaddr) }, \
+ { RING_BBADDR_UDW(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_bbaddr) + 4}, \
+ { RING_START(0), 0, 0, "RING_START",\
+ offsetof(struct snapshot_regs, ring_start) }, \
+ { RING_START_UDW(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_start) + 4}, \
+ { RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD",\
+ offsetof(struct snapshot_regs, ring_dma_fadd) }, \
+ { RING_DMA_FADD_UDW(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_dma_fadd) + 4}, \
+ { RING_EXECLIST_STATUS_LO(0), 0, 0, "RING_EXECLIST_STATUS",\
+ offsetof(struct snapshot_regs, ring_execlist_status)}, \
+ { RING_EXECLIST_STATUS_HI(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_execlist_status) + 4}, \
+ { RING_EXECLIST_SQ_CONTENTS_LO(0), 0, 0, "RING_EXECLIST_SQ_CONTENTS",\
+ offsetof(struct snapshot_regs, ring_execlist_sq_contents)}, \
+ { RING_EXECLIST_SQ_CONTENTS_HI(0), 0, 0, NULL,\
+ offsetof(struct snapshot_regs, ring_execlist_sq_contents) + 4}, \
+ /* 64 bit register - End */ \
+ /* Extra handling registers */ \
+ /* define XE_GUC_SNAPSHOT_EXTRA_OPERATION_REGS_START_REG_ADDR to the address of 1st */ \
+ /* register below into xe_hw_engine_types.h */ \
+ { INDIRECT_RING_STATE(0), 0, 0, "INDIRECT_RING_STATE",\
+ offsetof(struct snapshot_regs, indirect_ring_state)}
+
+#define COMMON_XELP_RC_CLASS \
+ { RCU_MODE, 0, 0, "RCU_MODE",\
+ offsetof(struct snapshot_regs, rcu_mode) }
+
+#define XELP_DIRECT_READ_VEC_CLASS \
+ { SFC_DONE(0), 0, 0, "SFC_DONE[0]", \
+ offsetof(struct snapshot_regs, sfc_done_0) }, \
+ { SFC_DONE(1), 0, 0, "SFC_DONE[1]", \
+ offsetof(struct snapshot_regs, sfc_done_1) }, \
+ { SFC_DONE(2), 0, 0, "SFC_DONE[2]", \
+ offsetof(struct snapshot_regs, sfc_done_2) }, \
+ { SFC_DONE(3), 0, 0, "SFC_DONE[3]", \
+ offsetof(struct snapshot_regs, sfc_done_3) }
/* XE_LP Global */
static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
@@ -70,6 +126,11 @@ static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
+/* Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
+ COMMON_XELP_RC_CLASS,
+};
+
/* Media Decode/Encode Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
@@ -80,6 +141,11 @@ static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
+/* Video Enhancement Per-Engine-Class */
+static const struct __guc_mmio_reg_descr xe_vec_direct_read_regs[] = {
+ XELP_DIRECT_READ_VEC_CLASS,
+};
+
/* Blitter Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
@@ -112,12 +178,13 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = {
/* List of lists */
static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE,
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_direct_read_regs, PF, DIRECT_READ, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE,
GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
@@ -148,6 +215,7 @@ static const char * const capture_engine_class_names[] = {
*/
#define get_item_with_default(ar, index) (ar[(index) >= ARRAY_SIZE(ar) ? ARRAY_SIZE(ar) - 1 : \
(index)])
+static void guc_capture_create_prealloc_nodes(struct xe_guc *guc);
static const struct __guc_mmio_reg_descr_group *
guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
@@ -167,6 +235,12 @@ guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
return NULL;
}
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(u32 owner, u32 type, u32 engine_classid)
+{
+ return guc_capture_get_one_list(xe_lp_lists, owner, type, engine_classid);
+}
+
static struct __guc_mmio_reg_descr_group *
guc_capture_get_one_ext_list(struct __guc_mmio_reg_descr_group *reglists,
u32 owner, u32 type, u32 id)
@@ -430,6 +504,12 @@ xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type, u32 classid, voi
return cache->status;
}
+ /*
+ * ADS population of input registers is a good
+ * time to pre-allocate cachelist output nodes
+ */
+ guc_capture_create_prealloc_nodes(guc);
+
ret = xe_guc_capture_getlistsize(guc, owner, type, classid, &size);
if (ret) {
cache->is_valid = true;
@@ -567,6 +647,756 @@ static void check_guc_capture_size(struct xe_guc *guc)
buffer_size, spare_size, capture_size);
}
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+ struct list_head *list)
+{
+ list_add_tail(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct xe_guc_state_capture *guc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &guc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *guc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &guc->cachelist);
+}
+
+static void
+guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
+{
+ struct guc_mmio_reg *tmp[GUC_CAPTURE_LIST_TYPE_MAX];
+ int i;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ tmp[i] = node->reginfo[i].regs;
+ memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+ guc->capture->max_mmio_per_node);
+ }
+ memset(node, 0, sizeof(*node));
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i)
+ node->reginfo[i].regs = tmp[i];
+
+ INIT_LIST_HEAD(&node->link);
+}
+
+/*
+ * KMD Init time flows:
+ * --------------------
+ * --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ * xe_guc_ads acquires the register lists by calling
+ * xe_guc_capture_list_size and xe_guc_capture_list_get 'n' times,
+ * where n = 1 for global-reg-list +
+ * num_engine_classes for class-reg-list +
+ * num_engine_classes for instance-reg-list
+ * (since all instances of the same engine-class type
+ * have an identical engine-instance register-list).
+ * ADS module also calls separately for PF vs VF.
+ *
+ * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small)
+ * Note2: 'x 3' to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ * --> G2H STATE_CAPTURE_NOTIFICATION
+ * L--> xe_guc_capture_process
+ * L--> Loop through B (head..tail) and for each engine instance's
+ * err-state-captured register-list we find, we alloc 'C':
+ * --> alloc C: A capture-output-node structure that includes misc capture info along
+ * with 3 register list dumps (global, engine-class and engine-instance)
+ * This node is created from a pre-allocated list of blank nodes in
+ * guc->capture->cachelist and populated with the error-capture
+ * data from GuC and then it's added into guc->capture->outlist linked
+ * list. This list is used for matchup and printout by xe_devcoredump_read
+ * and xe_hw_engine_snapshot_print, (when user invokes the devcoredump sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ * --> guc_exec_queue_timedout_job
+ * L--> xe_devcoredump
+ * L--> devcoredump_snapshot(..IS_GUC_CAPTURE)
+ * --> xe_hw_engine_snapshot_capture(..IS_GUC_CAPTURE)
+ * L--> xe_hw_engine_find_and_copy_guc_capture_snapshot is where
+ * detach C from internal linked list and add it into
+ * xe_hw_engine_snapshot struct (if the context and
+ * engine of the event notification matches a node
+ * in the link list).
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ * --> xe_devcoredump_read->
+ * L--> xxx_snapshot_print
+ * L--> xe_hw_engine_snapshot_print
+ * register lists values of the xe_hw_engine_snapshot
+ * saved from the error-engine-dump.
+ *
+ */
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+ if (buf->wr >= buf->rd)
+ return (buf->wr - buf->rd);
+ return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+ if (buf->rd > buf->wr)
+ return (buf->size - buf->rd);
+ return (buf->wr - buf->rd);
+}
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, as per the current and foreseeable future, all packed error-
+ * capture output structures are dword aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is larger
+ * than one dword but the tail end of the err-capture buffer-region has lesser space left,
+ * we would need to extract that structure one dword at a time straddled across the end,
+ * onto the start of the ring.
+ *
+ * Below function, guc_capture_log_remove_dw is a helper for that. All callers of this
+ * function would typically do a straight-up memcpy from the ring contents and will only
+ * call this helper if their structure-extraction is straddling across the end of the
+ * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
+ * scalability for future expansion of output data types without requiring a redesign
+ * of the flow controls.
+ */
+static int
+guc_capture_log_remove_dw(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ u32 *dw)
+{
+ int tries = 2;
+ int avail = 0;
+
+ if (!guc_capture_buf_cnt(buf))
+ return 0;
+
+ while (tries--) {
+ avail = guc_capture_buf_cnt_to_end(buf);
+ if (avail >= sizeof(u32)) {
+ *dw = xe_map_rd(guc_to_xe(guc), &guc->log.bo->vmap,
+ buf->data_offset + buf->rd, u32);
+ buf->rd += 4;
+ return 4;
+ }
+ if (avail)
+ xe_gt_dbg(guc_to_gt(guc), "Register capture log not dword aligned, skipping.\n");
+ buf->rd = 0;
+ }
+
+ return 0;
+}
+
+static bool
+guc_capture_data_extracted(struct xe_guc *guc, struct __guc_capture_bufstate *b,
+ int size, void *dest)
+{
+ if (guc_capture_buf_cnt_to_end(b) >= size) {
+ xe_map_memcpy_from(guc_to_xe(guc), dest, &guc->log.bo->vmap,
+ b->data_offset + b->rd, size);
+ b->rd += size;
+ return true;
+ }
+ return false;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_group_header_t *ghdr)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(guc, buf, fullsize, (void *)ghdr))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, &ghdr->owner);
+ read += guc_capture_log_remove_dw(guc, buf, &ghdr->info);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_header_t *hdr)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_state_capture_header_t);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(guc, buf, fullsize, (void *)hdr))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->owner);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->info);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->lrca);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->guc_id);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->num_mmios);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static int
+guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_mmio_reg *reg)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_mmio_reg);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(guc, buf, fullsize, (void *)reg))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, ®->offset);
+ read += guc_capture_log_remove_dw(guc, buf, ®->value);
+ read += guc_capture_log_remove_dw(guc, buf, ®->flags);
+ read += guc_capture_log_remove_dw(guc, buf, ®->mask);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *found = NULL;
+
+ if (!list_empty(&guc->capture->cachelist)) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* get first avail node from the cache list */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+ found = n;
+ break;
+ }
+ } else {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* traverse down and steal back the oldest node already allocated */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ found = n;
+ }
+ }
+ if (found) {
+ list_del(&found->link);
+ guc_capture_init_node(guc, found);
+ }
+
+ return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
+ u32 keep_reglist_mask)
+{
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return NULL;
+ if (!original)
+ return new;
+
+ new->is_partial = original->is_partial;
+
+ /* copy reg-lists that we want to clone */
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ if (keep_reglist_mask & BIT(i)) {
+ XE_WARN_ON(original->reginfo[i].num_regs >
+ guc->capture->max_mmio_per_node);
+
+ memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+ original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+ new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+ new->reginfo[i].vfid = original->reginfo[i].vfid;
+
+ if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS) {
+ new->eng_class = original->eng_class;
+ } else if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+ new->eng_inst = original->eng_inst;
+ new->guc_id = original->guc_id;
+ new->lrca = original->lrca;
+ }
+ }
+ }
+
+ return new;
+}
+
+static int
+guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct guc_state_capture_group_header_t ghdr = {0};
+ struct guc_state_capture_header_t hdr = {0};
+ struct __guc_capture_parsed_output *node = NULL;
+ struct guc_mmio_reg *regs = NULL;
+ int i, numlists, numregs, ret = 0;
+ enum guc_capture_type datatype;
+ struct guc_mmio_reg tmp;
+ bool is_partial = false;
+
+ i = guc_capture_buf_cnt(buf);
+ if (!i)
+ return -ENODATA;
+
+ if (i % sizeof(u32)) {
+ xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
+ ret = -EIO;
+ goto bailout;
+ }
+
+ /* first get the capture group header */
+ if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+ ret = -EIO;
+ goto bailout;
+ }
+ /*
+ * we would typically expect a layout as below where n would be expected to be
+ * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
+ * instances being reset together.
+ * ____________________________________________
+ * | Capture Group |
+ * | ________________________________________ |
+ * | | Capture Group Header: | |
+ * | | - num_captures = 5 | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture1: | |
+ * | | Hdr: GLOBAL, numregs=a | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rega | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture2: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regb | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture3: | |
+ * | | Hdr: INSTANCE=RCS, numregs=c | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regc | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture4: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regd | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture5: | |
+ * | | Hdr: INSTANCE=CCS0, numregs=e | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rege | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * |__________________________________________|
+ */
+ is_partial = FIELD_GET(CAP_GRP_HDR_CAPTURE_TYPE, ghdr.info);
+ numlists = FIELD_GET(CAP_GRP_HDR_NUM_CAPTURES, ghdr.info);
+
+ while (numlists--) {
+ if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+ ret = -EIO;
+ break;
+ }
+
+ datatype = FIELD_GET(CAP_HDR_CAPTURE_TYPE, hdr.info);
+ if (datatype > GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+ /* unknown capture type - skip over to next capture set */
+ numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &tmp)) {
+ ret = -EIO;
+ break;
+ }
+ }
+ continue;
+ } else if (node) {
+ /*
+ * Based on the current capture type and what we have so far,
+ * decide if we should add the current node into the internal
+ * linked list for match-up when xe_devcoredump calls later
+ * (and alloc a blank node for the next set of reglists)
+ * or continue with the same node or clone the current node
+ * but only retain the global or class registers (such as the
+ * case of dependent engine resets).
+ */
+ if (datatype == GUC_CAPTURE_LIST_TYPE_GLOBAL) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS &&
+ node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS].num_regs) {
+ /* Add to list, clone node and duplicate global list */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+ } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE &&
+ node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE].num_regs) {
+ /* Add to list, clone node and duplicate global + class lists */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+ GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+ }
+ }
+
+ if (!node) {
+ node = guc_capture_get_prealloc_node(guc);
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL)
+ xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
+ datatype);
+ }
+ node->is_partial = is_partial;
+ node->reginfo[datatype].vfid = FIELD_GET(CAP_HDR_CAPTURE_VFID, hdr.owner);
+
+ switch (datatype) {
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+ node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+ node->eng_inst = FIELD_GET(CAP_HDR_ENGINE_INSTANCE, hdr.info);
+ node->lrca = hdr.lrca;
+ node->guc_id = hdr.guc_id;
+ break;
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+ node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+ break;
+ default:
+ break;
+ }
+
+ numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+ if (numregs > guc->capture->max_mmio_per_node) {
+ xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
+ numregs = guc->capture->max_mmio_per_node;
+ }
+ node->reginfo[datatype].num_regs = numregs;
+ regs = node->reginfo[datatype].regs;
+ i = 0;
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, ®s[i++])) {
+ ret = -EIO;
+ break;
+ }
+ }
+ }
+
+bailout:
+ if (node) {
+ /* If we have data, add to linked list for match-up when xe_devcoredump calls */
+ for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ if (node->reginfo[i].regs) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ break;
+ }
+ }
+ if (node) /* else return it back to cache list */
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+ return ret;
+}
+
+static int __guc_capture_flushlog_complete(struct xe_guc *guc)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+ GUC_CAPTURE_LOG_BUFFER
+ };
+
+ return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+/*
+ * __guc_capture_process_output - drain the GuC log error-capture subregion
+ * @guc: the xe_guc that received the capture notification
+ *
+ * Copies the capture region's guc_log_buffer_state bookkeeping to the stack,
+ * extracts all pending register-capture data via guc_capture_extract_reglists(),
+ * then acknowledges consumption to GuC by advancing read_ptr, clearing the
+ * flush_to_file flag and signalling flush completion.
+ */
+static void __guc_capture_process_output(struct xe_guc *guc)
+{
+ unsigned int buffer_size, read_offset, write_offset, full_count;
+ /* NOTE(review): 'uc' appears unused in this function - confirm and remove */
+ struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct guc_log_buffer_state log_buf_state_local;
+ struct __guc_capture_bufstate buf;
+ bool new_overflow;
+ int ret;
+ u32 log_buf_state_offset;
+ u32 src_data_offset;
+
+ /* The capture region's state struct follows those of the preceding log regions */
+ log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER;
+ src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_CAPTURE_LOG_BUFFER);
+
+ /*
+ * Make a copy of the state structure, inside GuC log buffer
+ * (which is uncached mapped), on the stack to avoid reading
+ * from it multiple times.
+ */
+ xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
+ log_buf_state_offset, sizeof(struct guc_log_buffer_state));
+
+ buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_CAPTURE_LOG_BUFFER);
+ read_offset = log_buf_state_local.read_ptr;
+ write_offset = log_buf_state_local.sampled_write_ptr;
+ full_count = log_buf_state_local.buffer_full_cnt;
+
+ /* Bookkeeping stuff */
+ guc->log.stats[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file;
+ new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_CAPTURE_LOG_BUFFER,
+ full_count);
+
+ /* Now copy the actual logs. */
+ if (unlikely(new_overflow)) {
+ /* copy the whole buffer in case of overflow */
+ read_offset = 0;
+ write_offset = buffer_size;
+ } else if (unlikely((read_offset > buffer_size) ||
+ (write_offset > buffer_size))) {
+ xe_gt_err(guc_to_gt(guc),
+ "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
+ read_offset, buffer_size);
+ /* copy whole buffer as offsets are unreliable */
+ read_offset = 0;
+ write_offset = buffer_size;
+ }
+
+ buf.size = buffer_size;
+ buf.rd = read_offset;
+ buf.wr = write_offset;
+ buf.data_offset = src_data_offset;
+
+ /* Keep extracting engine-capture nodes until the region is exhausted */
+ if (!xe_guc_read_stopped(guc)) {
+ do {
+ ret = guc_capture_extract_reglists(guc, &buf);
+ } while (ret >= 0);
+ }
+
+ /* Update the state of log buffer err-cap state */
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
+ write_offset);
+ /* Clear flush_to_file in the local copy first (it was loaded by the
+ * xe_map_memcpy_from() above).
+ */
+ log_buf_state_local.flush_to_file = 0;
+ /* Then write out the "updated local" through xe_map_wr() */
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
+ log_buf_state_local.flags);
+ __guc_capture_flushlog_complete(guc);
+}
+
+/**
+ * xe_guc_capture_process - Process a G2H error-capture notification
+ * @guc: The GuC object
+ *
+ * Parse the GuC log buffer error-capture region, but only if the capture
+ * state was successfully allocated at init time.
+ */
+void xe_guc_capture_process(struct xe_guc *guc)
+{
+ if (guc->capture)
+ __guc_capture_process_output(guc);
+}
+
+/*
+ * guc_capture_alloc_one_node - allocate one empty capture output node
+ * @guc: the xe_guc owning the capture state
+ *
+ * Allocates a node plus one register array per capture-list type, each sized
+ * for guc->capture->max_mmio_per_node entries. Allocations are drmm-managed;
+ * partial failure explicitly frees what was already allocated.
+ *
+ * Returns the initialized node, or NULL on allocation failure.
+ */
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct xe_guc *guc)
+{
+ struct drm_device *drm = guc_to_drm(guc);
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
+ sizeof(struct guc_mmio_reg), GFP_KERNEL);
+ if (!new->reginfo[i].regs) {
+ /* unwind the register arrays allocated so far */
+ while (i)
+ drmm_kfree(drm, new->reginfo[--i].regs);
+ drmm_kfree(drm, new);
+ return NULL;
+ }
+ }
+ guc_capture_init_node(guc, new);
+
+ return new;
+}
+
+/*
+ * __guc_capture_create_prealloc_nodes - fill the cache list with empty nodes
+ * @guc: the xe_guc owning the capture state
+ *
+ * Pre-allocates up to PREALLOC_NODES_MAX_COUNT nodes so that capture
+ * extraction during a GT reset never needs to allocate memory. Stops early
+ * (keeping what was allocated so far) if an allocation fails.
+ */
+static void
+__guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *node = NULL;
+ int i;
+
+ for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
+ node = guc_capture_alloc_one_node(guc);
+ if (!node) {
+ xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
+ /* don't free the priors, use what we got and cleanup at shutdown */
+ return;
+ }
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+}
+
+/*
+ * guc_get_max_reglist_count - worst-case register count across capture lists
+ * @guc: the xe_guc owning the capture state
+ *
+ * Scans every owner/type/engine-class combination and returns the largest
+ * register count found; falls back to PREALLOC_NODES_DEFAULT_NUMREGS when
+ * every list is empty. Used to size the pre-allocated nodes' register arrays.
+ */
+static int
+guc_get_max_reglist_count(struct xe_guc *guc)
+{
+ int i, j, k, tmp, maxregcount = 0;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+ for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) {
+ for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) {
+ /* the global list does not vary per engine class; count it once */
+ if (j == GUC_CAPTURE_LIST_TYPE_GLOBAL && k > 0)
+ continue;
+
+ tmp = guc_cap_list_num_regs(guc->capture, i, j, k);
+ if (tmp > maxregcount)
+ maxregcount = tmp;
+ }
+ }
+ }
+ if (!maxregcount)
+ maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
+
+ return maxregcount;
+}
+
+/*
+ * guc_capture_create_prealloc_nodes - one-time setup of the pre-alloc cache
+ * @guc: the xe_guc owning the capture state
+ *
+ * Sizes each node's register arrays from the worst-case list length, then
+ * populates the cache list. A non-zero max_mmio_per_node marks the work done.
+ */
+static void
+guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ /* skip if we've already done the pre-alloc */
+ if (guc->capture->max_mmio_per_node)
+ return;
+
+ guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+ __guc_capture_create_prealloc_nodes(guc);
+}
+
+/*
+ * cp_reg_to_snapshot - store one captured register value into a snapshot
+ * @type: GuC capture list type (global / engine-class / engine-instance)
+ * @hwe_guc_class: GuC engine class of the captured engine
+ * @offset: MMIO offset of the captured register
+ * @value: captured register value
+ * @regs: snapshot register struct to fill
+ *
+ * Looks the register up in the PF descriptor list for @type and the capture
+ * class of @hwe_guc_class and, if found, writes @value at that descriptor's
+ * byte offset (position_in_snapshot) inside @regs. Registers not present in
+ * the descriptor list are silently ignored.
+ */
+static void cp_reg_to_snapshot(int type, u16 hwe_guc_class, u32 offset, u32 value,
+ struct snapshot_regs *regs)
+{
+ int i;
+ const struct __guc_mmio_reg_descr_group *list;
+
+ /* Get register list for the type/class */
+ list = xe_guc_capture_get_reg_desc_list(GUC_CAPTURE_LIST_INDEX_PF, type,
+ xe_guc_class_to_capture_class(hwe_guc_class));
+ if (!list)
+ return;
+
+ for (i = 0; i < list->num_regs; i++)
+ if (offset == list->list[i].reg.addr) {
+ u32 *field = (u32 *)((uintptr_t)regs + list->list[i].position_in_snapshot);
+ *field = value;
+ return;
+ }
+}
+
+/*
+ * guc_capture_parse_reglist - copy a capture node's registers into a snapshot
+ * @node: parsed GuC capture node (NULL is tolerated; nothing is copied)
+ * @snapshot: HW engine snapshot to fill
+ * @hwe_guc_class: GuC engine class used to select the descriptor lists
+ *
+ * Walks every capture-list type held by @node and stores each register value
+ * into its slot in snapshot->reg via cp_reg_to_snapshot().
+ */
+static void guc_capture_parse_reglist(struct __guc_capture_parsed_output *node,
+ struct xe_hw_engine_snapshot *snapshot, u16 hwe_guc_class)
+{
+ int i, type;
+
+ if (!node)
+ return;
+
+ for (type = GUC_CAPTURE_LIST_TYPE_GLOBAL; type < GUC_CAPTURE_LIST_TYPE_MAX; type++) {
+ struct gcap_reg_list_info *reginfo = &node->reginfo[type];
+ struct guc_mmio_reg *regs = reginfo->regs;
+
+ for (i = 0; i < reginfo->num_regs; i++)
+ cp_reg_to_snapshot(type, hwe_guc_class, regs[i].offset, regs[i].value,
+ &snapshot->reg);
+ }
+}
+
+/**
+ * xe_hw_engine_find_and_copy_guc_capture_snapshot - Take an engine snapshot from GuC capture.
+ * @hwe: Xe HW Engine.
+ * @snapshot: Xe HW Engine snapshot object to save data, copied from error capture
+ *
+ * Search the GuC capture outlist for the node matching this engine's GuC
+ * class/instance, guc_id and LRCA, copy its registers into @snapshot and
+ * unlink the node from the outlist. This can be printed out in a later stage
+ * like during dev_coredump analysis.
+ *
+ * NOTE(review): the matched node is list_del'd but not returned to the
+ * cachelist here - confirm it is recycled elsewhere, otherwise it is lost
+ * to both lists.
+ *
+ * Returns: None
+ */
+void
+xe_hw_engine_find_and_copy_guc_capture_snapshot(struct xe_hw_engine *hwe,
+ struct xe_hw_engine_snapshot *snapshot)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct __guc_capture_parsed_output *n, *ntmp;
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ struct list_head *list = &guc->capture->outlist;
+ struct xe_sched_job *job = devcoredump->job;
+ struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
+ u16 guc_id = ge->guc.id;
+ u32 lrca;
+ u16 hwe_guc_class = xe_engine_class_to_guc_class(hwe->class);
+
+ lrca = xe_lrc_ggtt_addr(job->q->lrc[0]) & LRC_GTT_ADDRESS_MASK;
+
+ /*
+ * Look for a matching GuC reported error capture node from
+ * the internal output link-list based on engine class and instance.
+ */
+ list_for_each_entry_safe(n, ntmp, list, link) {
+ if (n->eng_class == hwe_guc_class && n->eng_inst == hwe->instance &&
+ n->guc_id == guc_id && (n->lrca & LRC_GTT_ADDRESS_MASK) == lrca) {
+ guc_capture_parse_reglist(n, snapshot, hwe_guc_class);
+ list_del(&n->link);
+ return;
+ }
+ }
+}
+
+/**
+ * xe_guc_capture_free - Return all outstanding capture nodes to the cache
+ * @guc: The GuC object
+ *
+ * Unlinks every node left on the output link-list and moves it back to the
+ * internal cache list for reuse.
+ */
+void xe_guc_capture_free(struct xe_guc *guc)
+{
+ if (guc->capture && !list_empty(&guc->capture->outlist)) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ list_del(&n->link);
+ /* put node back to cache list */
+ /* No need to init here, guc_capture_get_prealloc_node initializes it later */
+ guc_capture_add_node_to_cachelist(guc->capture, n);
+ }
+ }
+}
+
int xe_guc_capture_init(struct xe_guc *guc)
{
guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
@@ -574,7 +1404,9 @@ int xe_guc_capture_init(struct xe_guc *guc)
return -ENOMEM;
guc->capture->reglists = guc_capture_get_device_reglist(guc);
-
check_guc_capture_size(guc);
+ INIT_LIST_HEAD(&guc->capture->outlist);
+ INIT_LIST_HEAD(&guc->capture->cachelist);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
index a62b1dbd47a6..c0bada99c9ec 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -10,6 +10,8 @@
#include "regs/xe_reg_defs.h"
struct xe_guc;
+struct xe_hw_engine;
+struct xe_hw_engine_snapshot;
/*
* struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
@@ -25,6 +27,7 @@ struct __guc_mmio_reg_descr {
u32 flags;
u32 mask;
const char *regname;
+ u32 position_in_snapshot;
};
struct __guc_mmio_reg_descr_group {
@@ -36,9 +39,15 @@ struct __guc_mmio_reg_descr_group {
struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
};
+void xe_guc_capture_process(struct xe_guc *guc);
int xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type, u32 classid, void **outptr);
int xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type, u32 classid, size_t *size);
int xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size);
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(u32 owner, u32 type, u32 engine_classid);
+void xe_hw_engine_find_and_copy_guc_capture_snapshot(struct xe_hw_engine *hwe,
+ struct xe_hw_engine_snapshot *snapshot);
+void xe_guc_capture_free(struct xe_guc *guc);
int xe_guc_capture_init(struct xe_guc *guc);
#endif /* _XE_GUC_CAPTURE_H */
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_fwif.h b/drivers/gpu/drm/xe/xe_guc_capture_fwif.h
index 199e3c0108a4..5ef8c20fe9bc 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture_fwif.h
@@ -10,6 +10,51 @@
#include "xe_guc_fwif.h"
+/*
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+ /* total size of the error-capture region */
+ u32 size;
+ /* offset of the region within the GuC log buffer */
+ u32 data_offset;
+ /* current read offset within the region */
+ u32 rd;
+ /* sampled write offset within the region */
+ u32 wr;
+};
+
+/*
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+ /*
+ * A single set of 3 capture lists: a global-list
+ * an engine-class-list and an engine-instance list.
+ * outlist in __guc_capture_parsed_output will keep
+ * a linked list of these nodes that will eventually
+ * be detached from outlist and attached to
+ * xe_devcoredump in response to a context reset
+ */
+ struct list_head link;
+ /* true if GuC ran out of space while writing this capture group */
+ bool is_partial;
+ /* GuC engine class / instance this node was captured from */
+ u32 eng_class;
+ u32 eng_inst;
+ /* GuC context id and LRC address used to match a devcoredump request */
+ u32 guc_id;
+ u32 lrca;
+ struct gcap_reg_list_info {
+ u32 vfid;
+ u32 num_regs;
+ struct guc_mmio_reg *regs;
+ } reginfo[GUC_CAPTURE_LIST_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS)
+};
+
/*
* struct guc_debug_capture_list_header / struct guc_debug_capture_list
*
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index c1f258348f5c..865b58bb4fd9 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1045,6 +1045,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
/* Selftest only at the moment */
break;
case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
/* FIXME: Handle this */
break;
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 908298791c93..f8f9c76eb7ac 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -206,6 +206,12 @@ enum guc_capture_type {
GUC_CAPTURE_LIST_TYPE_MAX,
};
+/* GuC supports capturing only a limited range of registers for debug purposes;
+ * for unsupported registers, a direct read is the only way to save the data.
+ * GuC capture handling will ignore all lists of type GUC_CAPTURE_LIST_TYPE_DIRECT_READ.
+ */
+#define GUC_CAPTURE_LIST_TYPE_DIRECT_READ GUC_CAPTURE_LIST_TYPE_MAX
+
/* Class indecies for capture_class and capture_instance arrays */
enum {
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 47aab04cf34f..f02f4c0c9568 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -25,6 +25,7 @@
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
@@ -769,7 +770,7 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
xe_sched_job_put(job);
}
-static int guc_read_stopped(struct xe_guc *guc)
+int xe_guc_read_stopped(struct xe_guc *guc)
{
return atomic_read(&guc->submission_state.stopped);
}
@@ -791,7 +792,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
set_min_preemption_timeout(guc, q);
smp_rmb();
ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
struct xe_gpu_scheduler *sched = &q->guc->sched;
@@ -906,7 +907,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
*/
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
drm_warn(&xe->drm, "Schedule disable failed to respond");
xe_sched_submission_start(sched);
@@ -929,6 +930,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
int i = 0;
bool wedged;
+ bool reset_status = exec_queue_reset(q);
+ bool guc_en = xe_device_uc_enabled(xe);
/*
* TDR has fired before free job worker. Common if exec queue
@@ -948,7 +951,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
"VM job timed out on non-killed execqueue\n");
- if (!exec_queue_killed(q))
+ /* Take a devcoredump when:
+ * 1. GuC is not enabled, or
+ * 2. GuC is enabled and the GuC reset status == 1.
+ * When GuC is enabled, register values are captured by GuC, and GuC notifies
+ * the host with a capture notification message right before the reset.
+ * GuC reset status 1 also means the capture is ready.
+ * If not ready, the snapshot is taken after the wait event within this function.
+ */
+ if (!exec_queue_killed(q) && (!guc_en || (guc_en && reset_status)))
xe_devcoredump(job);
trace_xe_sched_job_timedout(job);
@@ -996,8 +1007,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc)) {
drm_warn(&xe->drm, "Schedule disable failed to respond");
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
@@ -1007,6 +1018,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
}
}
+ /* If capture/reset was not ready when entering this function, take the snapshot now */
+ if (!exec_queue_killed(q) && guc_en && !reset_status)
+ xe_devcoredump(job);
+
/* Stop fence signaling */
xe_hw_fence_irq_stop(q->fence_irq);
@@ -1112,7 +1127,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
struct xe_device *xe = guc_to_xe(guc);
xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
xe_assert(xe, q->guc->suspend_pending);
q->guc->suspend_pending = false;
@@ -1128,9 +1143,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
exec_queue_enabled(q)) {
wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
- if (!guc_read_stopped(guc)) {
+ if (!xe_guc_read_stopped(guc)) {
MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
s64 since_resume_ms =
ktime_ms_delta(ktime_get(),
@@ -1258,7 +1273,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
q->entity = &ge->entity;
- if (guc_read_stopped(guc))
+ if (xe_guc_read_stopped(guc))
xe_sched_stop(sched);
mutex_unlock(&guc->submission_state.lock);
@@ -1385,7 +1400,7 @@ static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
struct xe_guc *guc = exec_queue_to_guc(q);
wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
}
static void guc_exec_queue_resume(struct xe_exec_queue *q)
@@ -1495,7 +1510,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
- wait_event(guc->ct.wq, !guc_read_stopped(guc));
+ wait_event(guc->ct.wq, !xe_guc_read_stopped(guc));
}
void xe_guc_submit_stop(struct xe_guc *guc)
@@ -1504,7 +1519,7 @@ void xe_guc_submit_stop(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
@@ -1542,7 +1557,7 @@ int xe_guc_submit_start(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
@@ -1698,8 +1713,6 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
- /* FIXME: Do error capture, most likely async */
-
trace_xe_exec_queue_reset(q);
/*
@@ -1715,6 +1728,24 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+/**
+ * xe_guc_error_capture_handler - G2H handler for STATE_CAPTURE_NOTIFICATION
+ * @guc: The GuC object
+ * @msg: G2H payload: one dword holding the capture status
+ * @len: payload length in dwords; must be 1
+ *
+ * Warns if GuC reported running out of capture space, then drains the
+ * error-capture region of the GuC log.
+ *
+ * Returns: 0 on success, -EPROTO on malformed message length.
+ */
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ u32 status;
+
+ if (unlikely(len != 1)) {
+ xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+ if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+ xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
+
+ xe_guc_capture_process(guc);
+
+ return 0;
+}
+
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 4ad5f4c1b084..d92256de473e 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -19,12 +19,14 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc);
void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
+int xe_guc_read_stopped(struct xe_guc *guc);
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len);
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 0a83506e1ad8..3bc88fbad952 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -20,6 +20,9 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
+#include "xe_guc.h"
+#include "xe_guc_capture.h"
+#include "xe_guc_capture_fwif.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
@@ -287,6 +290,7 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
+
xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
reg.addr += hwe->mmio_base;
@@ -825,6 +829,62 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
}
}
+/*
+ * xe_hw_engine_snapshot_from_hw_by_type - read one capture-list type from HW
+ * @hwe: Xe HW Engine
+ * @snapshot: snapshot object to fill
+ * @type: GuC capture list type to read
+ *
+ * Reads each register of the PF descriptor list for @type directly from
+ * hardware into its position_in_snapshot slot, stopping when the
+ * extra-operation register zone begins. Engine-instance registers are read
+ * relative to the engine's mmio_base; other types via plain GT MMIO.
+ */
+static void
+xe_hw_engine_snapshot_from_hw_by_type(struct xe_hw_engine *hwe,
+ struct xe_hw_engine_snapshot *snapshot, int type)
+{
+ const struct __guc_mmio_reg_descr_group *list;
+ u16 capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
+ int i;
+
+ list = xe_guc_capture_get_reg_desc_list(GUC_CAPTURE_LIST_INDEX_PF, type, capture_class);
+ if (!list)
+ return;
+
+ for (i = 0; i < list->num_regs; i++) {
+ u32 *field;
+
+ /* loop until extra operation registers zone */
+ if (list->list[i].reg.addr == XE_GUC_SNAPSHOT_EXTRA_OPERATION_REGS_START_REG_ADDR)
+ break;
+
+ field = (u32 *)((uintptr_t)&snapshot->reg +
+ list->list[i].position_in_snapshot);
+ if (type == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
+ *field = hw_engine_mmio_read32(hwe, list->list[i].reg);
+ else
+ *field = xe_mmio_read32(hwe->gt, list->list[i].reg);
+ }
+}
+
+/**
+ * xe_hw_engine_snapshot_from_hw - Take a quick engine snapshot from HW.
+ * @hwe: Xe HW Engine.
+ * @snapshot: Pointer to the Xe HW Engine snapshot object to save data.
+ *
+ * Reads all capture-list types directly from hardware, then the
+ * extra-operation and steering registers. This can be printed out in a later
+ * stage like during dev_coredump analysis.
+ *
+ * Returns: None
+ */
+static void
+xe_hw_engine_snapshot_from_hw(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
+{
+ int type;
+
+ for (type = GUC_CAPTURE_LIST_TYPE_GLOBAL; type < GUC_CAPTURE_LIST_TYPE_MAX; type++)
+ xe_hw_engine_snapshot_from_hw_by_type(hwe, snapshot, type);
+
+ /* Extra operation required registers zone - start */
+ if (xe_gt_has_indirect_ring_state(hwe->gt))
+ snapshot->reg.indirect_ring_state =
+ hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
+ /* Extra operation required registers zone - End */
+
+ /* Capture steering registers */
+ xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
+}
+
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
@@ -839,8 +899,12 @@ struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
struct xe_hw_engine_snapshot *snapshot;
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
size_t len;
- u64 val;
+ u32 i;
+ bool endian_convert_required;
if (!xe_hw_engine_is_valid(hwe))
return NULL;
@@ -850,6 +914,9 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
if (!snapshot)
return NULL;
+ i = 0x01020304;
+ endian_convert_required = (i != le32_to_cpu(i));
+
/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
* includes xe_hw_engine_types.h the length of this 3 registers can't be
* set in struct xe_hw_engine_snapshot, so here doing additional
@@ -881,62 +948,35 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->mmio_base = hwe->mmio_base;
/* no more VF accessible data below this point */
- if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
+ if (IS_SRIOV_VF(xe))
return snapshot;
- snapshot->reg.ring_execlist_status =
- hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
- val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
- snapshot->reg.ring_execlist_status |= val << 32;
-
- snapshot->reg.ring_execlist_sq_contents =
- hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
- val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
- snapshot->reg.ring_execlist_sq_contents |= val << 32;
-
- snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
- val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
- snapshot->reg.ring_acthd |= val << 32;
-
- snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
- val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
- snapshot->reg.ring_bbaddr |= val << 32;
-
- snapshot->reg.ring_dma_fadd =
- hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
- val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
- snapshot->reg.ring_dma_fadd |= val << 32;
-
- snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
- snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
- snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
- if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
- val = hw_engine_mmio_read32(hwe, RING_START_UDW(0));
- snapshot->reg.ring_start |= val << 32;
- }
- if (xe_gt_has_indirect_ring_state(hwe->gt)) {
- snapshot->reg.indirect_ring_state =
- hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
+ /* Use the GuC error-capture data when GuC is enabled, a job is set and the
+ * capture outlist is not empty; otherwise read the registers from the engine
+ */
+ if (xe_device_uc_enabled(xe) && xe->wedged.mode >= 1 &&
+ !list_empty(&guc->capture->outlist) && xe->devcoredump.job)
+ xe_hw_engine_find_and_copy_guc_capture_snapshot(hwe, snapshot);
+ else
+ xe_hw_engine_snapshot_from_hw(hwe, snapshot);
+
+ /* Read registers defined in "Direct read" list */
+ xe_hw_engine_snapshot_from_hw_by_type(hwe, snapshot, GUC_CAPTURE_LIST_TYPE_DIRECT_READ);
+
+ /* apply masks to ring head and tail */
+ snapshot->reg.ring_head &= HEAD_ADDR;
+ snapshot->reg.ring_tail &= TAIL_ADDR;
+
+ /* adjust u64 endianness in snapshot if needed */
+ if (endian_convert_required) {
+ for (i = 0; i < XE_GUC_SNAPSHOT_REGS_U32_START_OFFSET; i += sizeof(u64)) {
+ u64 *pdata = (u64 *)((ulong)&snapshot->reg + i);
+ u32 *pl = (u32 *)pdata;
+ u32 *ph = (u32 *)((ulong)pdata + 4);
+ *pdata = ((u64)*ph) << 32 | *pl;
+ }
}
- snapshot->reg.ring_head =
- hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
- snapshot->reg.ring_tail =
- hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
- snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
- snapshot->reg.ring_mi_mode =
- hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
- snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
- snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
- snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
- snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
- snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
- snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
- xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
-
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
-
return snapshot;
}
@@ -993,6 +1033,8 @@ xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, str
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
struct drm_printer *p)
{
+ int i, type;
+
if (!snapshot)
return;
@@ -1001,34 +1043,52 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
snapshot->logical_instance);
drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
snapshot->forcewake.domain, snapshot->forcewake.ref);
- drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
- drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
- drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
- snapshot->reg.ring_execlist_status);
- drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
- snapshot->reg.ring_execlist_sq_contents);
- drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
- drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
- drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
- drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
- drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
- drm_printf(p, "\tRING_MODE: 0x%08x\n",
- snapshot->reg.ring_mode);
- drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
- drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
- drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
- drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
- drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
- drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
- drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
- drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
- snapshot->reg.indirect_ring_state);
- drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
+
+ /* Print will include direct read list in this main loop */
+ for (type = GUC_CAPTURE_LIST_TYPE_GLOBAL; type <= GUC_CAPTURE_LIST_TYPE_DIRECT_READ;
+ type++) {
+ const struct __guc_mmio_reg_descr_group *list;
+ u16 capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
+
+ /* Capture engine registers */
+ list = xe_guc_capture_get_reg_desc_list(GUC_CAPTURE_LIST_INDEX_PF, type,
+ capture_class);
+ if (!list)
+ continue;
+
+ /* loop 32bit registers until 64 bit registers */
+ for (i = 0; i < list->num_regs; i++) {
+ u32 *field;
+
+ if (list->list[i].reg.addr == XE_GUC_SNAPSHOT_REGS_U64_START_REG_ADDR)
+ break;
+ field = (u32 *)((uintptr_t)&snapshot->reg +
+ list->list[i].position_in_snapshot);
+ drm_printf(p, "\t%s: 0x%08x\n", list->list[i].regname, *field);
+ }
+
+ if (type != GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
+ continue; /* 64bit and special registers are for engine instance only */
+
+ /* loop 64 bit registers until special registers */
+ for (; i < list->num_regs; i += 2) {
+ u64 *field;
+
+ if (list->list[i].reg.addr ==
+ XE_GUC_SNAPSHOT_EXTRA_OPERATION_REGS_START_REG_ADDR)
+ break;
+ field = (u64 *)((uintptr_t)&snapshot->reg +
+ list->list[i].position_in_snapshot);
+ drm_printf(p, "\t%s: 0x%016llx\n", list->list[i].regname, *field);
+ }
+
+ /* Handling special registers - Start */
+ drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n", snapshot->reg.indirect_ring_state);
+ /* Handling special registers - End */
+ }
+
xe_hw_engine_snapshot_instdone_print(snapshot, p);
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- drm_printf(p, "\tRCU_MODE: 0x%08x\n",
- snapshot->reg.rcu_mode);
drm_puts(p, "\n");
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 580bbd7e83b2..617101dca272 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -150,6 +150,106 @@ struct xe_hw_engine {
struct xe_hw_engine_class_intf *eclass;
};
+/**
+ * struct snapshot_regs - Hardware engine register snapshot
+ *
+ * Contains the snapshot of useful hardware engine registers, laid out in
+ * zones (64-bit, 32-bit, class, global, extra-operation, steering) so GuC
+ * capture code can address fields by byte offset (position_in_snapshot).
+ */
+struct snapshot_regs {
+ /* Engine instance type - start */
+ /* 64 bit registers zone - start */
+ /*
+ * u64 data captured as 2 u32s, either from GuC or by HW read.
+ * Data saved into this u64 zone is always written in the format:
+ * offset + 0000: [low 32]
+ * offset + 0004: [high 32]
+ * Once all data is captured, it is converted to CPU endian order if
+ * needed at the end of xe_hw_engine_snapshot_capture
+ */
+ #define XE_GUC_SNAPSHOT_REGS_U64_START_REG_ADDR RING_ACTHD(0).addr
+ /** @ring_acthd: RING_ACTHD */
+ u64 ring_acthd;
+ /** @ring_bbaddr: RING_BBADDR */
+ u64 ring_bbaddr;
+ /** @ring_start: RING_START */
+ u64 ring_start;
+ /** @ring_dma_fadd: RING_DMA_FADD */
+ u64 ring_dma_fadd;
+ /** @ring_execlist_status: RING_EXECLIST_STATUS */
+ u64 ring_execlist_status;
+ /** @ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
+ u64 ring_execlist_sq_contents;
+ /* 64 bit registers zone - end */
+
+ /* 32 bit registers zone - start */
+ /** @ring_hwstam: RING_HWSTAM */
+ u32 ring_hwstam;
+ #define XE_GUC_SNAPSHOT_REGS_U32_START_OFFSET offsetof(struct snapshot_regs, ring_hwstam)
+
+ /** @ring_hws_pga: RING_HWS_PGA */
+ u32 ring_hws_pga;
+ /** @ring_head: RING_HEAD */
+ u32 ring_head;
+ /** @ring_tail: RING_TAIL */
+ u32 ring_tail;
+ /** @ring_ctl: RING_CTL */
+ u32 ring_ctl;
+ /** @ring_mi_mode: RING_MI_MODE */
+ u32 ring_mi_mode;
+ /** @ring_mode: RING_MODE */
+ u32 ring_mode;
+ /** @ring_imr: RING_IMR */
+ u32 ring_imr;
+ /** @ring_esr: RING_ESR */
+ u32 ring_esr;
+ /** @ring_emr: RING_EMR */
+ u32 ring_emr;
+ /** @ring_eir: RING_EIR */
+ u32 ring_eir;
+ /** @ipehr: IPEHR */
+ u32 ipehr;
+ /* Engine instance type - end */
+
+ /* Engine class type - start */
+ /** @rcu_mode: RCU_MODE */
+ u32 rcu_mode;
+ /** @sfc_done_0: SFC_DONE[0] */
+ u32 sfc_done_0;
+ /** @sfc_done_1: SFC_DONE[1] */
+ u32 sfc_done_1;
+ /** @sfc_done_2: SFC_DONE[2] */
+ u32 sfc_done_2;
+ /** @sfc_done_3: SFC_DONE[3] */
+ u32 sfc_done_3;
+ /* Engine class type - end */
+
+ /* Global type - start */
+ /** @forcewake_gt: FORCEWAKE_GT */
+ u32 forcewake_gt;
+ /* Global type - end */
+
+ /* Extra operation required registers zone - start */
+ /* registers that require extra code to handle */
+ #define XE_GUC_SNAPSHOT_EXTRA_OPERATION_REGS_START_REG_ADDR INDIRECT_RING_STATE(0).addr
+ /** @indirect_ring_state: INDIRECT_RING_STATE */
+ u32 indirect_ring_state;
+ /* Extra operation required registers zone - end */
+
+ /* Steering registers */
+ struct {
+ /** @instdone.ring: RING_INSTDONE */
+ u32 ring;
+ /** @instdone.slice_common: SC_INSTDONE */
+ u32 *slice_common;
+ /** @instdone.slice_common_extra: SC_INSTDONE_EXTRA */
+ u32 *slice_common_extra;
+ /** @instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
+ u32 *slice_common_extra2;
+ /** @instdone.sampler: SAMPLER_INSTDONE */
+ u32 *sampler;
+ /** @instdone.row: ROW_INSTDONE */
+ u32 *row;
+ /** @instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
+ u32 *geom_svg;
+ } instdone;
+};
+
/**
* struct xe_hw_engine_snapshot - Hardware engine snapshot
*
@@ -172,64 +272,7 @@ struct xe_hw_engine_snapshot {
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
/** @reg: Useful MMIO register snapshot */
- struct {
- /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
- u64 ring_execlist_status;
- /** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
- u64 ring_execlist_sq_contents;
- /** @reg.ring_acthd: RING_ACTHD */
- u64 ring_acthd;
- /** @reg.ring_bbaddr: RING_BBADDR */
- u64 ring_bbaddr;
- /** @reg.ring_dma_fadd: RING_DMA_FADD */
- u64 ring_dma_fadd;
- /** @reg.ring_hwstam: RING_HWSTAM */
- u32 ring_hwstam;
- /** @reg.ring_hws_pga: RING_HWS_PGA */
- u32 ring_hws_pga;
- /** @reg.ring_start: RING_START */
- u64 ring_start;
- /** @reg.ring_head: RING_HEAD */
- u32 ring_head;
- /** @reg.ring_tail: RING_TAIL */
- u32 ring_tail;
- /** @reg.ring_ctl: RING_CTL */
- u32 ring_ctl;
- /** @reg.ring_mi_mode: RING_MI_MODE */
- u32 ring_mi_mode;
- /** @reg.ring_mode: RING_MODE */
- u32 ring_mode;
- /** @reg.ring_imr: RING_IMR */
- u32 ring_imr;
- /** @reg.ring_esr: RING_ESR */
- u32 ring_esr;
- /** @reg.ring_emr: RING_EMR */
- u32 ring_emr;
- /** @reg.ring_eir: RING_EIR */
- u32 ring_eir;
- /** @reg.indirect_ring_state: INDIRECT_RING_STATE */
- u32 indirect_ring_state;
- /** @reg.ipehr: IPEHR */
- u32 ipehr;
- /** @reg.rcu_mode: RCU_MODE */
- u32 rcu_mode;
- struct {
- /** @reg.instdone.ring: RING_INSTDONE */
- u32 ring;
- /** @reg.instdone.slice_common: SC_INSTDONE */
- u32 *slice_common;
- /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
- u32 *slice_common_extra;
- /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
- u32 *slice_common_extra2;
- /** @reg.instdone.sampler: SAMPLER_INSTDONE */
- u32 *sampler;
- /** @reg.instdone.row: ROW_INSTDONE */
- u32 *row;
- /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
- u32 *geom_svg;
- } instdone;
- } reg;
+ struct snapshot_regs reg;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 882c3437ba5c..8c83601fc695 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -21,6 +21,7 @@ struct xe_lrc_snapshot;
struct xe_vm;
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+#define LRC_GTT_ADDRESS_MASK GENMASK(31, 12)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
u32 ring_size);
--
2.34.1
More information about the Intel-xe
mailing list