[Intel-xe] [PATCH 5/6] drm/xe/debugfs: Add dump of default LRCs' MI instructions
Lucas De Marchi
lucas.demarchi at intel.com
Thu Oct 12 21:50:33 UTC 2023
On Wed, Oct 11, 2023 at 04:10:03PM -0700, Matt Roper wrote:
>For non-RCS engines, nearly all of the LRC state is composed of MI
>instructions (specifically MI_LOAD_REGISTER_IMM). Providing a dump
>interface allows us to verify that the context image layout matches
>what's documented in the bspec, and also allows us to check whether LRC
>workarounds are being properly captured by the default state we record
>at startup.
>
>For now, the non-MI instructions found in the RCS and CCS engines will
>dump as "unknown;" parsing of those will be added in a follow-up patch.
>
>Bspec: 64993
>Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
>---
> .../gpu/drm/xe/instructions/xe_inst_defs.h | 1 +
> .../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
> drivers/gpu/drm/xe/xe_gt_debugfs.c | 41 +++++++++
> drivers/gpu/drm/xe/xe_lrc.c | 84 +++++++++++++++++++
> drivers/gpu/drm/xe/xe_lrc.h | 5 ++
> 5 files changed, 134 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/instructions/xe_inst_defs.h b/drivers/gpu/drm/xe/instructions/xe_inst_defs.h
>index c992ad767f08..d665a7e21f3b 100644
>--- a/drivers/gpu/drm/xe/instructions/xe_inst_defs.h
>+++ b/drivers/gpu/drm/xe/instructions/xe_inst_defs.h
>@@ -15,6 +15,7 @@
> */
> #define XE_INST_CMD_TYPE GENMASK(31, 29)
> #define XE_INST_MI REG_FIELD_PREP(XE_INST_CMD_TYPE, 0x0)
>+#define XE_INST_GFXPIPE REG_FIELD_PREP(XE_INST_CMD_TYPE, 0x3)
>
> /*
> * Most (but not all) instructions have a "length" field in the instruction
>diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>index 4a37b2b86b40..8cd1fc0e4f17 100644
>--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>@@ -30,6 +30,9 @@
> #define MI_ARB_DISABLE 0x0
>
> #define MI_BATCH_BUFFER_END __MI_INST(0xA)
>+#define MI_TOPOLOGY_FILTER __MI_INST(0xD)
>+#define MI_FORCE_WAKEUP __MI_INST(0x1D)
>+
> #define MI_STORE_DATA_IMM __MI_INST(0x20)
> #define MI_SDI_GGTT REG_BIT(22)
> #define MI_SDI_LEN_DW GENMASK(9, 0)
>diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
>index cd6d28c7b923..e2483cb7721a 100644
>--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
>+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
>@@ -15,6 +15,7 @@
> #include "xe_gt_mcr.h"
> #include "xe_gt_topology.h"
> #include "xe_hw_engine.h"
>+#include "xe_lrc.h"
> #include "xe_macros.h"
> #include "xe_pat.h"
> #include "xe_reg_sr.h"
>@@ -149,6 +150,41 @@ static int pat(struct seq_file *m, void *data)
> return 0;
> }
>
>+static int rcs_default_lrc(struct seq_file *m, void *data) {
>+ struct drm_printer p = drm_seq_file_printer(m);
>+
>+ xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
>+ return 0;
>+}
>+
>+static int ccs_default_lrc(struct seq_file *m, void *data) {
>+ struct drm_printer p = drm_seq_file_printer(m);
>+
>+ xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
>+ return 0;
>+}
>+
>+static int bcs_default_lrc(struct seq_file *m, void *data) {
>+ struct drm_printer p = drm_seq_file_printer(m);
>+
>+ xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
>+ return 0;
>+}
>+
>+static int vcs_default_lrc(struct seq_file *m, void *data) {
>+ struct drm_printer p = drm_seq_file_printer(m);
>+
>+ xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
>+ return 0;
>+}
>+
>+static int vecs_default_lrc(struct seq_file *m, void *data) {
>+ struct drm_printer p = drm_seq_file_printer(m);
>+
>+ xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
>+ return 0;
>+}
>+
> static const struct drm_info_list debugfs_list[] = {
> {"hw_engines", hw_engines, 0},
> {"force_reset", force_reset, 0},
>@@ -159,6 +195,11 @@ static const struct drm_info_list debugfs_list[] = {
> {"register-save-restore", register_save_restore, 0},
> {"workarounds", workarounds, 0},
> {"pat", pat, 0},
>+ {"default_lrc_rcs", rcs_default_lrc},
>+ {"default_lrc_ccs", ccs_default_lrc},
>+ {"default_lrc_bcs", bcs_default_lrc},
>+ {"default_lrc_vcs", vcs_default_lrc},
>+ {"default_lrc_vecs", vecs_default_lrc},
> };
>
> void xe_gt_debugfs_register(struct xe_gt *gt)
>diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>index a04867131839..825b229212ae 100644
>--- a/drivers/gpu/drm/xe/xe_lrc.c
>+++ b/drivers/gpu/drm/xe/xe_lrc.c
>@@ -6,6 +6,7 @@
> #include "xe_lrc.h"
>
> #include "instructions/xe_mi_commands.h"
>+#include "instructions/xe_gfxpipe_commands.h"
> #include "regs/xe_engine_regs.h"
> #include "regs/xe_gpu_commands.h"
> #include "regs/xe_gt_regs.h"
>@@ -903,3 +904,86 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
> {
> return __xe_lrc_parallel_map(lrc);
> }
>+
>+static int instr_dw(u32 cmd_header)
>+{
>+ /* Most instructions have the # of dwords (minus 2) in 7:0 */
>+ return REG_FIELD_GET(XE_INST_LEN_MASK, cmd_header) + 2;
>+}
>+
>+static int dump_mi_command(struct drm_printer *p,
>+ struct xe_gt *gt,
>+ u32 *dw,
>+ int remaining_dw)
>+{
>+ u32 numdw = instr_dw(*dw);
>+ u32 opcode = REG_FIELD_GET(MI_OPCODE, *dw);
This is missing a warning and an early return when numdw > remaining_dw,
so that we avoid reading random memory past the end of the buffer.
>+
>+ switch (*dw & MI_OPCODE) {
>+ case MI_NOOP:
>+ int num_noop = 1;
>+ while (num_noop < remaining_dw &&
>+ (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
>+ num_noop++;
>+ drm_printf(p, "MI_NOOP (%d dwords)\n", num_noop);
>+ return num_noop;
>+
>+ case MI_LOAD_REGISTER_IMM:
>+ drm_printf(p, "MI_LOAD_REGISTER_IMM: %d regs\n", (numdw - 1) / 2);
>+ for (int i = 1; i < numdw; i += 2)
>+ drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
>+ return numdw;
>+
>+ case MI_TOPOLOGY_FILTER:
>+ drm_printf(p, "MI_TOPOLOGY_FILTER\n");
>+ return 1;
>+
>+ case MI_FORCE_WAKEUP:
>+ drm_printf(p, "MI_FORCE_WAKEUP\n");
>+ return numdw;
>+
>+ case MI_BATCH_BUFFER_END:
>+ drm_printf(p, "MI_BATCH_BUFFER_END\n");
>+ return 0;
>+
>+ default:
>+ drm_printf(p, "unknown MI opcode %#x, likely %d dwords\n",
>+ opcode, numdw);
I'm wondering whether, in addition to decoding the dw, we should
always print the raw dw itself and show the decoding as a # comment.
Something like...
0x0000000 # MI_NOOP (1 dwords)
0x8800007 # MI_LOAD_REGISTER_IMM (15 regs)
- 0x1a244 = 0xffff0048
- 0x1a034 = 0x000000f8
...
That way we always have the raw data. Just a suggestion, up to you.
>+ return numdw;
>+ }
>+}
>+
>+void xe_lrc_dump_default(struct drm_printer *p,
>+ struct xe_gt *gt,
>+ enum xe_engine_class hwe_class)
>+{
>+ u32 *dw;
>+ int remaining_dw, num_dw;
>+
>+ if (!gt->default_lrc[hwe_class]) {
>+ drm_printf(p, "No default LRC for class %d\n", hwe_class);
>+ return;
>+ }
>+
>+ /* First page of LRC is a ppHWSP */
Since LRC_PPHWSP_SIZE is abstract here and may not always be one page,
maybe reword the comment as "Skip the beginning of LRC since it contains
the ppHWSP".
>+ dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
>+ remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
>+
>+ while (remaining_dw) {
maybe make this:
while (remaining_dw > 0) {
>+ if ((*dw & XE_INST_CMD_TYPE) == XE_INST_MI) {
>+ num_dw = dump_mi_command(p, gt, dw, remaining_dw);
>+ } else {
>+ num_dw = min(instr_dw(*dw), remaining_dw);
>+ drm_printf(p, "Unknown instr <%#x> of type %#x, likely %d dwords\n",
>+ *dw, REG_FIELD_GET(XE_INST_CMD_TYPE, *dw),
>+ num_dw);
>+ }
>+
>+ /* 0 is returned on MI_BATCH_BUFFER_END */
>+ if (num_dw == 0)
>+ break;
Maybe make MI_BATCH_BUFFER_END return remaining_dw so it "consumes the
remainder of the buffer"? Then you can remove this num_dw == 0 check.
>+
>+ dw += num_dw;
>+ remaining_dw -= num_dw;
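This is why I suggested making the while condition "> 0" above: if num_dw ever exceeds remaining_dw, the counter skips past zero and the loop would not stop (avoids wraparounds).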
Lucas De Marchi
>+ }
>+}
>diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
>index 3a6e8fc5a837..a7056eda5e0c 100644
>--- a/drivers/gpu/drm/xe/xe_lrc.h
>+++ b/drivers/gpu/drm/xe/xe_lrc.h
>@@ -7,6 +7,7 @@
>
> #include "xe_lrc_types.h"
>
>+struct drm_printer;
> struct xe_device;
> struct xe_exec_queue;
> enum xe_engine_class;
>@@ -47,4 +48,8 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
>
> size_t xe_lrc_skip_size(struct xe_device *xe);
>
>+void xe_lrc_dump_default(struct drm_printer *p,
>+ struct xe_gt *gt,
>+ enum xe_engine_class);
>+
> #endif
>--
>2.41.0
>
More information about the Intel-xe
mailing list