[PATCH v2] drm/xe/guc: Add register defines for GuC based register capture
Jani Nikula
jani.nikula at linux.intel.com
Wed Jan 17 08:23:27 UTC 2024
On Tue, 16 Jan 2024, Zhanjun Dong <zhanjun.dong at intel.com> wrote:
> Add register defines and a list of registers for GuC based error state capture.
>
> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
> drivers/gpu/drm/xe/Kconfig | 11 +++
> drivers/gpu/drm/xe/Makefile | 1 +
> drivers/gpu/drm/xe/regs/xe_engine_regs.h | 12 +++
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 20 +++++
> drivers/gpu/drm/xe/xe_guc.c | 7 ++
> drivers/gpu/drm/xe/xe_guc_capture.c | 101 +++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_guc_capture.h | 19 +++++
> 7 files changed, 171 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_guc_capture.c
> create mode 100644 drivers/gpu/drm/xe/xe_guc_capture.h
>
> diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
> index 1b57ae38210d..236763569877 100644
> --- a/drivers/gpu/drm/xe/Kconfig
> +++ b/drivers/gpu/drm/xe/Kconfig
> @@ -83,6 +83,17 @@ config DRM_XE_FORCE_PROBE
>
> Use "!*" to block the probe of the driver for all known devices.
>
> +config DRM_XE_CAPTURE_ERROR
> + bool "Enable capturing GPU state following a hang"
> + depends on DRM_XE
> + default y
> + help
> + This option enables capturing the GPU state when a hang is detected.
> + This information is vital for triaging hangs and assists in debugging.
> + Please report any hang to your Intel representative to help with triaging.
> +
> + If in doubt, say "Y".
> +
> menu "drm/Xe Debugging"
> depends on DRM_XE
> depends on EXPERT
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index e16b84f79ddf..4cba24ff55e2 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -91,6 +91,7 @@ xe-y += xe_bb.o \
> xe_gt_topology.o \
> xe_guc.o \
> xe_guc_ads.o \
> + xe_guc_capture.o \
> xe_guc_ct.o \
> xe_guc_db_mgr.o \
> xe_guc_debugfs.o \
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 0b1266c88a6a..06015703a33e 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -64,10 +64,16 @@
>
> #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c)
> #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60)
> +#define RING_IPEIR(base) XE_REG((base) + 0x64)
> #define RING_IPEHR(base) XE_REG((base) + 0x68)
> +#define RING_INSTDONE(base) XE_REG((base) + 0x6c)
> +#define RING_INSTPS(base) XE_REG((base) + 0x70)
> +
> #define RING_ACTHD(base) XE_REG((base) + 0x74)
> #define RING_DMA_FADD(base) XE_REG((base) + 0x78)
> #define RING_HWS_PGA(base) XE_REG((base) + 0x80)
> +#define IPEIR(base) XE_REG((base) + 0x88)
> +
> #define RING_HWSTAM(base) XE_REG((base) + 0x98)
> #define RING_MI_MODE(base) XE_REG((base) + 0x9c)
> #define RING_NOPID(base) XE_REG((base) + 0x94)
> @@ -111,9 +117,12 @@
> #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1)
> #define REPLAY_MODE_GRANULARITY REG_BIT(0)
>
> +#define RING_BBSTATE(base) XE_REG((base) + 0x110)
> #define RING_BBADDR(base) XE_REG((base) + 0x140)
> #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168)
>
> +#define CCID(base) XE_REG((base) + 0x180)
> +
> #define BCS_SWCTRL(base) XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
> #define BCS_SWCTRL_DISABLE_256B REG_BIT(2)
>
> @@ -129,6 +138,9 @@
> #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
> #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
>
> +#define RING_PDP_UDW(base, n) XE_REG((base) + 0x270 + (n) * 8 + 4)
> +#define RING_PDP_LDW(base, n) XE_REG((base) + 0x270 + (n) * 8)
> +
> #define RING_MODE(base) XE_REG((base) + 0x29c)
> #define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index 4017319c6300..d68a8b447842 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -67,6 +67,8 @@
> #define VE1_AUX_INV XE_REG(0x42b8)
> #define AUX_INV REG_BIT(0)
>
> +#define AUX_ERR_DBG XE_REG(0x43f4)
> +
> #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4)
> #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910)
>
> @@ -94,6 +96,8 @@
> #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
> #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
>
> +#define XEHPG_INSTDONE_GEOM_SVG XE_REG_MCR(0x666c)
> +
> #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED)
> #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
>
> @@ -110,6 +114,10 @@
> #define FLSH_IGNORES_PSD REG_BIT(10)
> #define FD_END_COLLECT REG_BIT(5)
>
> +#define SC_INSTDONE XE_REG(0x7100)
> +#define SC_INSTDONE_EXTRA XE_REG(0x7104)
> +#define SC_INSTDONE_EXTRA2 XE_REG(0x7108)
> +
> #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
> #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
>
> @@ -299,6 +307,11 @@
>
> #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
>
> +#define FAULT_TLB_DATA0 XE_REG(0xceb8)
> +#define FAULT_TLB_DATA1 XE_REG(0xcebc)
> +
> +#define RING_FAULT_REG XE_REG(0xcec4)
> +
> #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
> #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
> #define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
> @@ -317,6 +330,11 @@
> #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
> #define GLOBAL_INVALIDATION_MODE REG_BIT(2)
>
> +#define GAM_DONE XE_REG(0xcf68)
> +
> +#define SAMPLER_INSTDONE XE_REG_MCR(0xe160)
> +#define ROW_INSTDONE XE_REG_MCR(0xe164)
> +
> #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
> #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
>
> @@ -482,6 +500,8 @@
> #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
> #define GT_RENDER_USER_INTERRUPT REG_BIT(0)
>
> +#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)
> +
> #define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004)
> #define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008)
> #define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068)
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 235d27b17ff9..4f9257075905 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -17,6 +17,7 @@
> #include "xe_force_wake.h"
> #include "xe_gt.h"
> #include "xe_guc_ads.h"
> +#include "xe_guc_capture.h"
> #include "xe_guc_ct.h"
> #include "xe_guc_hwconfig.h"
> #include "xe_guc_log.h"
> @@ -277,6 +278,12 @@ int xe_guc_init(struct xe_guc *guc)
> if (ret)
> goto out;
>
> +#if IS_ENABLED(CONFIG_DRM_XE_CAPTURE_ERROR)
> + ret = xe_guc_capture_init(guc);
> + if (ret)
> + goto out;
> +#endif
No. Please make xe_guc_capture_init() a static inline no-op stub in the
header for CONFIG_DRM_XE_CAPTURE_ERROR=n. Examples of this are literally
everywhere in the kernel. Do not add conditional compilation in code.
> +
> ret = xe_guc_ads_init(&guc->ads);
> if (ret)
> goto out;
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
> new file mode 100644
> index 000000000000..0870bfd1b88d
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2021-2022 Intel Corporation
It's 2024.
> + */
> +
> +#include <linux/types.h>
> +
> +#include <drm/drm_print.h>
> +
> +#include "abi/guc_actions_abi.h"
> +#include "regs/xe_regs.h"
> +#include "regs/xe_engine_regs.h"
> +#include "regs/xe_gt_regs.h"
> +#include "regs/xe_guc_regs.h"
> +
> +#include "xe_bo.h"
> +#include "xe_device.h"
> +#include "xe_exec_queue_types.h"
> +#include "xe_hw_engine_types.h"
> +#include "xe_gt.h"
> +#include "xe_gt_printk.h"
> +#include "xe_guc.h"
> +#include "xe_guc_capture.h"
> +#include "xe_guc_ct.h"
> +
> +#include "xe_guc_log.h"
> +#include "xe_gt_mcr.h"
> +#include "xe_guc_submit.h"
> +#include "xe_macros.h"
> +#include "xe_map.h"
Superfluous newlines, and the includes are not sorted.
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_CAPTURE_ERROR)
Please make the build of the entire file conditional in the Makefile
instead.
> +
> +/*
> + * Define all device tables of GuC error capture register lists
> + * NOTE: For engine-registers, GuC only needs the register offsets
> + * from the engine-mmio-base
> + */
> +#define COMMON_XELP_BASE_GLOBAL \
> + { FORCEWAKE_GT, 0, 0, "FORCEWAKE" }, \
> + { FAULT_TLB_DATA0, 0, 0, "FAULT_TLB_DATA0" }, \
> + { FAULT_TLB_DATA1, 0, 0, "FAULT_TLB_DATA1" }, \
> + { AUX_ERR_DBG, 0, 0, "AUX_ERR_DBG" }, \
> + { GAM_DONE, 0, 0, "GAM_DONE" }, \
> + { RING_FAULT_REG, 0, 0, "FAULT_REG" }
> +
> +#define COMMON_BASE_ENGINE_INSTANCE \
> + { RING_PSMI_CTL(0), 0, 0, "RC PSMI" }, \
> + { RING_ESR(0), 0, 0, "ESR" }, \
> + { RING_EMR(0), 0, 0, "EMR" }, \
> + { RING_EIR(0), 0, 0, "EIR" }, \
> + { RING_EXECLIST_STATUS_HI(0), 0, 0, "RING_EXECLIST_STATUS_HI" }, \
> + { RING_EXECLIST_STATUS_LO(0), 0, 0, "RING_EXECLIST_STATUS_LO" }, \
> + { RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \
> + { RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \
> + { RING_IPEIR(0), 0, 0, "IPEIR" }, \
> + { RING_IPEHR(0), 0, 0, "IPEHR" }, \
> + { RING_INSTPS(0), 0, 0, "INSTPS" }, \
> + { RING_BBADDR(0), 0, 0, "RING_BBADDR_LOW32" }, \
> + { RING_BBADDR_UDW(0), 0, 0, "RING_BBADDR_UP32" }, \
> + { RING_BBSTATE(0), 0, 0, "BB_STATE" }, \
> + { CCID(0), 0, 0, "CCID" }, \
> + { RING_ACTHD(0), 0, 0, "ACTHD_LDW" }, \
> + { RING_ACTHD_UDW(0), 0, 0, "ACTHD_UDW" }, \
> + { INSTPM(0), 0, 0, "INSTPM" }, \
> + { RING_INSTDONE(0), 0, 0, "INSTDONE" }, \
> + { RING_NOPID(0), 0, 0, "RING_NOPID" }, \
> + { RING_START(0), 0, 0, "START" }, \
> + { RING_HEAD(0), 0, 0, "HEAD" }, \
> + { RING_TAIL(0), 0, 0, "TAIL" }, \
> + { RING_CTL(0), 0, 0, "CTL" }, \
> + { RING_MI_MODE(0), 0, 0, "MODE" }, \
> + { RING_CONTEXT_CONTROL(0), 0, 0, "RING_CONTEXT_CONTROL" }, \
> + { RING_HWS_PGA(0), 0, 0, "HWS" }, \
> + { RING_MODE(0), 0, 0, "GFX_MODE" }, \
> + { RING_PDP_LDW(0, 0), 0, 0, "PDP0_LDW" }, \
> + { RING_PDP_UDW(0, 0), 0, 0, "PDP0_UDW" }, \
> + { RING_PDP_LDW(0, 1), 0, 0, "PDP1_LDW" }, \
> + { RING_PDP_UDW(0, 1), 0, 0, "PDP1_UDW" }, \
> + { RING_PDP_LDW(0, 2), 0, 0, "PDP2_LDW" }, \
> + { RING_PDP_UDW(0, 2), 0, 0, "PDP2_UDW" }, \
> + { RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \
> + { RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" }
> +
> +#define COMMON_XELP_BASE_RENDER \
> + { SC_INSTDONE, 0, 0, "SC_INSTDONE" }, \
> + { SC_INSTDONE_EXTRA, 0, 0, "SC_INSTDONE_EXTRA" }, \
> + { SC_INSTDONE_EXTRA2, 0, 0, "SC_INSTDONE_EXTRA2" }
> +
> +#define COMMON_XELP_BASE_VEC \
> + { SFC_DONE(0), 0, 0, "SFC_DONE[0]" }, \
> + { SFC_DONE(1), 0, 0, "SFC_DONE[1]" }, \
> + { SFC_DONE(2), 0, 0, "SFC_DONE[2]" }, \
> + { SFC_DONE(3), 0, 0, "SFC_DONE[3]" }
> +
> +int xe_guc_capture_init(struct xe_guc *guc)
> +{
> + return 0;
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
> new file mode 100644
> index 000000000000..35fedc78d9e2
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_guc_capture.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2021-2021 Intel Corporation
2024.
> + */
> +
> +#ifndef _XE_GUC_CAPTURE_H
> +#define _XE_GUC_CAPTURE_H
> +
> +#include <linux/types.h>
> +#include "xe_exec_queue_types.h"
You don't need either of those includes.
> +
> +struct xe_hw_engine;
> +
Superfluous newline.
> +struct xe_gt;
> +struct xe_guc;
> +
> +int xe_guc_capture_init(struct xe_guc *guc);
> +
> +#endif /* _XE_GUC_CAPTURE_H */
--
Jani Nikula, Intel
More information about the Intel-xe
mailing list