[Intel-xe] [PATCH 03/21] drm/xe/oa: Add registers and GPU commands used by OA

Dixit, Ashutosh ashutosh.dixit at intel.com
Fri Nov 17 22:52:51 UTC 2023


On Fri, 13 Oct 2023 10:06:29 -0700, Umesh Nerlige Ramappa wrote:
>
> On Tue, Sep 19, 2023 at 09:10:31AM -0700, Ashutosh Dixit wrote:
> > Add registers and GPU commands used by OA in subsequent patches. The xe oa
> > code programs OA units which generate performance data. The code also
> > submits command buffers to change hardware engine context images and
> > implement waits.
> >
> > v2: Remove unused registers (used by noa wait) (Umesh)
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
>
> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>

Thanks, though I am also thinking of doing s/GEN12_/XE_/ in all the
register name #defines, to better align with the new XE/XE2 nomenclature.

> > ---
> > drivers/gpu/drm/xe/regs/xe_engine_regs.h  |   2 +
> > drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  13 ++
> > drivers/gpu/drm/xe/regs/xe_oa_regs.h      | 173 ++++++++++++++++++++++
> > 3 files changed, 188 insertions(+)
> > create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
> >
> > diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > index 692213d09ceaa..c12d23526f6ba 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > @@ -115,6 +115,8 @@
> > #define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
> > #define	  EL_CTRL_LOAD				REG_BIT(0)
> >
> > +#define GEN8_RING_CS_GPR(base, n)		XE_REG((base) + 0x600 + (n) * 8)
> > +
> > #define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
> > #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
> >
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > index 12120dd37aa2a..f74cab662ad5b 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > @@ -14,6 +14,7 @@
> >
> > #define MI_INSTR(opcode, flags) \
> >	(__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
> > +#define MI_OPCODE(x)		(((x) >> 23) & 0x3f)
> >
> > #define MI_NOOP			MI_INSTR(0, 0)
> > #define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
> > @@ -23,12 +24,19 @@
> > #define   MI_ARB_DISABLE		(0<<0)
> >
> > #define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
> > +
> > #define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
> > +#define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
> >
> > #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
> > #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
> > #define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
> > #define   MI_LRI_FORCE_POSTED		(1<<12)
> > +#define   IS_MI_LRI_CMD(x)		(MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
> > +#define   MI_LRI_LEN(x)			(((x) & 0xff) + 1)
> > +
> > +#define MI_STORE_REGISTER_MEM	MI_INSTR(0x24, 1)
> > +#define   MI_SRM_LRM_GLOBAL_GTT		REG_BIT(22)
> >
> > #define MI_FLUSH_DW		MI_INSTR(0x26, 1)
> > #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
> > @@ -37,7 +45,12 @@
> > #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
> > #define   MI_FLUSH_DW_USE_GTT		(1<<2)
> >
> > +#define MI_LOAD_REGISTER_MEM	MI_INSTR(0x29, 1)
> > +
> > +#define MI_LOAD_REGISTER_REG	MI_INSTR(0x2A, 1)
> > +
> > #define MI_BATCH_BUFFER_START		MI_INSTR(0x31, 1)
> > +#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
> >
> > #define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
> > #define   SRC_ACCESS_TYPE_SHIFT		21
> > diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
> > new file mode 100644
> > index 0000000000000..0b378cb7a6ddb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
> > @@ -0,0 +1,173 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2023 Intel Corporation
> > + */
> > +
> > +#ifndef __XE_OA_REGS__
> > +#define __XE_OA_REGS__
> > +
> > +#define REG_EQUAL(reg, xe_reg) ((reg) == (xe_reg.addr))
> > +#define REG_EQUAL_MCR(reg, xe_reg) ((reg) == (xe_reg.__reg.addr))
> > +
> > +#define HALF_SLICE_CHICKEN2 XE_REG_MCR(0xe180)
> > +#define   GEN8_ST_PO_DISABLE	REG_BIT(13)
> > +
> > +#define GEN7_ROW_CHICKEN2		XE_REG(0xe4f4)
> > +#define GEN8_ROW_CHICKEN		XE_REG_MCR(0xe4f0)
> > +#define   STALL_DOP_GATING_DISABLE	REG_BIT(5)
> > +#define   GEN12_DISABLE_DOP_GATING	REG_BIT(0)
> > +
> > +#define RPM_CONFIG1			XE_REG(0xd04)
> > +#define   GEN10_GT_NOA_ENABLE		REG_BIT(9)
> > +
> > +#define WAIT_FOR_RC6_EXIT XE_REG(0x20cc)
> > +#define   HSW_WAIT_FOR_RC6_EXIT_ENABLE	REG_BIT(0)
> > +
> > +#define EU_PERF_CNTL0 XE_REG(0xe458)
> > +#define EU_PERF_CNTL4 XE_REG(0xe45c)
> > +#define EU_PERF_CNTL1 XE_REG(0xe558)
> > +#define EU_PERF_CNTL5 XE_REG(0xe55c)
> > +#define EU_PERF_CNTL2 XE_REG(0xe658)
> > +#define EU_PERF_CNTL6 XE_REG(0xe65c)
> > +#define EU_PERF_CNTL3 XE_REG(0xe758)
> > +
> > +#define OABUFFER_SIZE_MASK	REG_GENMASK(5, 3)
> > +#define OABUFFER_SIZE_128K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
> > +#define OABUFFER_SIZE_256K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
> > +#define OABUFFER_SIZE_512K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
> > +#define OABUFFER_SIZE_1M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
> > +#define OABUFFER_SIZE_2M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
> > +#define OABUFFER_SIZE_4M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
> > +#define OABUFFER_SIZE_8M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
> > +#define OABUFFER_SIZE_16M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
> > +
> > +#define GEN12_OA_TLB_INV_CR XE_REG(0xceec)
> > +
> > +/* Gen12 OAR unit */
> > +#define GEN12_OAR_OACONTROL XE_REG(0x2960)
> > +#define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
> > +#define  GEN12_OAR_OACONTROL_COUNTER_ENABLE	REG_BIT(0)
> > +
> > +#define GEN8_OACTXCONTROL XE_REG(0x2360)
> > +#define  GEN8_OA_COUNTER_RESUME			REG_BIT(0)
> > +
> > +#define GEN12_OACTXCONTROL(base) XE_REG((base) + 0x360)
> > +#define GEN12_OAR_OASTATUS XE_REG(0x2968)
> > +
> > +/* Gen12 OAG unit */
> > +#define GEN12_OAG_OAHEADPTR XE_REG(0xdb00)
> > +#define  GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
> > +#define GEN12_OAG_OATAILPTR XE_REG(0xdb04)
> > +#define  GEN12_OAG_OATAILPTR_MASK 0xffffffc0
> > +
> > +#define GEN12_OAG_OABUFFER XE_REG(0xdb08)
> > +#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK  (0x7)
> > +#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
> > +#define  GEN12_OAG_OABUFFER_MEMORY_SELECT     REG_BIT(0) /* 0: PPGTT, 1: GGTT */
> > +
> > +#define GEN12_OAG_OAGLBCTXCTRL XE_REG(0x2b28)
> > +#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
> > +#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE	REG_BIT(1)
> > +#define  GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME	REG_BIT(0)
> > +
> > +#define GEN12_OAG_OACONTROL XE_REG(0xdaf4)
> > +#define  GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
> > +#define  GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE	REG_BIT(0)
> > +
> > +#define GEN12_OAG_OA_DEBUG XE_REG(0xdaf8)
> > +#define  GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO		REG_BIT(6)
> > +#define  GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS	REG_BIT(5)
> > +#define  GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS	REG_BIT(2)
> > +#define  GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS	REG_BIT(1)
> > +
> > +#define GEN12_OAG_OASTATUS XE_REG(0xdafc)
> > +#define  GEN12_OAG_OASTATUS_COUNTER_OVERFLOW	REG_BIT(2)
> > +#define  GEN12_OAG_OASTATUS_BUFFER_OVERFLOW	REG_BIT(1)
> > +#define  GEN12_OAG_OASTATUS_REPORT_LOST		REG_BIT(0)
> > +
> > +#define GDT_CHICKEN_BITS    XE_REG(0x9840)
> > +#define   GT_NOA_ENABLE	    0x00000080
> > +
> > +#define GEN12_SQCNT1				XE_REG(0x8718)
> > +#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
> > +#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
> > +
> > +/* Gen12 OAM unit */
> > +#define GEN12_OAM_HEAD_POINTER_OFFSET   (0x1a0)
> > +#define  GEN12_OAM_HEAD_POINTER_MASK    0xffffffc0
> > +
> > +#define GEN12_OAM_TAIL_POINTER_OFFSET   (0x1a4)
> > +#define  GEN12_OAM_TAIL_POINTER_MASK    0xffffffc0
> > +
> > +#define GEN12_OAM_BUFFER_OFFSET         (0x1a8)
> > +#define  GEN12_OAM_BUFFER_SIZE_MASK     (0x7)
> > +#define  GEN12_OAM_BUFFER_SIZE_SHIFT    (3)
> > +#define  GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
> > +
> > +#define GEN12_OAM_CONTEXT_CONTROL_OFFSET              (0x1bc)
> > +#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
> > +#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE       REG_BIT(1)
> > +#define  GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME     REG_BIT(0)
> > +
> > +#define GEN12_OAM_CONTROL_OFFSET                (0x194)
> > +#define  GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
> > +#define  GEN12_OAM_CONTROL_COUNTER_ENABLE       REG_BIT(0)
> > +
> > +#define GEN12_OAM_DEBUG_OFFSET                      (0x198)
> > +#define  GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT         REG_BIT(12)
> > +#define  GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO          REG_BIT(6)
> > +#define  GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS  REG_BIT(5)
> > +#define  GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS     REG_BIT(2)
> > +#define  GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
> > +
> > +#define GEN12_OAM_STATUS_OFFSET            (0x19c)
> > +#define  GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
> > +#define  GEN12_OAM_STATUS_BUFFER_OVERFLOW  REG_BIT(1)
> > +#define  GEN12_OAM_STATUS_REPORT_LOST      REG_BIT(0)
> > +
> > +#define GEN12_OAM_MMIO_TRG_OFFSET	(0x1d0)
> > +
> > +#define GEN12_OAM_MMIO_TRG(base) \
> > +	XE_REG((base) + GEN12_OAM_MMIO_TRG_OFFSET)
> > +
> > +#define GEN12_OAM_HEAD_POINTER(base) \
> > +	XE_REG((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
> > +#define GEN12_OAM_TAIL_POINTER(base) \
> > +	XE_REG((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
> > +#define GEN12_OAM_BUFFER(base) \
> > +	XE_REG((base) + GEN12_OAM_BUFFER_OFFSET)
> > +#define GEN12_OAM_CONTEXT_CONTROL(base) \
> > +	XE_REG((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
> > +#define GEN12_OAM_CONTROL(base) \
> > +	XE_REG((base) + GEN12_OAM_CONTROL_OFFSET)
> > +#define GEN12_OAM_DEBUG(base) \
> > +	XE_REG((base) + GEN12_OAM_DEBUG_OFFSET)
> > +#define GEN12_OAM_STATUS(base) \
> > +	XE_REG((base) + GEN12_OAM_STATUS_OFFSET)
> > +
> > +#define GEN12_OAM_CEC0_0_OFFSET		(0x40)
> > +#define GEN12_OAM_CEC7_1_OFFSET		(0x7c)
> > +#define GEN12_OAM_CEC0_0(base) \
> > +	XE_REG((base) + GEN12_OAM_CEC0_0_OFFSET)
> > +#define GEN12_OAM_CEC7_1(base) \
> > +	XE_REG((base) + GEN12_OAM_CEC7_1_OFFSET)
> > +
> > +#define GEN12_OAM_STARTTRIG1_OFFSET	(0x00)
> > +#define GEN12_OAM_STARTTRIG8_OFFSET	(0x1c)
> > +#define GEN12_OAM_STARTTRIG1(base) \
> > +	XE_REG((base) + GEN12_OAM_STARTTRIG1_OFFSET)
> > +#define GEN12_OAM_STARTTRIG8(base) \
> > +	XE_REG((base) + GEN12_OAM_STARTTRIG8_OFFSET)
> > +
> > +#define GEN12_OAM_REPORTTRIG1_OFFSET	(0x20)
> > +#define GEN12_OAM_REPORTTRIG8_OFFSET	(0x3c)
> > +#define GEN12_OAM_REPORTTRIG1(base) \
> > +	XE_REG((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
> > +#define GEN12_OAM_REPORTTRIG8(base) \
> > +	XE_REG((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
> > +
> > +#define GEN12_OAM_PERF_COUNTER_B0_OFFSET	(0x84)
> > +#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
> > +	XE_REG((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
> > +
> > +#endif /* __XE_OA_REGS__ */
> > --
> > 2.41.0
> >


More information about the Intel-xe mailing list