[Intel-xe] [PATCH 03/10] drm/xe/oa: Add registers and GPU commands used by OA
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Tue Aug 22 15:44:23 UTC 2023
On Mon, Aug 07, 2023 at 06:31:52PM -0700, Ashutosh Dixit wrote:
>Add registers and GPU commands used by OA in subsequent patches. The xe oa
>code programs OA units which generate performance data. The code also
>submits command buffers to change hardware engine context images and
>implement waits.
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
>---
> drivers/gpu/drm/xe/regs/xe_engine_regs.h | 5 +
> drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 27 ++++
> drivers/gpu/drm/xe/regs/xe_oa_regs.h | 173 ++++++++++++++++++++++
> 3 files changed, 205 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>index 79873bf64e8dd..044a4920f1568 100644
>--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>@@ -84,6 +84,9 @@
>
> #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
>
>+#define MI_PREDICATE_RESULT_2(base) XE_REG((base) + 0x3bc)
>+#define MI_PREDICATE_RESULT_1(base) XE_REG((base) + 0x41c)
>+
We can drop these changes (the MI_PREDICATE_RESULT_1/MI_PREDICATE_RESULT_2 definitions above) until we decide to implement the NOA wait.
Umesh
> #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
> #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
> #define RING_FORCE_TO_NONPRIV_ACCESS_MASK REG_GENMASK(29, 28)
>@@ -108,6 +111,8 @@
> #define RING_EXECLIST_CONTROL(base) XE_REG((base) + 0x550)
> #define EL_CTRL_LOAD REG_BIT(0)
>
>+#define GEN8_RING_CS_GPR(base, n) XE_REG((base) + 0x600 + (n) * 8)
>+
> #define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10)
> #define IECPUNIT_CLKGATE_DIS REG_BIT(22)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>index 12120dd37aa2a..672100d375312 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>@@ -14,8 +14,10 @@
>
> #define MI_INSTR(opcode, flags) \
> (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
>+#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
>
> #define MI_NOOP MI_INSTR(0, 0)
>+#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
> #define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
>
> #define MI_ARB_ON_OFF MI_INSTR(0x08, 0)
>@@ -23,12 +25,32 @@
> #define MI_ARB_DISABLE (0<<0)
>
> #define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0)
>+
>+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
>+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
>+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
>+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
>+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
>+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
>+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
>+#define MI_MATH_REG(x) (x)
>+#define MI_MATH_REG_SRCA 0x20
>+#define MI_MATH_REG_SRCB 0x21
>+#define MI_MATH_REG_ACCU 0x31
>+#define MI_MATH_REG_CF 0x33
>+
> #define MI_STORE_DATA_IMM MI_INSTR(0x20, 0)
>+#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
>
> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> #define MI_LRI_LRM_CS_MMIO REG_BIT(19)
> #define MI_LRI_MMIO_REMAP_EN REG_BIT(17)
> #define MI_LRI_FORCE_POSTED (1<<12)
>+#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
>+#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
>+
>+#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
>+#define MI_SRM_LRM_GLOBAL_GTT REG_BIT(22)
>
> #define MI_FLUSH_DW MI_INSTR(0x26, 1)
> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
>@@ -37,7 +59,12 @@
> #define MI_FLUSH_DW_OP_STOREDW (1<<14)
> #define MI_FLUSH_DW_USE_GTT (1<<2)
>
>+#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
>+
>+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
>+
> #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 1)
>+#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
>
> #define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3)
> #define SRC_ACCESS_TYPE_SHIFT 21
>diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
>new file mode 100644
>index 0000000000000..0b378cb7a6ddb
>--- /dev/null
>+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
>@@ -0,0 +1,173 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright © 2023 Intel Corporation
>+ */
>+
>+#ifndef __XE_OA_REGS__
>+#define __XE_OA_REGS__
>+
>+#define REG_EQUAL(reg, xe_reg) ((reg) == (xe_reg.addr))
>+#define REG_EQUAL_MCR(reg, xe_reg) ((reg) == (xe_reg.__reg.addr))
>+
>+#define HALF_SLICE_CHICKEN2 XE_REG_MCR(0xe180)
>+#define GEN8_ST_PO_DISABLE REG_BIT(13)
>+
>+#define GEN7_ROW_CHICKEN2 XE_REG(0xe4f4)
>+#define GEN8_ROW_CHICKEN XE_REG_MCR(0xe4f0)
>+#define STALL_DOP_GATING_DISABLE REG_BIT(5)
>+#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
>+
>+#define RPM_CONFIG1 XE_REG(0xd04)
>+#define GEN10_GT_NOA_ENABLE REG_BIT(9)
>+
>+#define WAIT_FOR_RC6_EXIT XE_REG(0x20cc)
>+#define HSW_WAIT_FOR_RC6_EXIT_ENABLE REG_BIT(0)
>+
>+#define EU_PERF_CNTL0 XE_REG(0xe458)
>+#define EU_PERF_CNTL4 XE_REG(0xe45c)
>+#define EU_PERF_CNTL1 XE_REG(0xe558)
>+#define EU_PERF_CNTL5 XE_REG(0xe55c)
>+#define EU_PERF_CNTL2 XE_REG(0xe658)
>+#define EU_PERF_CNTL6 XE_REG(0xe65c)
>+#define EU_PERF_CNTL3 XE_REG(0xe758)
>+
>+#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3)
>+#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
>+#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
>+#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
>+#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
>+#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
>+#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
>+#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
>+#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
>+
>+#define GEN12_OA_TLB_INV_CR XE_REG(0xceec)
>+
>+/* Gen12 OAR unit */
>+#define GEN12_OAR_OACONTROL XE_REG(0x2960)
>+#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
>+#define GEN12_OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0)
>+
>+#define GEN8_OACTXCONTROL XE_REG(0x2360)
>+#define GEN8_OA_COUNTER_RESUME REG_BIT(0)
>+
>+#define GEN12_OACTXCONTROL(base) XE_REG((base) + 0x360)
>+#define GEN12_OAR_OASTATUS XE_REG(0x2968)
>+
>+/* Gen12 OAG unit */
>+#define GEN12_OAG_OAHEADPTR XE_REG(0xdb00)
>+#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
>+#define GEN12_OAG_OATAILPTR XE_REG(0xdb04)
>+#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0
>+
>+#define GEN12_OAG_OABUFFER XE_REG(0xdb08)
>+#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7)
>+#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
>+#define GEN12_OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
>+
>+#define GEN12_OAG_OAGLBCTXCTRL XE_REG(0x2b28)
>+#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
>+#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1)
>+#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0)
>+
>+#define GEN12_OAG_OACONTROL XE_REG(0xdaf4)
>+#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
>+#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0)
>+
>+#define GEN12_OAG_OA_DEBUG XE_REG(0xdaf8)
>+#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
>+#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
>+#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
>+#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
>+
>+#define GEN12_OAG_OASTATUS XE_REG(0xdafc)
>+#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW REG_BIT(2)
>+#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW REG_BIT(1)
>+#define GEN12_OAG_OASTATUS_REPORT_LOST REG_BIT(0)
>+
>+#define GDT_CHICKEN_BITS XE_REG(0x9840)
>+#define GT_NOA_ENABLE 0x00000080
>+
>+#define GEN12_SQCNT1 XE_REG(0x8718)
>+#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
>+#define GEN12_SQCNT1_OABPC REG_BIT(29)
>+
>+/* Gen12 OAM unit */
>+#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0)
>+#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0
>+
>+#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4)
>+#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0
>+
>+#define GEN12_OAM_BUFFER_OFFSET (0x1a8)
>+#define GEN12_OAM_BUFFER_SIZE_MASK (0x7)
>+#define GEN12_OAM_BUFFER_SIZE_SHIFT (3)
>+#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
>+
>+#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc)
>+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
>+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1)
>+#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0)
>+
>+#define GEN12_OAM_CONTROL_OFFSET (0x194)
>+#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
>+#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0)
>+
>+#define GEN12_OAM_DEBUG_OFFSET (0x198)
>+#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12)
>+#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
>+#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
>+#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
>+#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
>+
>+#define GEN12_OAM_STATUS_OFFSET (0x19c)
>+#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
>+#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1)
>+#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0)
>+
>+#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0)
>+
>+#define GEN12_OAM_MMIO_TRG(base) \
>+ XE_REG((base) + GEN12_OAM_MMIO_TRG_OFFSET)
>+
>+#define GEN12_OAM_HEAD_POINTER(base) \
>+ XE_REG((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
>+#define GEN12_OAM_TAIL_POINTER(base) \
>+ XE_REG((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
>+#define GEN12_OAM_BUFFER(base) \
>+ XE_REG((base) + GEN12_OAM_BUFFER_OFFSET)
>+#define GEN12_OAM_CONTEXT_CONTROL(base) \
>+ XE_REG((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
>+#define GEN12_OAM_CONTROL(base) \
>+ XE_REG((base) + GEN12_OAM_CONTROL_OFFSET)
>+#define GEN12_OAM_DEBUG(base) \
>+ XE_REG((base) + GEN12_OAM_DEBUG_OFFSET)
>+#define GEN12_OAM_STATUS(base) \
>+ XE_REG((base) + GEN12_OAM_STATUS_OFFSET)
>+
>+#define GEN12_OAM_CEC0_0_OFFSET (0x40)
>+#define GEN12_OAM_CEC7_1_OFFSET (0x7c)
>+#define GEN12_OAM_CEC0_0(base) \
>+ XE_REG((base) + GEN12_OAM_CEC0_0_OFFSET)
>+#define GEN12_OAM_CEC7_1(base) \
>+ XE_REG((base) + GEN12_OAM_CEC7_1_OFFSET)
>+
>+#define GEN12_OAM_STARTTRIG1_OFFSET (0x00)
>+#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c)
>+#define GEN12_OAM_STARTTRIG1(base) \
>+ XE_REG((base) + GEN12_OAM_STARTTRIG1_OFFSET)
>+#define GEN12_OAM_STARTTRIG8(base) \
>+ XE_REG((base) + GEN12_OAM_STARTTRIG8_OFFSET)
>+
>+#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20)
>+#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c)
>+#define GEN12_OAM_REPORTTRIG1(base) \
>+ XE_REG((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
>+#define GEN12_OAM_REPORTTRIG8(base) \
>+ XE_REG((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
>+
>+#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84)
>+#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
>+ XE_REG((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
>+
>+#endif /* __XE_OA_REGS__ */
>--
>2.41.0
>
More information about the Intel-xe mailing list