[PATCH 03/11] drm/xe/oa: Add registers and GPU commands used by OA
Ashutosh Dixit
ashutosh.dixit at intel.com
Tue Aug 8 01:21:47 UTC 2023
Add registers and GPU commands used by the OA code in subsequent patches. The
xe OA code programs OA units, which generate performance data. It also
submits command buffers to modify hardware engine context images and to
implement waits.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 5 +
drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 27 ++++
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 173 ++++++++++++++++++++++
3 files changed, 205 insertions(+)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 79873bf64e8dd..044a4920f1568 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -84,6 +84,9 @@
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
+#define MI_PREDICATE_RESULT_2(base) XE_REG((base) + 0x3bc)
+#define MI_PREDICATE_RESULT_1(base) XE_REG((base) + 0x41c)
+
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
#define RING_FORCE_TO_NONPRIV_ACCESS_MASK REG_GENMASK(29, 28)
@@ -108,6 +111,8 @@
#define RING_EXECLIST_CONTROL(base) XE_REG((base) + 0x550)
#define EL_CTRL_LOAD REG_BIT(0)
+#define GEN8_RING_CS_GPR(base, n) XE_REG((base) + 0x600 + (n) * 8)
+
#define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10)
#define IECPUNIT_CLKGATE_DIS REG_BIT(22)
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 12120dd37aa2a..672100d375312 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -14,8 +14,10 @@
#define MI_INSTR(opcode, flags) \
(__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
+#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
#define MI_NOOP MI_INSTR(0, 0)
+#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
#define MI_ARB_ON_OFF MI_INSTR(0x08, 0)
@@ -23,12 +25,32 @@
#define MI_ARB_DISABLE (0<<0)
#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0)
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_CF 0x33
+
#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0)
+#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_MMIO_REMAP_EN REG_BIT(17)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
+#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+
+#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
+#define MI_SRM_LRM_GLOBAL_GTT REG_BIT(22)
#define MI_FLUSH_DW MI_INSTR(0x26, 1)
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
@@ -37,7 +59,12 @@
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
#define MI_FLUSH_DW_USE_GTT (1<<2)
+#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 1)
+#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only */
#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3)
#define SRC_ACCESS_TYPE_SHIFT 21
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
new file mode 100644
index 0000000000000..0b378cb7a6ddb
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_OA_REGS__
+#define __XE_OA_REGS__
+
+#define REG_EQUAL(reg, xe_reg) ((reg) == (xe_reg).addr) /* compare reg with xe_reg's addr field; args parenthesized for expression safety */
+#define REG_EQUAL_MCR(reg, xe_reg) ((reg) == (xe_reg).__reg.addr) /* same comparison, via the nested __reg.addr field */
+
+#define HALF_SLICE_CHICKEN2 XE_REG_MCR(0xe180)
+#define GEN8_ST_PO_DISABLE REG_BIT(13)
+
+#define GEN7_ROW_CHICKEN2 XE_REG(0xe4f4)
+#define GEN8_ROW_CHICKEN XE_REG_MCR(0xe4f0)
+#define STALL_DOP_GATING_DISABLE REG_BIT(5)
+#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
+
+#define RPM_CONFIG1 XE_REG(0xd04)
+#define GEN10_GT_NOA_ENABLE REG_BIT(9)
+
+#define WAIT_FOR_RC6_EXIT XE_REG(0x20cc)
+#define HSW_WAIT_FOR_RC6_EXIT_ENABLE REG_BIT(0)
+
+#define EU_PERF_CNTL0 XE_REG(0xe458)
+#define EU_PERF_CNTL4 XE_REG(0xe45c)
+#define EU_PERF_CNTL1 XE_REG(0xe558)
+#define EU_PERF_CNTL5 XE_REG(0xe55c)
+#define EU_PERF_CNTL2 XE_REG(0xe658)
+#define EU_PERF_CNTL6 XE_REG(0xe65c)
+#define EU_PERF_CNTL3 XE_REG(0xe758)
+
+#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3)
+#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
+#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
+#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
+#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
+#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
+#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
+#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
+#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
+
+#define GEN12_OA_TLB_INV_CR XE_REG(0xceec)
+
+/* Gen12 OAR unit */
+#define GEN12_OAR_OACONTROL XE_REG(0x2960)
+#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
+#define GEN12_OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0)
+
+#define GEN8_OACTXCONTROL XE_REG(0x2360)
+#define GEN8_OA_COUNTER_RESUME REG_BIT(0)
+
+#define GEN12_OACTXCONTROL(base) XE_REG((base) + 0x360)
+#define GEN12_OAR_OASTATUS XE_REG(0x2968)
+
+/* Gen12 OAG unit */
+#define GEN12_OAG_OAHEADPTR XE_REG(0xdb00)
+#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
+#define GEN12_OAG_OATAILPTR XE_REG(0xdb04)
+#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0
+
+#define GEN12_OAG_OABUFFER XE_REG(0xdb08)
+#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7)
+#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
+#define GEN12_OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAG_OAGLBCTXCTRL XE_REG(0x2b28)
+#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
+#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1)
+#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0)
+
+#define GEN12_OAG_OACONTROL XE_REG(0xdaf4)
+#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
+#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0)
+
+#define GEN12_OAG_OA_DEBUG XE_REG(0xdaf8)
+#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
+#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
+#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
+
+#define GEN12_OAG_OASTATUS XE_REG(0xdafc)
+#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW REG_BIT(2)
+#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW REG_BIT(1)
+#define GEN12_OAG_OASTATUS_REPORT_LOST REG_BIT(0)
+
+#define GDT_CHICKEN_BITS XE_REG(0x9840)
+#define GT_NOA_ENABLE 0x00000080
+
+#define GEN12_SQCNT1 XE_REG(0x8718)
+#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define GEN12_SQCNT1_OABPC REG_BIT(29)
+
+/* Gen12 OAM unit */
+#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0)
+#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4)
+#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_BUFFER_OFFSET (0x1a8)
+#define GEN12_OAM_BUFFER_SIZE_MASK (0x7)
+#define GEN12_OAM_BUFFER_SIZE_SHIFT (3)
+#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc)
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1)
+#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0)
+
+#define GEN12_OAM_CONTROL_OFFSET (0x194)
+#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
+#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0)
+
+#define GEN12_OAM_DEBUG_OFFSET (0x198)
+#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12)
+#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
+#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
+#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
+
+#define GEN12_OAM_STATUS_OFFSET (0x19c)
+#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
+#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1)
+#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0)
+
+#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0)
+
+#define GEN12_OAM_MMIO_TRG(base) \
+ XE_REG((base) + GEN12_OAM_MMIO_TRG_OFFSET)
+
+#define GEN12_OAM_HEAD_POINTER(base) \
+ XE_REG((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
+#define GEN12_OAM_TAIL_POINTER(base) \
+ XE_REG((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
+#define GEN12_OAM_BUFFER(base) \
+ XE_REG((base) + GEN12_OAM_BUFFER_OFFSET)
+#define GEN12_OAM_CONTEXT_CONTROL(base) \
+ XE_REG((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
+#define GEN12_OAM_CONTROL(base) \
+ XE_REG((base) + GEN12_OAM_CONTROL_OFFSET)
+#define GEN12_OAM_DEBUG(base) \
+ XE_REG((base) + GEN12_OAM_DEBUG_OFFSET)
+#define GEN12_OAM_STATUS(base) \
+ XE_REG((base) + GEN12_OAM_STATUS_OFFSET)
+
+#define GEN12_OAM_CEC0_0_OFFSET (0x40)
+#define GEN12_OAM_CEC7_1_OFFSET (0x7c)
+#define GEN12_OAM_CEC0_0(base) \
+ XE_REG((base) + GEN12_OAM_CEC0_0_OFFSET)
+#define GEN12_OAM_CEC7_1(base) \
+ XE_REG((base) + GEN12_OAM_CEC7_1_OFFSET)
+
+#define GEN12_OAM_STARTTRIG1_OFFSET (0x00)
+#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c)
+#define GEN12_OAM_STARTTRIG1(base) \
+ XE_REG((base) + GEN12_OAM_STARTTRIG1_OFFSET)
+#define GEN12_OAM_STARTTRIG8(base) \
+ XE_REG((base) + GEN12_OAM_STARTTRIG8_OFFSET)
+
+#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20)
+#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c)
+#define GEN12_OAM_REPORTTRIG1(base) \
+ XE_REG((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
+#define GEN12_OAM_REPORTTRIG8(base) \
+ XE_REG((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
+
+#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84)
+#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
+ XE_REG((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
+
+#endif /* __XE_OA_REGS__ */
--
2.41.0
More information about the Intel-gfx-trybot
mailing list