[PATCH v2] drm/xe: initial changes for XE OA

Ashutosh Dixit ashutosh.dixit at intel.com
Wed Jul 5 18:03:38 UTC 2023


Somewhat functional but still needs cleanup
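
For reference, a minimal sketch of how userspace is expected to consume
the stream fd returned by the new XE_OA_OPEN ioctl (assumptions: record
layout per the drm_xe_oa_record_header uapi added below, process_report()
is a stand-in, error handling elided):

  struct drm_xe_oa_record_header hdr;
  char buf[4096];
  ssize_t n;

  while ((n = read(stream_fd, buf, sizeof(buf))) > 0) {
          char *p = buf;

          while (p < buf + n) {
                  memcpy(&hdr, p, sizeof(hdr));
                  if (hdr.type == DRM_XE_OA_RECORD_SAMPLE)
                          process_report(p + sizeof(hdr));
                  p += hdr.size;
          }
  }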

v2:
- Include oa_unit_id in uapi
- Disable unlanded reports debug message

Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
 drivers/gpu/drm/i915/i915_perf_oa_regs.h  |    4 +-
 drivers/gpu/drm/xe/Makefile               |    1 +
 drivers/gpu/drm/xe/regs/xe_engine_regs.h  |    5 +
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |   18 +
 drivers/gpu/drm/xe/regs/xe_oa_regs.h      |  174 ++
 drivers/gpu/drm/xe/xe_device.c            |   16 +
 drivers/gpu/drm/xe/xe_device_types.h      |    4 +
 drivers/gpu/drm/xe/xe_gt_types.h          |    3 +
 drivers/gpu/drm/xe/xe_hw_engine_types.h   |    2 +
 drivers/gpu/drm/xe/xe_module.c            |    5 +
 drivers/gpu/drm/xe/xe_oa.c                | 3429 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_oa.h                |  402 +++
 drivers/gpu/drm/xe/xe_query.c             |    7 +-
 include/uapi/drm/xe_drm.h                 |  285 +-
 14 files changed, 4351 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
 create mode 100644 drivers/gpu/drm/xe/xe_oa.c
 create mode 100644 drivers/gpu/drm/xe/xe_oa.h

diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index e5ac7a8b5cb6e..e775871f570e5 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -45,8 +45,8 @@
 #define GEN8_OACTXCONTROL _MMIO(0x2360)
 #define  GEN8_OA_TIMER_PERIOD_MASK	    0x3F
 #define  GEN8_OA_TIMER_PERIOD_SHIFT	    2
-#define  GEN8_OA_TIMER_ENABLE		    (1 << 1)
-#define  GEN8_OA_COUNTER_RESUME		    (1 << 0)
+#define  GEN8_OA_TIMER_ENABLE		    BIT(1)
+#define  GEN8_OA_COUNTER_RESUME		    BIT(0)
 
 #define GEN7_OABUFFER _MMIO(0x23B0) /* R/W */
 #define  GEN7_OABUFFER_OVERRUN_DISABLE	    (1 << 3)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 336f0eb8f91ef..1be30cfbc2878 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -82,6 +82,7 @@ xe-y += xe_bb.o \
 	xe_mmio.o \
 	xe_mocs.o \
 	xe_module.o \
+	xe_oa.o \
 	xe_pat.o \
 	xe_pci.o \
 	xe_pcode.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 79873bf64e8dd..044a4920f1568 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -84,6 +84,9 @@
 
 #define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
 
+#define MI_PREDICATE_RESULT_2(base)		XE_REG((base) + 0x3bc)
+#define MI_PREDICATE_RESULT_1(base)		XE_REG((base) + 0x41c)
+
 #define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
@@ -108,6 +111,8 @@
 #define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
 #define	  EL_CTRL_LOAD				REG_BIT(0)
 
+#define GEN8_RING_CS_GPR(base, n)		XE_REG((base) + 0x600 + (n) * 8)
+
 #define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 12120dd37aa2a..632f96af38ec2 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -16,6 +16,7 @@
 	(__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
 
 #define MI_NOOP			MI_INSTR(0, 0)
+#define MI_SET_PREDICATE	MI_INSTR(0x01, 0)
 #define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
 
 #define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
@@ -23,6 +24,20 @@
 #define   MI_ARB_DISABLE		(0<<0)
 
 #define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
+
+#define	MI_MATH(x)		MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+#define	  MI_MATH_LOAD(op1, op2)	MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_ADD			MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB			MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)	MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)	MI_MATH_INSTR(0x580, op1, op2)
+#define   MI_MATH_REG(x) (x)
+#define   MI_MATH_REG_SRCA		0x20
+#define   MI_MATH_REG_SRCB		0x21
+#define   MI_MATH_REG_ACCU		0x31
+#define   MI_MATH_REG_CF		0x33
+
 #define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
 
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
@@ -37,7 +52,10 @@
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
 
+#define MI_LOAD_REGISTER_REG	MI_INSTR(0x2A, 1)
+
 #define MI_BATCH_BUFFER_START		MI_INSTR(0x31, 1)
+#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only */
 
 #define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
 #define   SRC_ACCESS_TYPE_SHIFT		21
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
new file mode 100644
index 0000000000000..4eafb1038b03f
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_OA_REGS__
+#define __XE_OA_REGS__
+
+#define PERF_REG XE_REG
+
+#define REG_EQUAL(reg, xe_reg) ((reg) == ((xe_reg).addr))
+#define REG_EQUAL_MCR(reg, xe_reg) ((reg) == ((xe_reg).__reg.addr))
+
+#define HALF_SLICE_CHICKEN2 XE_REG_MCR(0xe180)
+#define   GEN8_ST_PO_DISABLE	BIT(13)
+
+#define GEN7_ROW_CHICKEN2		XE_REG(0xe4f4)
+#define GEN8_ROW_CHICKEN		XE_REG_MCR(0xe4f0)
+#define   STALL_DOP_GATING_DISABLE	REG_BIT(5)
+#define   GEN12_DISABLE_DOP_GATING	REG_BIT(0)
+
+#define RPM_CONFIG1			XE_REG(0xd04)
+#define   GEN10_GT_NOA_ENABLE		REG_BIT(9)
+
+#define WAIT_FOR_RC6_EXIT XE_REG(0x20cc)
+#define   HSW_WAIT_FOR_RC6_EXIT_ENABLE	BIT(0)
+
+#define EU_PERF_CNTL0 PERF_REG(0xe458)
+#define EU_PERF_CNTL4 PERF_REG(0xe45c)
+#define EU_PERF_CNTL1 PERF_REG(0xe558)
+#define EU_PERF_CNTL5 PERF_REG(0xe55c)
+#define EU_PERF_CNTL2 PERF_REG(0xe658)
+#define EU_PERF_CNTL6 PERF_REG(0xe65c)
+#define EU_PERF_CNTL3 PERF_REG(0xe758)
+
+#define OABUFFER_SIZE_128K  (0 << 3)
+#define OABUFFER_SIZE_256K  (1 << 3)
+#define OABUFFER_SIZE_512K  (2 << 3)
+#define OABUFFER_SIZE_1M    (3 << 3)
+#define OABUFFER_SIZE_2M    (4 << 3)
+#define OABUFFER_SIZE_4M    (5 << 3)
+#define OABUFFER_SIZE_8M    (6 << 3)
+#define OABUFFER_SIZE_16M   (7 << 3)
+
+#define GEN12_OA_TLB_INV_CR XE_REG(0xceec)
+
+/* Gen12 OAR unit */
+#define GEN12_OAR_OACONTROL XE_REG(0x2960)
+#define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
+#define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)
+
+#define GEN8_OACTXCONTROL XE_REG(0x2360)
+#define  GEN8_OA_COUNTER_RESUME		    (1 << 0)
+
+#define GEN12_OACTXCONTROL(base) XE_REG((base) + 0x360)
+#define GEN12_OAR_OASTATUS XE_REG(0x2968)
+
+/* Gen12 OAG unit */
+#define GEN12_OAG_OAHEADPTR XE_REG(0xdb00)
+#define  GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
+#define GEN12_OAG_OATAILPTR XE_REG(0xdb04)
+#define  GEN12_OAG_OATAILPTR_MASK 0xffffffc0
+
+#define GEN12_OAG_OABUFFER XE_REG(0xdb08)
+#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK  (0x7)
+#define  GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
+#define  GEN12_OAG_OABUFFER_MEMORY_SELECT     (1 << 0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAG_OAGLBCTXCTRL XE_REG(0x2b28)
+#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
+#define  GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE       (1 << 1)
+#define  GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME     (1 << 0)
+
+#define GEN12_OAG_OACONTROL XE_REG(0xdaf4)
+#define  GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
+#define  GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE       (1 << 0)
+
+#define GEN12_OAG_OA_DEBUG XE_REG(0xdaf8)
+#define  GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO          (1 << 6)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS  (1 << 5)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS     (1 << 2)
+#define  GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1)
+
+#define GEN12_OAG_OASTATUS XE_REG(0xdafc)
+#define  GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2)
+#define  GEN12_OAG_OASTATUS_BUFFER_OVERFLOW  (1 << 1)
+#define  GEN12_OAG_OASTATUS_REPORT_LOST      (1 << 0)
+
+#define GDT_CHICKEN_BITS    XE_REG(0x9840)
+#define   GT_NOA_ENABLE	    0x00000080
+
+#define GEN12_SQCNT1				XE_REG(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
+
+/* Gen12 OAM unit */
+#define GEN12_OAM_HEAD_POINTER_OFFSET   (0x1a0)
+#define  GEN12_OAM_HEAD_POINTER_MASK    0xffffffc0
+
+#define GEN12_OAM_TAIL_POINTER_OFFSET   (0x1a4)
+#define  GEN12_OAM_TAIL_POINTER_MASK    0xffffffc0
+
+#define GEN12_OAM_BUFFER_OFFSET         (0x1a8)
+#define  GEN12_OAM_BUFFER_SIZE_MASK     (0x7)
+#define  GEN12_OAM_BUFFER_SIZE_SHIFT    (3)
+#define  GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAM_CONTEXT_CONTROL_OFFSET              (0x1bc)
+#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
+#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE       REG_BIT(1)
+#define  GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME     REG_BIT(0)
+
+#define GEN12_OAM_CONTROL_OFFSET                (0x194)
+#define  GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
+#define  GEN12_OAM_CONTROL_COUNTER_ENABLE       REG_BIT(0)
+
+#define GEN12_OAM_DEBUG_OFFSET                      (0x198)
+#define  GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT         REG_BIT(12)
+#define  GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO          REG_BIT(6)
+#define  GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS  REG_BIT(5)
+#define  GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS     REG_BIT(2)
+#define  GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
+
+#define GEN12_OAM_STATUS_OFFSET            (0x19c)
+#define  GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
+#define  GEN12_OAM_STATUS_BUFFER_OVERFLOW  REG_BIT(1)
+#define  GEN12_OAM_STATUS_REPORT_LOST      REG_BIT(0)
+
+#define GEN12_OAM_MMIO_TRG_OFFSET	(0x1d0)
+
+#define GEN12_OAM_MMIO_TRG(base) \
+	XE_REG((base) + GEN12_OAM_MMIO_TRG_OFFSET)
+
+#define GEN12_OAM_HEAD_POINTER(base) \
+	XE_REG((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
+#define GEN12_OAM_TAIL_POINTER(base) \
+	XE_REG((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
+#define GEN12_OAM_BUFFER(base) \
+	XE_REG((base) + GEN12_OAM_BUFFER_OFFSET)
+#define GEN12_OAM_CONTEXT_CONTROL(base) \
+	XE_REG((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
+#define GEN12_OAM_CONTROL(base) \
+	XE_REG((base) + GEN12_OAM_CONTROL_OFFSET)
+#define GEN12_OAM_DEBUG(base) \
+	XE_REG((base) + GEN12_OAM_DEBUG_OFFSET)
+#define GEN12_OAM_STATUS(base) \
+	XE_REG((base) + GEN12_OAM_STATUS_OFFSET)
+
+#define GEN12_OAM_CEC0_0_OFFSET		(0x40)
+#define GEN12_OAM_CEC7_1_OFFSET		(0x7c)
+#define GEN12_OAM_CEC0_0(base) \
+	XE_REG((base) + GEN12_OAM_CEC0_0_OFFSET)
+#define GEN12_OAM_CEC7_1(base) \
+	XE_REG((base) + GEN12_OAM_CEC7_1_OFFSET)
+
+#define GEN12_OAM_STARTTRIG1_OFFSET	(0x00)
+#define GEN12_OAM_STARTTRIG8_OFFSET	(0x1c)
+#define GEN12_OAM_STARTTRIG1(base) \
+	XE_REG((base) + GEN12_OAM_STARTTRIG1_OFFSET)
+#define GEN12_OAM_STARTTRIG8(base) \
+	XE_REG((base) + GEN12_OAM_STARTTRIG8_OFFSET)
+
+#define GEN12_OAM_REPORTTRIG1_OFFSET	(0x20)
+#define GEN12_OAM_REPORTTRIG8_OFFSET	(0x3c)
+#define GEN12_OAM_REPORTTRIG1(base) \
+	XE_REG((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
+#define GEN12_OAM_REPORTTRIG8(base) \
+	XE_REG((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
+
+#define GEN12_OAM_PERF_COUNTER_B0_OFFSET	(0x84)
+#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
+	XE_REG((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
+
+#endif /* __XE_OA_REGS__ */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 07ae208af809d..1368cc26e1604 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -25,6 +25,7 @@
 #include "xe_irq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
+#include "xe_oa.h"
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
@@ -107,6 +108,11 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+
+	DRM_IOCTL_DEF_DRV(XE_OA_OPEN, xe_oa_stream_open_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_OA_ADD_CONFIG, xe_oa_add_config_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_OA_REMOVE_CONFIG, xe_oa_remove_config_ioctl, DRM_RENDER_ALLOW),
+
 };
 
 static const struct file_operations xe_driver_fops = {
@@ -317,6 +323,10 @@ int xe_device_probe(struct xe_device *xe)
 			goto err_irq_shutdown;
 	}
 
+	err = xe_oa_init(xe);
+	if (err)
+		goto err_irq_shutdown;
+
 	err = xe_display_init(xe);
 	if (err)
 		goto err_fini_display;
@@ -327,6 +337,8 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_display_register(xe);
 
+	xe_oa_register(xe);
+
 	xe_debugfs_register(xe);
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
@@ -355,10 +367,14 @@ static void xe_device_remove_display(struct xe_device *xe)
 
 void xe_device_remove(struct xe_device *xe)
 {
+	xe_oa_unregister(xe);
+
 	xe_device_remove_display(xe);
 
 	xe_display_unlink(xe);
 
+	xe_oa_fini(xe);
+
 	xe_irq_shutdown(xe);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 26a8de77138a8..6bcbfbd69b877 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -16,6 +16,7 @@
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_step_types.h"
+#include "xe_oa.h"
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 #include "ext/intel_device_info.h"
@@ -350,6 +351,9 @@ struct xe_device {
 	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
 	bool d3cold_allowed;
 
+	/** @oa: oa perf counter subsystem */
+	struct xe_oa oa;
+
 	/* private: */
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7d4de019f9a5e..0626f993023b9 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -13,6 +13,7 @@
 #include "xe_reg_sr_types.h"
 #include "xe_sa_types.h"
 #include "xe_uc_types.h"
+#include "xe_oa.h"
 
 struct xe_engine_ops;
 struct xe_migrate;
@@ -338,6 +339,8 @@ struct xe_gt {
 		/** @oob: bitmap with active OOB workaroudns */
 		unsigned long *oob;
 	} wa_active;
+
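+	/** @oa: OA perf counter state for this GT */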
+	struct xe_oa_gt oa;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index d788e67312b99..b2f3b5e5583ed 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -107,6 +107,8 @@ struct xe_hw_engine {
 	void (*irq_handler)(struct xe_hw_engine *, u16);
 	/** @engine_id: id  for this hw engine */
 	enum xe_hw_engine_id engine_id;
+	/** @oa_group: oa unit for this hw engine */
+	struct xe_oa_group *oa_group;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 75e5be939f530..5b5f6c4ea9022 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -11,6 +11,7 @@
 #include "xe_drv.h"
 #include "xe_hw_fence.h"
 #include "xe_module.h"
+#include "xe_oa.h"
 #include "xe_pci.h"
 #include "xe_sched_job.h"
 
@@ -53,6 +54,10 @@ static const struct init_funcs init_funcs[] = {
 		.init = xe_register_pci_driver,
 		.exit = xe_unregister_pci_driver,
 	},
+	{
+		.init = xe_oa_sysctl_register,
+		.exit = xe_oa_sysctl_unregister,
+	},
 };
 
 static int __init xe_init(void)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
new file mode 100644
index 0000000000000..94437a012d249
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -0,0 +1,3429 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/*
+ * Current list of features missing in xe kmd:
+ * - get_default_sseu_config
+ * - xe_engine_set_nopreempt
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
+#include <linux/sizes.h>
+#include <linux/uuid.h>
+
+#include <drm/xe_drm.h>
+#include <drm/drm_drv.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
+#include "regs/xe_oa_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_lrc.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_oa.h"
+#include "xe_pm.h"
+#include "xe_sched_job.h"
+#include "xe_vm.h"
+
+#define __UNUSED__ __maybe_unused
+
+#define OA_BUFFER_SIZE		SZ_16M
+
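+/*
+ * Bytes available to read in the circular OA buffer; wraps correctly since
+ * OA_BUFFER_SIZE is a power of 2, e.g. OA_TAKEN(0x40, OA_BUFFER_SIZE - 0x40)
+ * is 0x80.
+ */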
+#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))
+
+#define OA_TAIL_MARGIN_NSEC	100000ULL
+#define INVALID_TAIL_PTR	0xffffffff
+
+#define DEFAULT_POLL_FREQUENCY_HZ 200
+#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
+
+static u32 xe_oa_stream_paranoid = true;
+
+#define OA_EXPONENT_MAX 31
+#define INVALID_CTX_ID 0xffffffff
+
+/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
+#define OAREPORT_REASON_MASK           0x3f
+#define OAREPORT_REASON_MASK_EXTENDED  0x7f
+#define OAREPORT_REASON_SHIFT          19
+#define OAREPORT_REASON_TIMER          (1<<0)
+#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
+#define OAREPORT_REASON_CLK_RATIO      (1<<5)
+
+#define HAS_MI_SET_PREDICATE(xe) (GRAPHICS_VERx100(xe) >= 1270)
+
+#define GEN11_SW_CTX_ID_SHIFT			37
+#define GEN11_SW_CTX_ID_WIDTH			11
+#define XEHP_SW_CTX_ID_SHIFT			39
+#define XEHP_SW_CTX_ID_WIDTH			16
+#define XEHP_SW_COUNTER_SHIFT			58
+#define XEHP_SW_COUNTER_WIDTH			6
+#define GEN12_GUC_SW_CTX_ID_SHIFT		39
+#define GEN12_GUC_SW_CTX_ID_WIDTH		16
+#define MAX_CONTEXT_HW_ID	(1 << 21) /* exclusive */
+#define GEN11_MAX_CONTEXT_HW_ID	(1 << 11) /* exclusive */
+/* in Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
+/* in Xe_HP ID 0xFFFF is reserved to indicate "invalid context" */
+#define XEHP_MAX_CONTEXT_HW_ID	0xFFFF
+
+#define MI_STORE_REGISTER_MEM	MI_INSTR(0x24, 1)
+#define MI_LOAD_REGISTER_MEM	MI_INSTR(0x29, 1)
+#define MI_SRM_LRM_GLOBAL_GTT	BIT(22)
+#define MI_OPCODE(x)		(((x) >> 23) & 0x3f)
+#define IS_MI_LRI_CMD(x)	(MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
+#define MI_LRI_LEN(x)		(((x) & 0xff) + 1)
+#define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
+#define MI_USE_GGTT		MI_SRM_LRM_GLOBAL_GTT
+
+#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	REG_BIT(8)
+#define GEN8_R_PWR_CLK_STATE(base)		XE_REG((base) + 0xc8)
+#define CTX_R_PWR_CLK_STATE			(0x42 + 1)
+
+static int oa_sample_rate_hard_limit;
+static u32 xe_oa_max_sample_rate = 100000;
+
+struct flex {
+	struct xe_reg reg;
+	u32 offset;
+	u32 value;
+};
+
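+/*
+ * Per-entry fields: counter format select, report size in bytes, then (for
+ * the OAM entries) OA unit type and report header width.
+ */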
+static const struct xe_oa_format oa_formats[] = {
+	[XE_OA_FORMAT_C4_B8]			= { 7, 64 },
+	[XE_OA_FORMAT_A12]			= { 0, 64 },
+	[XE_OA_FORMAT_A12_B8_C8]		= { 2, 128 },
+	[XE_OA_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256 },
+	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256 },
+	[XE_OA_FORMAT_A24u40_A14u32_B8_C8]	= { 5, 256 },
+	[XE_OAM_FORMAT_MPEC8u64_B8_C8]		= { 1, 192, TYPE_OAM, HDR_64_BIT },
+	[XE_OAM_FORMAT_MPEC8u32_B8_C8]		= { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+	[OA_GROUP_OAM_SAMEDIA_0] = 0x393000,
+};
+
+#define SAMPLE_OA_REPORT      BIT(0)
+
+struct perf_open_properties {
+	u32 sample_flags;
+
+	u64 single_context:1; // FIXME: rename to single_engine?
+	u64 hold_preemption:1;
+	u64 ctx_handle; // FIXME: rename to engine_id?
+
+	/* OA sampling state */
+	int metrics_set;
+	int oa_format;
+	bool oa_periodic;
+	int oa_period_exponent;
+
+	// struct intel_sseu sseu; // FIXME: support in xe kmd?
+
+	struct xe_hw_engine *hwe;
+
+	u64 poll_oa_period;
+};
+
+struct xe_oa_config_bo {
+	struct llist_node node;
+
+	struct xe_oa_config *oa_config;
+	struct xe_bb *bb; // FIXME: check
+};
+
+static struct ctl_table_header *sysctl_header;
+
+void xe_oa_config_release(struct kref *ref)
+{
+	struct xe_oa_config *oa_config =
+		container_of(ref, typeof(*oa_config), ref);
+
+	kfree(oa_config->flex_regs);
+	kfree(oa_config->b_counter_regs);
+	kfree(oa_config->mux_regs);
+
+	kfree_rcu(oa_config, rcu);
+}
+
+void xe_oa_config_put(struct xe_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, xe_oa_config_release);
+}
+
+struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config)
+{
+	if (kref_get_unless_zero(&oa_config->ref))
+		return oa_config;
+	else
+		return NULL;
+}
+
+struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set)
+{
+	struct xe_oa_config *oa_config;
+
+	rcu_read_lock();
+	oa_config = idr_find(&oa->metrics_idr, metrics_set);
+	if (oa_config)
+		oa_config = xe_oa_config_get(oa_config);
+	rcu_read_unlock();
+
+	return oa_config;
+}
+
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+{
+	xe_oa_config_put(oa_bo->oa_config);
+	xe_bb_free(oa_bo->bb, NULL);
+	kfree(oa_bo);
+}
+
+static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
+{
+	return &stream->hwe->oa_group->regs;
+}
+
+static u32 gen12_oa_hw_tail_read(struct xe_oa_stream *stream)
+{
+	return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
+		GEN12_OAG_OATAILPTR_MASK;
+}
+
+#define oa_report_header_64bit(__s) \
+	((__s)->oa_buffer.format->header == HDR_64_BIT)
+
+static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+{
+	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+}
+
+static u64 oa_report_reason(struct xe_oa_stream *stream, void *report)
+{
+	return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) &
+	       (GRAPHICS_VER(stream->oa->xe) >= 12 ?
+		OAREPORT_REASON_MASK_EXTENDED :
+		OAREPORT_REASON_MASK);
+}
+
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		*(u64 *)report = 0;
+	else
+		*report = 0;
+}
+
+static bool oa_report_ctx_invalid(struct xe_oa_stream *stream, void *report)
+{
+	return false;
+}
+
+static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+{
+	return oa_report_header_64bit(stream) ?
+		*((u64 *)report + 1) :
+		*((u32 *)report + 1);
+}
+
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		*(u64 *)&report[2] = 0;
+	else
+		report[1] = 0;
+}
+
+static u32 oa_context_id(struct xe_oa_stream *stream, u32 *report)
+{
+	u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2];
+
+	return ctx_id & stream->specific_ctx_id_mask;
+}
+
+static void oa_context_id_squash(struct xe_oa_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		report[4] = INVALID_CTX_ID;
+	else
+		report[2] = INVALID_CTX_ID;
+}
+
+static bool oa_buffer_check_unlocked(struct xe_oa_stream *stream)
+{
+	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+	int report_size = stream->oa_buffer.format->size;
+	u32 head, tail, read_tail;
+	unsigned long flags;
+	bool pollin;
+	u32 hw_tail;
+	u32 partial_report_size;
+
+	/*
+	 * We have to consider the (unlikely) possibility that read() errors
+	 * could result in an OA buffer reset which might reset the head and
+	 * tail state.
+	 */
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+	hw_tail = gen12_oa_hw_tail_read(stream);
+
+	/*
+	 * The tail pointer increases in 64 byte increments, not in report_size
+	 * steps. Also the report size may not be a power of 2. Compute the
+	 * potentially partially landed report in the OA buffer.
+	 */
+	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+	partial_report_size %= report_size;
+
+	/* Subtract partial amount off the tail */
+	hw_tail = OA_TAKEN(hw_tail, partial_report_size);
+
+	/*
+	 * NB: The head we observe here might effectively be a little
+	 * out of date. If a read() is in progress, the head could be
+	 * anywhere between this head and stream->oa_buffer.tail.
+	 */
+	head = stream->oa_buffer.head - gtt_offset;
+	read_tail = stream->oa_buffer.tail - gtt_offset;
+
+	tail = hw_tail;
+
+	/*
+	 * Walk the stream backward until we find a report with report
+	 * id and timestamp not at 0. Since the circular buffer pointers
+	 * progress by increments of 64 bytes and reports can be up
+	 * to 256 bytes long, we can't tell whether a report has fully
+	 * landed in memory before the report id and timestamp of the
+	 * following report have effectively landed.
+	 *
+	 * This is assuming that the writes of the OA unit land in
+	 * memory in the order they were written.
+	 * If not : (╯°□°)╯︵ ┻━┻
+	 */
+	while (OA_TAKEN(tail, read_tail) >= report_size) {
+		void *report = stream->oa_buffer.vaddr + tail;
+
+		if (oa_report_id(stream, report) ||
+		    oa_timestamp(stream, report))
+			break;
+
+		tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
+	}
+#if 0 // FIXME
+	if (OA_TAKEN(hw_tail, tail) > report_size &&
+	    __ratelimit(&stream->oa->tail_pointer_race))
+		drm_dbg(&stream->oa->xe->drm,
+			"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
+			head, tail, hw_tail);
+#endif
+	stream->oa_buffer.tail = gtt_offset + tail;
+
+	pollin = OA_TAKEN(stream->oa_buffer.tail,
+			  stream->oa_buffer.head) >= report_size;
+
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+	return pollin;
+}
+
+static int append_oa_status(struct xe_oa_stream *stream,
+			    char __user *buf,
+			    size_t count,
+			    size_t *offset,
+			    enum drm_xe_oa_record_type type)
+{
+	struct drm_xe_oa_record_header header = { type, 0, sizeof(header) };
+
+	if ((count - *offset) < header.size)
+		return -ENOSPC;
+
+	if (copy_to_user(buf + *offset, &header, sizeof(header)))
+		return -EFAULT;
+
+	(*offset) += header.size;
+
+	return 0;
+}
+
+static int append_oa_sample(struct xe_oa_stream *stream,
+			    char __user *buf,
+			    size_t count,
+			    size_t *offset,
+			    const u8 *report)
+{
+	int report_size = stream->oa_buffer.format->size;
+	struct drm_xe_oa_record_header header;
+	int report_size_partial;
+	u8 *oa_buf_end;
+
+	header.type = DRM_XE_OA_RECORD_SAMPLE;
+	header.pad = 0;
+	header.size = stream->sample_size;
+
+	if ((count - *offset) < header.size)
+		return -ENOSPC;
+
+	buf += *offset;
+	if (copy_to_user(buf, &header, sizeof(header)))
+		return -EFAULT;
+	buf += sizeof(header);
+
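+	/*
+	 * The report may wrap past the end of the OA buffer: copy the piece
+	 * up to the buffer end, then the remainder from the buffer start.
+	 */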
+	oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
+	report_size_partial = oa_buf_end - report;
+
+	if (report_size_partial < report_size) {
+		if (copy_to_user(buf, report, report_size_partial))
+			return -EFAULT;
+		buf += report_size_partial;
+
+		if (copy_to_user(buf, stream->oa_buffer.vaddr,
+				 report_size - report_size_partial))
+			return -EFAULT;
+	} else if (copy_to_user(buf, report, report_size)) {
+		return -EFAULT;
+	}
+
+	(*offset) += header.size;
+
+	return 0;
+}
+
+static int gen8_append_oa_reports(struct xe_oa_stream *stream,
+				  char __user *buf,
+				  size_t count,
+				  size_t *offset)
+{
+	int report_size = stream->oa_buffer.format->size;
+	u8 *oa_buf_base = stream->oa_buffer.vaddr;
+	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+	u32 mask = (OA_BUFFER_SIZE - 1);
+	size_t start_offset = *offset;
+	unsigned long flags;
+	u32 head, tail;
+	int ret = 0;
+
+	if (drm_WARN_ON(&stream->gt->tile->xe->drm, !stream->enabled))
+		return -EIO;
+
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+	head = stream->oa_buffer.head;
+	tail = stream->oa_buffer.tail;
+
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+	/*
+	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
+	 * while indexing relative to oa_buf_base.
+	 */
+	head -= gtt_offset;
+	tail -= gtt_offset;
+
+	/*
+	 * An out of bounds or misaligned head or tail pointer implies a driver
+	 * bug since we validate + align the tail pointers we read from the
+	 * hardware and we are in full control of the head pointer which should
+	 * only be incremented by multiples of the report size.
+	 */
+	if (drm_WARN_ONCE(&stream->gt->tile->xe->drm,
+			  head > OA_BUFFER_SIZE ||
+			  tail > OA_BUFFER_SIZE,
+			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
+			  head, tail))
+		return -EIO;
+
+	for (/* none */;
+	     OA_TAKEN(tail, head);
+	     head = (head + report_size) & mask) {
+		u8 *report = oa_buf_base + head;
+		u32 *report32 = (void *)report;
+		u32 ctx_id;
+		u64 reason;
+
+		/*
+		 * The reason field includes flags identifying what
+		 * triggered this specific report (mostly timer
+		 * triggered or e.g. due to a context switch).
+		 *
+		 * In MMIO triggered reports, some platforms do not set the
+		 * reason bit in this field and it is valid to have a reason
+		 * field of zero.
+		 */
+		reason = oa_report_reason(stream, report);
+		ctx_id = oa_context_id(stream, report32);
+
+		/*
+		 * Squash whatever is in the CTX_ID field if it's marked as
+		 * invalid to be sure we avoid false-positive, single-context
+		 * filtering below...
+		 *
+		 * Note: that we don't clear the valid_ctx_bit so userspace can
+		 * understand that the ID has been squashed by the kernel.
+		 */
+		if (oa_report_ctx_invalid(stream, report)) {
+			ctx_id = INVALID_CTX_ID;
+			oa_context_id_squash(stream, report32);
+		}
+
+		/*
+		 * NB: For Gen 8 the OA unit no longer supports clock gating
+		 * off for a specific context and the kernel can't securely
+		 * stop the counters from updating as system-wide / global
+		 * values.
+		 *
+		 * Automatic reports now include a context ID so reports can be
+		 * filtered on the cpu but it's not worth trying to
+		 * automatically subtract/hide counter progress for other
+		 * contexts while filtering since we can't stop userspace
+		 * issuing MI_REPORT_PERF_COUNT commands which would still
+		 * provide a side-band view of the real values.
+		 *
+		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
+		 * to normalize counters for a single filtered context, it
+		 * needs to be forwarded bookend context-switch reports so that it
+		 * can track switches in between MI_REPORT_PERF_COUNT commands
+		 * and can itself subtract/ignore the progress of counters
+		 * associated with other contexts. Note that the hardware
+		 * automatically triggers reports when switching to a new
+		 * context which are tagged with the ID of the newly active
+		 * context. To avoid the complexity (and likely fragility) of
+		 * reading ahead while parsing reports to try and minimize
+		 * forwarding redundant context switch reports (i.e. between
+		 * other, unrelated contexts) we simply elect to forward them
+		 * all.
+		 *
+		 * We don't rely solely on the reason field to identify context
+		 * switches since it's not uncommon for periodic samples to
+		 * identify a switch before any 'context switch' report.
+		 */
+		if (!stream->engine || // FIXME: check
+		    stream->specific_ctx_id == ctx_id ||
+		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
+		    reason & OAREPORT_REASON_CTX_SWITCH) {
+
+			/*
+			 * While filtering for a single context we avoid
+			 * leaking the IDs of other contexts.
+			 */
+			if (stream->engine && // FIXME: check
+			    stream->specific_ctx_id != ctx_id) {
+				oa_context_id_squash(stream, report32);
+			}
+
+			ret = append_oa_sample(stream, buf, count, offset,
+					       report);
+			if (ret)
+				break;
+
+			stream->oa_buffer.last_ctx_id = ctx_id;
+		}
+
+		if (is_power_of_2(report_size)) {
+			/*
+			 * Clear out the report id and timestamp as a means
+			 * to detect unlanded reports.
+			 */
+			oa_report_id_clear(stream, report32);
+			oa_timestamp_clear(stream, report32);
+		} else {
+			/* Zero out the entire report */
+			memset(report32, 0, report_size);
+		}
+	}
+
+	if (start_offset != *offset) {
+		struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
+
+		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+		/*
+		 * We removed the gtt_offset for the copy loop above, indexing
+		 * relative to oa_buf_base so put back here...
+		 */
+		head += gtt_offset;
+		xe_mmio_write32(stream->gt, oaheadptr,
+				head & GEN12_OAG_OAHEADPTR_MASK);
+		stream->oa_buffer.head = head;
+
+		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+	}
+
+	return ret;
+}
+
+static void gen12_init_oa_buffer(struct xe_oa_stream *stream)
+{
+	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+	unsigned long flags;
+
+	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
+			gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
+	stream->oa_buffer.head = gtt_offset;
+
+	/*
+	 * PRM says:
+	 *
+	 *  "This MMIO must be set before the OATAILPTR
+	 *  register and after the OAHEADPTR register. This is
+	 *  to enable proper functionality of the overflow
+	 *  bit."
+	 */
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, gtt_offset |
+			OABUFFER_SIZE_16M | GEN12_OAG_OABUFFER_MEMORY_SELECT);
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
+			gtt_offset & GEN12_OAG_OATAILPTR_MASK);
+
+	/* Mark that we need updated tail pointers to read from... */
+	stream->oa_buffer.tail = gtt_offset;
+
+	/*
+	 * Reset state used to recognise context switches, affecting which
+	 * reports we will forward to userspace while filtering for a single
+	 * context.
+	 */
+	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
+
+	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+	/*
+	 * NB: although the OA buffer will initially be allocated
+	 * zeroed via shmfs (and so this memset is redundant when
+	 * first allocating), we may re-init the OA buffer, either
+	 * when re-enabling a stream or in error/reset paths.
+	 *
+	 * The reason we clear the buffer for each re-init is for the
+	 * sanity check in gen8_append_oa_reports() that looks at the
+	 * reason field to make sure it's non-zero which relies on
+	 * the assumption that new reports are being written to zeroed
+	 * memory...
+	 */
+	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size);
+}
+
+static void gen12_oa_enable(struct xe_oa_stream *stream)
+{
+	const struct xe_oa_regs *regs;
+	u32 val;
+
+	/*
+	 * If we don't want OA reports from the OA buffer, then we don't
+	 * even need to program the OAG unit.
+	 */
+	if (!(stream->sample_flags & SAMPLE_OA_REPORT))
+		return;
+
+	gen12_init_oa_buffer(stream);
+
+	regs = __oa_regs(stream);
+	val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+	      GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+	xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
+}
+
+static void gen12_oa_disable(struct xe_oa_stream *stream)
+{
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
+	if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, 0,
+			   GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 50000, NULL, false))
+		drm_err(&stream->oa->xe->drm,
+			"wait for OA to be disabled timed out\n");
+
+	xe_mmio_write32(stream->gt, GEN12_OA_TLB_INV_CR, 1);
+	if (xe_mmio_wait32(stream->gt, GEN12_OA_TLB_INV_CR, 0, 1, 50000, NULL, false))
+		drm_err(&stream->oa->xe->drm,
+			"wait for OA tlb invalidate timed out\n");
+}
+
+static int gen8_oa_read(struct xe_oa_stream *stream,
+			char __user *buf,
+			size_t count,
+			size_t *offset)
+{
+	struct xe_reg oastatus_reg = __oa_regs(stream)->oa_status;
+	u32 oastatus;
+	int ret;
+
+	if (drm_WARN_ON(&stream->gt->tile->xe->drm, !stream->oa_buffer.vaddr))
+		return -EIO;
+
+	oastatus = xe_mmio_read32(stream->gt, oastatus_reg);
+
+	/*
+	 * We treat OABUFFER_OVERFLOW as a significant error:
+	 *
+	 * Although theoretically we could handle this more gracefully
+	 * sometimes, some Gens don't correctly suppress certain
+	 * automatically triggered reports in this condition and so we
+	 * have to assume that old reports are now being trampled
+	 * over.
+	 *
+	 * Considering how we don't currently give userspace control
+	 * over the OA buffer size and always configure a large 16MB
+	 * buffer, then a buffer overflow does anyway likely indicate
+	 * that something has gone quite badly wrong.
+	 */
+	if (oastatus & GEN12_OAG_OASTATUS_BUFFER_OVERFLOW) {
+		ret = append_oa_status(stream, buf, count, offset,
+				       DRM_XE_OA_RECORD_OA_BUFFER_LOST);
+		if (ret)
+			return ret;
+
+		drm_dbg(&stream->oa->xe->drm,
+			"OA buffer overflow (exponent = %d): force restart\n",
+			stream->period_exponent);
+
+		gen12_oa_disable(stream);
+		gen12_oa_enable(stream);
+
+		/*
+		 * Note: .oa_enable() is expected to re-init the oabuffer and
+		 * reset the OASTATUS register for us
+		 */
+		oastatus = xe_mmio_read32(stream->gt, oastatus_reg);
+	}
+
+	if (oastatus & GEN12_OAG_OASTATUS_REPORT_LOST) {
+		ret = append_oa_status(stream, buf, count, offset,
+				       DRM_XE_OA_RECORD_OA_REPORT_LOST);
+		if (ret)
+			return ret;
+
+		xe_mmio_rmw32(stream->gt, oastatus_reg,
+			      GEN12_OAG_OASTATUS_COUNTER_OVERFLOW |
+			      GEN12_OAG_OASTATUS_REPORT_LOST, 0);
+	}
+
+	return gen8_append_oa_reports(stream, buf, count, offset);
+}
+
+static int xe_oa_wait_unlocked(struct xe_oa_stream *stream)
+{
+	/* We would wait indefinitely if periodic sampling is not enabled */
+	if (!stream->periodic)
+		return -EIO;
+
+	return wait_event_interruptible(stream->poll_wq,
+					oa_buffer_check_unlocked(stream));
+}
+
+static void xe_oa_poll_wait(struct xe_oa_stream *stream,
+			    struct file *file,
+			    poll_table *wait)
+{
+	poll_wait(file, &stream->poll_wq, wait);
+}
+
+static int __xe_oa_read(struct xe_oa_stream *stream,
+			char __user *buf,
+			size_t count,
+			size_t *offset)
+{
+	return gen8_oa_read(stream, buf, count, offset);
+}
+
+#if 0
+// If this is needed, it needs further investigation
+static int oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+{
+	struct xe_hw_engine *hwe = stream->hwe;
+	struct xe_engine *e;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct xe_vm *vm;
+	u64 batch_ofs;
+	long timeout;
+	int err = 0;
+
+	if (stream->engine) {
+		/*
+		 * FIXME: can we send kernel bb in e->vm context? Seems to be
+		 * causing big problems (cat err) which need to be investigated
+		 */
+		e = stream->engine;
+		XE_BUG_ON(!e->vm);
+		err = dma_resv_lock_interruptible(&e->vm->resv, NULL);
+		if (err)
+			goto exit;
+		down_write(&e->vm->lock);
+		job = xe_bb_create_job(e, bb);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto vm_unlock;
+		}
+	} else {
+		vm = xe_migrate_get_vm(stream->gt->tile->migrate);
+		e = xe_engine_create(hwe->gt->tile->xe, vm, BIT(hwe->logical_instance), 1,
+				     hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_engine_create failed=%d",
+				stream->gt->info.id, hwe->name, err);
+			goto put_vm;
+		}
+
+		batch_ofs = xe_bo_ggtt_addr(stream->gt->tile->mem.kernel_bb_pool->bo);
+		/* Will add MI_BATCH_BUFFER_END */
+		job = xe_bb_create_wa_job(e, bb, batch_ofs);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto put_engine;
+		}
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	if (timeout < 0)
+		err = timeout;
+	else if (!timeout)
+		err = -ETIME;
+put_engine:
+	if (!stream->engine)
+		xe_engine_put(e);
+put_vm:
+	if (!stream->engine)
+		xe_vm_put(vm);
+vm_unlock:
+	if (stream->engine) {
+		dma_resv_unlock(&e->vm->resv);
+		up_write(&e->vm->lock);
+	}
+exit:
+	return err;
+}
+#endif
+
+/*
+ * FIXME: Currently submits only to stream->engine or a new engine for
+ * stream->hwe. If needed, add a 'struct xe_engine *' argument.
+ *
+ * For now unconditionally create an engine, otherwise we hit a BUG_ON in
+ * xe_bb_create_wa_job. If jobs need to be sent to the same engine for
+ * serialization, xe_bb_create_wa_job may need to be replaced with a
+ * similar function.
+ *
+ * Also the code is wrong for gen12_guc_sw_ctx_id because there we need to
+ * submit against the real engine/context rather than the new engine created
+ * below.
+ */
+static int oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+{
+	struct xe_hw_engine *hwe = stream->hwe;
+	struct xe_engine *e = stream->engine;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct xe_vm *vm;
+	u64 batch_ofs;
+	long timeout;
+	int err = 0;
+
+	vm = xe_migrate_get_vm(stream->gt->tile->migrate);
+	// if (!stream->engine) {
+		e = xe_engine_create(hwe->gt->tile->xe, vm, BIT(hwe->logical_instance), 1,
+				     hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_engine_create failed=%d",
+				stream->gt->info.id, hwe->name, err);
+			goto put_vm;
+		}
+	// }
+
+	batch_ofs = xe_bo_ggtt_addr(stream->gt->tile->mem.kernel_bb_pool->bo);
+	/* Will add MI_BATCH_BUFFER_END */
+	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto put_engine;
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	if (timeout < 0)
+		err = timeout;
+	else if (!timeout)
+		err = -ETIME;
+
+put_engine:
+	// if (!stream->engine)
+		xe_engine_put(e);
+put_vm:
+	xe_vm_put(vm);
+
+	return err;
+}
+
+static void oa_pin_context(struct xe_oa_stream *stream)
+{
+	// contexts are already pinned for now
+}
+
+static void __store_reg_to_mem(struct xe_bb *bb, struct xe_reg reg, u32 ggtt_offset)
+{
+	u32 cmd;
+
+	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
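+	/* Bump the DWORD length field: the 64-bit address form of SRM is 4 dwords */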
+	cmd++;
+
+	bb->cs[bb->len++] = cmd;
+	bb->cs[bb->len++] = reg.addr;
+	bb->cs[bb->len++] = ggtt_offset;
+	bb->cs[bb->len++] = 0;
+}
+
+static int __read_reg(struct xe_oa_stream *stream, struct xe_reg reg, u32 ggtt_offset)
+{
+	struct xe_bb *bb;
+	int err = 0;
+
+	bb = xe_bb_new(stream->gt, 4 + 1, false);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		goto exit;
+	}
+
+	__store_reg_to_mem(bb, reg, ggtt_offset);
+
+	err = oa_submit_bb(stream, bb);
+	xe_bb_free(bb, NULL);
+exit:
+	return err;
+}
+
+static int gen12_guc_sw_ctx_id(struct xe_oa_stream *stream, u32 *ctx_id)
+{
+	struct xe_bo *bo;
+	u32 *ptr;
+	int err = 0;
+
+	bo = xe_bo_create_pin_map(stream->gt->tile->xe, stream->gt->tile, NULL,
+				  4096, ttm_bo_type_kernel,
+				  // XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_SYSTEM_BIT | // FIXME: check
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto exit;
+	}
+
+	err = __read_reg(stream, RING_EXECLIST_STATUS_HI(stream->hwe->mmio_base),
+			 xe_bo_ggtt_addr(bo));
+	if (err)
+		goto unpin;
+
+	ptr = bo->vmap.is_iomem ? bo->vmap.vaddr_iomem : bo->vmap.vaddr;
+
+	*ctx_id = *ptr;
+unpin:
+	xe_bo_unpin_map_no_vm(bo);
+exit:
+	return err;
+}
+
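+/*
+ * The OA report ctx ID field corresponds to the upper dword of the lrc
+ * descriptor, hence the (shift - 32) adjustments below. With GuC submission
+ * the current ID is instead read back from RING_EXECLIST_STATUS_HI.
+ */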
+static int gen12_get_render_context_id(struct xe_oa_stream *stream)
+{
+	u32 ctx_id, mask;
+	int ret;
+
+	if (xe_device_guc_submission_enabled(stream->gt->tile->xe)) {
+		ret = gen12_guc_sw_ctx_id(stream, &ctx_id);
+		if (ret)
+			return ret;
+
+		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
+	} else if (GRAPHICS_VERx100(stream->gt->tile->xe) >= 1250) {
+		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+
+		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+	} else {
+		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
+			 (GEN11_SW_CTX_ID_SHIFT - 32);
+
+		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN11_SW_CTX_ID_SHIFT - 32);
+	}
+	stream->specific_ctx_id = ctx_id & mask;
+	stream->specific_ctx_id_mask = mask;
+
+	return 0;
+}
+
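+/*
+ * MI_LRI carries (register, value) pairs, so MI_LRI_LEN() is even for a
+ * well-formed command, e.g. MI_LRI_LEN(MI_LOAD_REGISTER_IMM(3)) == 6.
+ */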
+static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
+{
+	u32 idx = *offset;
+	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
+	bool found = false;
+
+	idx++;
+	for (; idx < len; idx += 2) {
+		if (state[idx] == reg) {
+			found = true;
+			break;
+		}
+	}
+
+	*offset = idx;
+	return found;
+}
+
+static u32 oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
+{
+	// FIXME: check len and state assignments below
+	u32 len = (xe_lrc_size(stream->gt->tile->xe, stream->hwe->class) - PAGE_SIZE) / 4;
+	u32 *state = stream->gt->default_lrc[stream->hwe->class];
+	u32 offset;
+
+	/*
+	 * FIXME: maybe ok but really __xe_lrc_regs_offset should be added to
+	 * state. The same offset should be used in gen12_configure_oar_context
+	 * where ctx_oactxctrl_offset is consumed. Also instead of default_lrc
+	 * we could use stream->engine->lrc or stream->hwe->kernel_lrc
+	 */
+	if (drm_WARN_ON(&stream->oa->xe->drm, !state))
+		return U32_MAX;
+
+	for (offset = 0; offset < len; ) {
+		if (IS_MI_LRI_CMD(state[offset])) {
+			/*
+			 * We expect reg-value pairs in MI_LRI command, so
+			 * MI_LRI_LEN() should be even, if not, issue a warning.
+			 */
+			drm_WARN_ON(&stream->oa->xe->drm,
+				    MI_LRI_LEN(state[offset]) & 0x1);
+
+			if (oa_find_reg_in_lri(state, reg, &offset, len))
+				break;
+		} else {
+			offset++;
+		}
+	}
+
+	return offset < len ? offset : U32_MAX;
+}
+
+static int set_oa_ctx_ctrl_offset(struct xe_oa_stream *stream)
+{
+	struct xe_reg reg = GEN12_OACTXCONTROL(stream->hwe->mmio_base);
+	u32 offset = stream->oa->ctx_oactxctrl_offset;
+
+	/* Do this only once. Failure is stored as offset of U32_MAX */
+	if (offset)
+		goto exit;
+
+	offset = oa_context_image_offset(stream, reg.addr);
+	stream->oa->ctx_oactxctrl_offset = offset;
+
+	drm_dbg(&stream->oa->xe->drm,
+		"%s oa ctx control at 0x%08x dword offset\n",
+		stream->hwe->name, offset);
+exit:
+	return offset && offset != U32_MAX ? 0 : -ENODEV;
+}
+
+static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
+{
+	return hwe->class == XE_ENGINE_CLASS_RENDER;
+}
+
+static int oa_get_render_ctx_id(struct xe_oa_stream *stream)
+{
+	int ret = 0;
+
+	oa_pin_context(stream);
+
+	if (engine_supports_mi_query(stream->hwe)) {
+		/*
+		 * We are enabling perf query here. If we don't find the context
+		 * offset here, just return an error.
+		 */
+		ret = set_oa_ctx_ctrl_offset(stream);
+		if (ret) {
+			drm_err(&stream->gt->tile->xe->drm,
+				"set_oa_ctx_ctrl_offset failed for %s\n",
+				stream->hwe->name);
+			return ret;
+		}
+	}
+
+	ret = gen12_get_render_context_id(stream);
+
+	drm_dbg(&stream->gt->tile->xe->drm,
+		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
+		stream->specific_ctx_id,
+		stream->specific_ctx_id_mask);
+
+	return ret;
+}
+
+static void oa_put_render_ctx_id(struct xe_oa_stream *stream)
+{
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
+}
+
+static void free_oa_buffer(struct xe_oa_stream *stream)
+{
+	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+}
+
+static void free_oa_configs(struct xe_oa_stream *stream)
+{
+	struct xe_oa_config_bo *oa_bo, *tmp;
+
+	// FIXME: check functions below
+	xe_oa_config_put(stream->oa_config);
+	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+		free_oa_config_bo(oa_bo);
+}
+
+static void free_noa_wait(struct xe_oa_stream *stream)
+{
+	xe_bo_unpin_map_no_vm(stream->noa_wait);
+}
+
+static bool engine_supports_oa(const struct xe_hw_engine *hwe)
+{
+	return hwe->oa_group;
+}
+
+static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
+{
+	return hwe->oa_group && hwe->oa_group->type == type;
+}
+
+static void gen8_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
+			    struct xe_bb *bb, const struct flex *flex, u32 count)
+{
+	u32 offset = xe_bo_ggtt_addr(lrc->bo);
+
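+	/* Write each flex value straight into the saved context image via its GGTT address */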
+	do {
+		bb->cs[bb->len++] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
+		bb->cs[bb->len++] = 0;
+		bb->cs[bb->len++] = flex->value;
+
+	} while (flex++, --count);
+}
+
+static int gen8_modify_context(struct xe_oa_stream *stream, struct xe_lrc *lrc,
+			       const struct flex *flex, u32 count)
+{
+	struct xe_bb *bb;
+	int err = 0;
+
+	bb = xe_bb_new(stream->gt, 4 * count + 1, false);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		goto exit;
+	}
+
+	gen8_store_flex(stream, lrc, bb, flex, count);
+
+	err = oa_submit_bb(stream, bb);
+	xe_bb_free(bb, NULL);
+exit:
+	return err;
+}
+
+static void gen8_load_flex(struct xe_oa_stream *stream, struct xe_bb *bb,
+			   const struct flex *flex, u32 count)
+{
+	XE_BUG_ON(!count || count > 63);
+
+	bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
+
+	do {
+		bb->cs[bb->len++] = flex->reg.addr;
+		bb->cs[bb->len++] = flex->value;
+
+	} while (flex++, --count);
+
+	bb->cs[bb->len++] = MI_NOOP;
+}
+
+static int gen8_modify_self(struct xe_oa_stream *stream,
+			    const struct flex *flex, u32 count)
+{
+	struct xe_bb *bb;
+	int err = 0;
+
+	bb = xe_bb_new(stream->gt, 2 * count + 3, false);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		goto exit;
+	}
+
+	gen8_load_flex(stream, bb, flex, count);
+
+	err = oa_submit_bb(stream, bb);
+	xe_bb_free(bb, NULL);
+exit:
+	return err;
+}
+
+static int gen8_configure_context(struct xe_oa_stream *stream,
+				  struct xe_engine *engine,
+				  struct flex *flex, u32 count)
+{
+	int i, err = 0;
+
+	for (i = 0; i < engine->width; i++) {
+		// flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu); // FIXME
+		err = gen8_modify_context(stream, &engine->lrc[i], flex, count);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int oa_configure_all_contexts(struct xe_oa_stream *stream,
+				     struct flex *regs,
+				     size_t num_regs, bool enable)
+{
+	struct xe_file *xef = stream->xef;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_engine *e;
+	unsigned long idx;
+	int err;
+
+	// FIXME: below crashes during close, need to check xef mutex
+	return 0;
+
+	// FIXME: check
+	mutex_lock(&xef->engine.lock);
+	xa_for_each(&xef->engine.xa, idx, e) {
+		xe_engine_get(e);
+		err = gen8_configure_context(stream, e, regs, num_regs);
+		xe_engine_put(e);
+		if (err) {
+			mutex_unlock(&xef->engine.lock);
+			return err;
+		}
+	}
+	mutex_unlock(&xef->engine.lock);
+
+	/*
+	 * After updating all other contexts, we need to modify ourselves.  If
+	 * we don't modify the kernel_context, we do not get events while idle.
+	 */
+	for_each_hw_engine(hwe, stream->gt, id) {
+		/*
+		 * FIXME: at present there is no way to create an engine using
+		 * hwe->kernel_lrc. Also in xe we don't use kernel_lrc when idle,
+		 * though we would need a 'context' restored to get events when idle
+		 * to make sure registers are programmed correctly.
+		 */
+	}
+
+	return 0;
+}
+
+static __UNUSED__ int
+lrc_configure_all_contexts(struct xe_oa_stream *stream,
+			   const struct xe_oa_config *oa_config)
+{
+	return 0; // FIXME: not used for gen12+
+}
+
+static int gen12_configure_all_contexts(struct xe_oa_stream *stream, bool enable)
+{
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
+			CTX_R_PWR_CLK_STATE,
+		},
+	};
+
+	if (stream->hwe->class != XE_ENGINE_CLASS_RENDER)
+		return 0;
+
+	// FIXME: what should this do when enable == false?
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs), enable);
+}
+
+static int gen12_configure_oar_context(struct xe_oa_stream *stream, bool enable)
+{
+	int err;
+	u32 format = stream->oa_buffer.format->format;
+	u32 offset = stream->oa->ctx_oactxctrl_offset;
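+	/* offset selects the register dword in the context image, offset + 1 its value */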
+	struct flex regs_context[] = {
+		{
+			GEN8_OACTXCONTROL,
+			offset + 1,
+			enable ? GEN8_OA_COUNTER_RESUME : 0,
+		},
+	};
+	/*
+	 * Offsets in regs_lri are not used since this configuration is only
+	 * applied using LRI. Initialize the correct offsets for posterity.
+	 */
+#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
+	struct flex regs_lri[] = {
+		{
+			GEN12_OAR_OACONTROL,
+			GEN12_OAR_OACONTROL_OFFSET + 1,
+			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+			(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+		},
+		{
+			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
+			CTX_CONTEXT_CONTROL,
+			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+				      enable ?
+				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+				      0)
+		},
+	};
+
+	/*
+	 * Modify the context image of pinned context with regs_context
+	 * FIXME: for now only modifying engine->lrc[0], but maybe this should
+	 * be changed to modify all lrc's underlying the engine?
+	 */
+	err = gen8_modify_context(stream, &stream->engine->lrc[0],
+				  regs_context, ARRAY_SIZE(regs_context));
+	if (err)
+		return err;
+
+	/* Apply regs_lri using LRI with pinned context */
+	return gen8_modify_self(stream, regs_lri, ARRAY_SIZE(regs_lri));
+}
+
+bool HAS_OA_BPC_REPORTING(struct xe_device *xe) // FIXME
+{
+	switch (xe->info.platform) {
+	case XE_DG2:
+	case XE_PVC:
+	case XE_METEORLAKE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void gen12_disable_metric_set(struct xe_oa_stream *stream)
+{
+	u32 sqcnt1;
+
+	/*
+	 * Wa_1508761755:xehpsdv, dg2
+	 * Enable thread stall DOP gating and EU DOP gating.
+	 */
+	if (stream->gt->tile->xe->info.platform == XE_DG2) {
+		xe_gt_mcr_multicast_write(stream->gt, GEN8_ROW_CHICKEN,
+				_MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+		xe_mmio_write32(stream->gt, GEN7_ROW_CHICKEN2,
+				_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
+	}
+
+	/* Reset all contexts' slices/subslices configurations. */
+	gen12_configure_all_contexts(stream, false);
+
+	/* disable the context save/restore or OAR counters */
+	if (stream->engine)
+		gen12_configure_oar_context(stream, false);
+
+	/* Make sure we disable noa to save power. */
+	xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+		 (HAS_OA_BPC_REPORTING(stream->gt->tile->xe) ? GEN12_SQCNT1_OABPC : 0);
+
+	/* Reset PMON Enable to save power. */
+	xe_mmio_rmw32(stream->gt, GEN12_SQCNT1, sqcnt1, 0);
+}
+
+static int intel_guc_slpc_override_gucrc_mode(struct xe_gt *gt, u32 mode)
+{
+	return 0; // FIXME
+}
+
+static int intel_guc_slpc_unset_gucrc_mode(struct xe_gt *gt)
+{
+	return 0; // FIXME
+}
+
+void xe_oa_engine_pm_get(struct xe_oa_stream *stream)
+{
+	xe_device_mem_access_get(stream->oa->xe); // FIXME
+}
+
+void xe_oa_engine_pm_put(struct xe_oa_stream *stream)
+{
+	xe_device_mem_access_put(stream->oa->xe); // FIXME
+}
+
+static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
+{
+	struct xe_oa_group *g = stream->hwe->oa_group;
+	struct xe_gt *gt = stream->hwe->gt;
+	struct xe_oa *oa = stream->oa;
+
+	if (WARN_ON(stream != g->exclusive_stream))
+		return;
+
+	/*
+	 * Unset exclusive_stream first, it will be checked while disabling
+	 * the metric set on gen8+.
+	 *
+	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
+	 */
+	WRITE_ONCE(g->exclusive_stream, NULL);
+	gen12_disable_metric_set(stream);
+
+	free_oa_buffer(stream);
+
+	/*
+	 * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
+	 */
+	if (stream->override_gucrc)
+		drm_WARN_ON(&gt->tile->xe->drm, intel_guc_slpc_unset_gucrc_mode(gt));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_oa_engine_pm_put(stream);
+
+	if (stream->engine)
+		oa_put_render_ctx_id(stream);
+
+	free_oa_configs(stream);
+	free_noa_wait(stream);
+
+	if (oa->spurious_report_rs.missed) {
+		drm_notice(&gt->tile->xe->drm,
+			   "%d spurious OA report notices suppressed due to ratelimiting\n",
+			   oa->spurious_report_rs.missed);
+	}
+}
+
+static int alloc_oa_buffer(struct xe_oa_stream *stream)
+{
+	struct xe_bo *bo;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
+	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
+
+	bo = xe_bo_create_pin_map(stream->gt->tile->xe, stream->gt->tile, NULL,
+				  OA_BUFFER_SIZE, ttm_bo_type_kernel,
+				  // XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_SYSTEM_BIT | // FIXME: check
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	stream->oa_buffer.bo = bo;
+	stream->oa_buffer.vaddr = bo->vmap.is_iomem ?
+		bo->vmap.vaddr_iomem : bo->vmap.vaddr;
+
+	return 0;
+}
+
+static u32 *save_restore_register(struct xe_oa_stream *stream, u32 *cs,
+				  bool save, struct xe_reg reg, u32 offset,
+				  u32 dword_count)
+{
+	u32 cmd;
+	u32 d;
+
+	cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+	cmd |= MI_SRM_LRM_GLOBAL_GTT;
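+	/* Bump the DWORD length field: SRM/LRM with a 64-bit address take 4 dwords */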
+	cmd++;
+
+	for (d = 0; d < dword_count; d++) {
+		*cs++ = cmd;
+		*cs++ = reg.addr + 4 * d;
+		*cs++ = xe_bo_ggtt_addr(stream->noa_wait) + offset + 4 * d;
+		*cs++ = 0;
+	}
+
+	return cs;
+}
+
+static u64 div_u64_roundup(u64 nom, u32 den)
+{
+	return div_u64(nom + den - 1, den);
+}
+
+static u64 intel_gt_ns_to_clock_interval(const struct xe_gt *gt, u64 ns)
+{
+	return div_u64_roundup(gt->info.clock_freq * ns, NSEC_PER_SEC);
+}
+
+static int alloc_noa_wait(struct xe_oa_stream *stream)
+{
+	struct xe_bo *bo;
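+	/*
+	 * Program (2^64 - 1) - delay so that the MI_MATH ADD at the bottom of
+	 * the wait loop sets the carry flag once the elapsed ticks exceed the
+	 * delay.
+	 */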
+	const u64 delay_ticks = 0xffffffffffffffff -
+		intel_gt_ns_to_clock_interval(stream->gt,
+			      atomic64_read(&stream->oa->noa_programming_delay));
+	const u32 base = stream->hwe->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+	u32 *batch, *ts0, *cs, *jump;
+	int ret, i;
+	enum {
+		START_TS,
+		NOW_TS,
+		DELTA_TS,
+		JUMP_PREDICATE,
+		DELTA_TARGET,
+		N_CS_GPR
+	};
+	struct xe_reg mi_predicate_result = HAS_MI_SET_PREDICATE(stream->gt->tile->xe) ?
+					MI_PREDICATE_RESULT_2(base) :
+					MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
+
+	bo = xe_bo_create_pin_map(stream->gt->tile->xe, stream->gt->tile, NULL,
+				  8192, ttm_bo_type_kernel,
+				  // XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_SYSTEM_BIT | // FIXME: check
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	batch = cs = bo->vmap.is_iomem ? bo->vmap.vaddr_iomem : bo->vmap.vaddr;
+	stream->noa_wait = bo;
+
+#define GPR_SAVE_OFFSET 4096
+#define PREDICATE_SAVE_OFFSET 4160
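+	/*
+	 * bo layout: the batch runs out of the first 4K page; the second
+	 * page holds the saved CS_GPR and predicate register contents.
+	 */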
+
+	/* Save registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, true /* save */, CS_GPR(i),
+			GPR_SAVE_OFFSET + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, true /* save */, mi_predicate_result,
+		PREDICATE_SAVE_OFFSET, 1);
+
+	/* First timestamp snapshot location. */
+	ts0 = cs;
+
+	/*
+	 * Initial snapshot of the timestamp register to implement the wait.
+	 * We work with 32-bit values, so clear out the upper 32 bits of the
+	 * register because the ALU operates on 64-bit values.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = CS_GPR(START_TS).addr + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = RING_TIMESTAMP(base).addr;
+	*cs++ = CS_GPR(START_TS).addr;
+
+	/*
+	 * This is the location we're going to jump back into until the
+	 * required amount of time has passed.
+	 */
+	jump = cs;
+
+	/*
+	 * Take another snapshot of the timestamp register. Take care to clear
+	 * the upper 32 bits of CS_GPR(NOW_TS) as we're using it for other
+	 * operations below.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = CS_GPR(NOW_TS).addr + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = RING_TIMESTAMP(base).addr;
+	*cs++ = CS_GPR(NOW_TS).addr;
+
+	/*
+	 * Compute the difference between the two timestamps and store the
+	 * result in CS_GPR(DELTA_TS).
+	 */
+	*cs++ = MI_MATH(5);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+	 * timestamp has rolled over the 32 bits) into the predicate register
+	 * to be used for the predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = CS_GPR(JUMP_PREDICATE).addr;
+	*cs++ = mi_predicate_result.addr;
+
+	if (HAS_MI_SET_PREDICATE(stream->gt->tile->xe))
+		*cs++ = MI_SET_PREDICATE | 1;
+
+	/* Restart from the beginning if we had timestamps roll over. */
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_PREDICATE;
+	// *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; // FIXME
+	*cs++ = 0;
+
+	if (HAS_MI_SET_PREDICATE(stream->gt->tile->xe))
+		*cs++ = MI_SET_PREDICATE;
+
+	/*
+	 * Now add the difference between the two previous timestamps to:
+	 *      ((1 << 64) - 1) - delay_ticks
+	 *
+	 * When the Carry Flag contains 1 this means the elapsed time is
+	 * longer than the expected delay, and we can exit the wait loop.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = CS_GPR(DELTA_TARGET).addr;
+	*cs++ = lower_32_bits(delay_ticks);
+	*cs++ = CS_GPR(DELTA_TARGET).addr + 4;
+	*cs++ = upper_32_bits(delay_ticks);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	*cs++ = MI_ARB_CHECK;
+
+	/*
+	 * Transfer the result into the predicate register to be used for the
+	 * predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = CS_GPR(JUMP_PREDICATE).addr;
+	*cs++ = mi_predicate_result.addr;
+
+	if (HAS_MI_SET_PREDICATE(stream->gt->tile->xe))
+		*cs++ = MI_SET_PREDICATE | 1;
+
+	/* Predicate the jump. */
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_PREDICATE;
+	// *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; // FIXME
+	*cs++ = 0;
+
+	if (HAS_MI_SET_PREDICATE(stream->gt->tile->xe))
+		*cs++ = MI_SET_PREDICATE;
+
+	/* Restore registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, false /* restore */, CS_GPR(i),
+			GPR_SAVE_OFFSET + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, false /* restore */, mi_predicate_result,
+		PREDICATE_SAVE_OFFSET, 1);
+
+	/* And return to the ring. */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	return 0;
+}
+
+void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
+{
+	u32 i;
+
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
+
+	for (i = 0; i < n_regs; i++) {
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min_t(u32,
+					  n_regs - i,
+					  MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+			bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		bb->cs[bb->len++] = reg_data[i].addr.addr;
+		bb->cs[bb->len++] = reg_data[i].value;
+	}
+}
+
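+/*
+ * Batch space needed for a register list: one MI_LOAD_REGISTER_IMM header
+ * per group of up to 126 registers plus an (addr, value) dword pair per
+ * register, e.g. 130 registers take 2 headers + 260 dwords = 262 dwords.
+ */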
+static int num_lri_dwords(int num_regs)
+{
+	int count = 0;
+
+	if (num_regs > 0) {
+		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+		count += num_regs * 2;
+	}
+
+	return count;
+}
+
+static struct xe_oa_config_bo *
+alloc_oa_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
+{
+	struct xe_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	struct xe_bb *bb;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	config_length += num_lri_dwords(oa_config->mux_regs_len);
+	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+	config_length += num_lri_dwords(oa_config->flex_regs_len);
+#if 1 // FIXME: noa_wait (see 93937659dc64)
+	config_length++; /* MI_BATCH_BUFFER_END */
+#else
+	config_length += 4; /* MI_BATCH_BUFFER_START */
+#endif
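+	/* Round the batch up to a whole page worth of dwords */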
+	config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32);
+
+	bb = xe_bb_new(stream->gt, config_length, false);
+	if (IS_ERR(bb))
+		goto err_free;
+
+	write_cs_mi_lri(bb, oa_config->mux_regs, oa_config->mux_regs_len);
+	write_cs_mi_lri(bb, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+	write_cs_mi_lri(bb, oa_config->flex_regs, oa_config->flex_regs_len);
+
+#if 0 // FIXME: noa_wait (see 93937659dc64)
+	// xe_bb_create_job adds MI_BATCH_BUFFER_END
+	// TBD: how to handle noa_wait in xe_bb_create_job
+
+	/* Jump into the active wait. */
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_START;
+	bb->cs[bb->len++] = xe_bo_ggtt_addr(stream->noa_wait);
+	bb->cs[bb->len++] = 0;
+#endif
+	oa_bo->bb = bb;
+	oa_bo->oa_config = xe_oa_config_get(oa_config);
+	llist_add(&oa_bo->node, &stream->oa_config_bos);
+
+	return oa_bo;
+err_free:
+	kfree(oa_bo);
+	return ERR_CAST(bb);
+}
+
+static struct xe_oa_config_bo *get_oa_vma(struct xe_oa_stream *stream)
+{
+	struct xe_oa_config *oa_config = stream->oa_config;
+	struct xe_oa_config_bo *oa_bo;
+
+	/*
+	 * Look for the buffer in the already allocated BOs attached
+	 * to the stream.
+	 */
+	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
+		if (oa_bo->oa_config == oa_config &&
+		    memcmp(oa_bo->oa_config->uuid,
+			   oa_config->uuid,
+			   sizeof(oa_config->uuid)) == 0)
+			goto out;
+	}
+
+	oa_bo = alloc_oa_config_buffer(stream, oa_config);
+out:
+	return oa_bo;
+}
+
+// FIXME: check entire function and called functions
+static int emit_oa_config(struct xe_oa_stream *stream)
+{
+	struct xe_oa_config_bo *oa_bo;
+	int err = 0;
+
+	oa_bo = get_oa_vma(stream);
+	if (IS_ERR(oa_bo)) {
+		err = PTR_ERR(oa_bo);
+		goto exit;
+	}
+
+	err = oa_submit_bb(stream, oa_bo->bb);
+exit:
+	return err;
+}
+
+static __UNUSED__ void oa_context(struct xe_oa_stream *stream) {}
+
+static __UNUSED__ u32 oa_config_flex_reg(const struct xe_oa_config *oa_config,
+					 struct xe_reg reg)
+{
+	u32 mmio = reg.addr;
+	int i;
+
+	/*
+	 * This arbitrary default will select the 'EU FPU0 Pipeline
+	 * Active' event. In the future it's anticipated that there
+	 * will be an explicit 'No Event' we can select, but not yet...
+	 */
+	if (!oa_config)
+		return 0;
+
+	for (i = 0; i < oa_config->flex_regs_len; i++) {
+		if (oa_config->flex_regs[i].addr.addr == mmio)
+			return oa_config->flex_regs[i].value;
+	}
+
+	return 0;
+}
+
+static __UNUSED__ void gen8_update_reg_state_unlocked(const struct xe_oa_stream *stream) {}
+
+static __UNUSED__ u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
+{
+	return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
+			     (stream->sample_flags & SAMPLE_OA_REPORT) ?
+			     0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
+}
+
+static int gen12_enable_metric_set(struct xe_oa_stream *stream)
+{
+	u32 sqcnt1;
+	int ret;
+
+	/*
+	 * Wa_1508761755:xehpsdv, dg2
+	 * EU NOA signals behave incorrectly if EU clock gating is enabled.
+	 * Disable thread stall DOP gating and EU DOP gating.
+	 */
+	if (stream->gt->tile->xe->info.platform == XE_DG2) {
+		xe_gt_mcr_multicast_write(stream->gt, GEN8_ROW_CHICKEN, // FIXME: check
+				_MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+		xe_mmio_write32(stream->gt, GEN7_ROW_CHICKEN2,
+				_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
+	}
+
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+			   /* Disable clk ratio reports, like previous Gens. */
+			   _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+					      GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
+			   /*
+			    * If the user didn't require OA reports, instruct
+			    * the hardware not to emit ctx switch reports.
+			    */
+			   oag_report_ctx_switches(stream));
+
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
+			   (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+			    GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
+			    (stream->period_exponent <<
+				GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) : 0);
+
+	/*
+	 * Initialize Super Queue Internal Cnt Register
+	 * Set PMON Enable in order to collect valid metrics.
+	 * Enable bytes per clock reporting in OA for XEHPSDV onward.
+	 */
+	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+		 (HAS_OA_BPC_REPORTING(stream->gt->tile->xe) ? GEN12_SQCNT1_OABPC : 0);
+
+	xe_mmio_rmw32(stream->gt, GEN12_SQCNT1, 0, sqcnt1);
+
+	/*
+	 * Update all contexts prior writing the mux configurations as we need
+	 * to make sure all slices/subslices are ON before writing to NOA
+	 * registers.
+	 */
+	ret = gen12_configure_all_contexts(stream, true);
+	if (ret)
+		return ret;
+
+	/*
+	 * For Gen12, performance counters are context
+	 * saved/restored. Only enable it for the context that
+	 * requested this.
+	 */
+	if (stream->engine) {
+		ret = gen12_configure_oar_context(stream, true);
+		if (ret)
+			return ret;
+	}
+
+	return emit_oa_config(stream);
+}
+
+static void xe_oa_stream_enable(struct xe_oa_stream *stream)
+{
+	stream->pollin = false;
+
+	gen12_oa_enable(stream);
+
+	if (stream->sample_flags & SAMPLE_OA_REPORT)
+		hrtimer_start(&stream->poll_check_timer,
+			      ns_to_ktime(stream->poll_oa_period),
+			      HRTIMER_MODE_REL_PINNED);
+}
+
+static void xe_oa_stream_disable(struct xe_oa_stream *stream)
+{
+	gen12_oa_disable(stream);
+
+	if (stream->sample_flags & SAMPLE_OA_REPORT)
+		hrtimer_cancel(&stream->poll_check_timer);
+}
+
+static int xe_oa_stream_enable_sync(struct xe_oa_stream *stream)
+{
+	return gen12_enable_metric_set(stream);
+}
+
+static __UNUSED__ void get_default_sseu_config(void) {}
+static __UNUSED__ void get_sseu_config(void) {}
+
+/*
+ * OA timestamp frequency = CS timestamp frequency in most platforms. On some
+ * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
+ * cases, return the adjusted CS timestamp frequency to the user.
+ */
+u32 xe_oa_timestamp_frequency(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	u32 reg, shift;
+
+	/*
+	 * Wa_18013179988:dg2
+	 * Wa_14015846243:mtl
+	 */
+	switch (xe->info.platform) {
+	case XE_DG2:
+	case XE_METEORLAKE:
+		xe_device_mem_access_get(xe); // FIXME: check
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+		reg = xe_mmio_read32(gt, RPM_CONFIG0);
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+		xe_device_mem_access_put(xe);
+
+		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
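+
+		/*
+		 * On these platforms the OA unit ignores the CTC_SHIFT
+		 * scaling of the CS timestamp, so adjust the CS frequency
+		 * by 2^(3 - shift) to get the OA timestamp frequency.
+		 */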
+		return gt->info.clock_freq << (3 - shift);
+
+	default:
+		return gt->info.clock_freq;
+	}
+
+	// FIXME: should this be per gt, even in i915?
+
+}
+
+static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
+{
+	struct xe_oa_stream *stream =
+		container_of(hrtimer, typeof(*stream), poll_check_timer);
+
+	if (oa_buffer_check_unlocked(stream)) {
+		stream->pollin = true;
+		wake_up(&stream->poll_wq);
+	}
+
+	hrtimer_forward_now(hrtimer,
+			    ns_to_ktime(stream->poll_oa_period));
+
+	return HRTIMER_RESTART;
+}
+
+static int xe_oa_stream_init(struct xe_oa_stream *stream,
+			     struct drm_xe_oa_open_param *param,
+			     struct perf_open_properties *props)
+{
+	struct xe_oa_group *g = props->hwe->oa_group;
+	struct xe_gt *gt = props->hwe->gt;
+	struct xe_oa *oa = stream->oa;
+	int ret;
+
+	/* FIXME: More checks here should be moved to read_properties_unlocked */
+	/* Also cleanup 'struct xe_oa_stream' for duplicates */
+
+	/*
+	 * If the sysfs metrics/ directory wasn't registered for some
+	 * reason then don't let userspace try their luck with config IDs
+	 */
+	if (!oa->metrics_kobj) {
+		drm_dbg(&oa->xe->drm, "OA metrics weren't advertised via sysfs\n");
+		return -EINVAL;
+	}
+
+	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+	    (GRAPHICS_VER(oa->xe) < 12 || !stream->engine)) {
+		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * To avoid the complexity of having to accurately filter counter
+	 * reports and marshal to the appropriate client we currently only
+	 * allow exclusive access
+	 */
+	if (g->exclusive_stream) {
+		drm_dbg(&oa->xe->drm, "OA unit already in use\n");
+		return -EBUSY;
+	}
+
+	stream->hwe = props->hwe;
+	stream->gt = stream->hwe->gt;
+
+	stream->sample_size = sizeof(struct drm_xe_oa_record_header);
+
+	stream->oa_buffer.format = &oa->oa_formats[props->oa_format];
+	if (drm_WARN_ON(&oa->xe->drm, stream->oa_buffer.format->size == 0))
+		return -EINVAL;
+
+	stream->sample_flags = props->sample_flags;
+	stream->sample_size += stream->oa_buffer.format->size;
+
+	stream->hold_preemption = props->hold_preemption;
+
+	stream->periodic = props->oa_periodic;
+	if (stream->periodic)
+		stream->period_exponent = props->oa_period_exponent;
+
+	if (stream->engine) {
+		ret = oa_get_render_ctx_id(stream);
+		if (ret) {
+			drm_dbg(&oa->xe->drm, "Invalid context id to filter with\n");
+			return ret;
+		}
+	}
+
+	ret = alloc_noa_wait(stream);
+	if (ret) {
+		drm_dbg(&oa->xe->drm, "Unable to allocate NOA wait batch buffer\n");
+		goto err_noa_wait_alloc;
+	}
+
+	stream->oa_config = xe_oa_get_oa_config(oa, props->metrics_set);
+	if (!stream->oa_config) {
+		drm_dbg(&oa->xe->drm, "Invalid OA config id=%i\n", props->metrics_set);
+		ret = -EINVAL;
+		goto err_config;
+	}
+
+	/* PRM - observability performance counters:
+	 *
+	 *   OACONTROL, performance counter enable, note:
+	 *
+	 *   "When this bit is set, in order to have coherent counts,
+	 *   RC6 power state and trunk clock gating must be disabled.
+	 *   This can be achieved by programming MMIO registers as
+	 *   0xA094=0 and 0xA090[31]=1"
+	 *
+	 *   In our case we are expecting that taking pm + FORCEWAKE
+	 *   references will effectively disable RC6.
+	 */
+
+	xe_oa_engine_pm_get(stream);
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	/*
+	 * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
+	 * OA to lose the configuration state. Prevent this by overriding GUCRC
+	 * mode.
+	 */
+	if (xe_device_guc_submission_enabled(oa->xe) &&
+	    (IS_SUBPLATFORM_STEP(oa->xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+	     IS_SUBPLATFORM_STEP(oa->xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, STEP_B0))) {
+		ret = intel_guc_slpc_override_gucrc_mode(gt, 0); // FIXME
+		if (ret) {
+			drm_dbg(&oa->xe->drm, "Unable to override gucrc mode\n");
+			goto err_gucrc;
+		}
+
+		stream->override_gucrc = true;
+	}
+
+	ret = alloc_oa_buffer(stream);
+	if (ret)
+		goto err_oa_buf_alloc;
+
+	// stream->engine->gt->perf.sseu = props->sseu; // FIXME
+	WRITE_ONCE(g->exclusive_stream, stream);
+
+	ret = xe_oa_stream_enable_sync(stream);
+	if (ret) {
+		drm_dbg(&oa->xe->drm, "Unable to enable metric set\n");
+		goto err_enable;
+	}
+
+	drm_dbg(&oa->xe->drm, "opening stream oa config uuid=%s\n",
+		stream->oa_config->uuid);
+
+	hrtimer_init(&stream->poll_check_timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	stream->poll_check_timer.function = oa_poll_check_timer_cb;
+	init_waitqueue_head(&stream->poll_wq);
+	spin_lock_init(&stream->oa_buffer.ptr_lock);
+	mutex_init(&stream->lock);
+
+	return 0;
+
+err_enable:
+	WRITE_ONCE(g->exclusive_stream, NULL);
+	gen12_disable_metric_set(stream);
+
+	free_oa_buffer(stream);
+
+err_oa_buf_alloc:
+	if (stream->override_gucrc)
+		intel_guc_slpc_unset_gucrc_mode(gt);
+
+err_gucrc:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_oa_engine_pm_put(stream);
+
+	free_oa_configs(stream);
+
+err_config:
+	free_noa_wait(stream);
+
+err_noa_wait_alloc:
+	if (stream->engine)
+		oa_put_render_ctx_id(stream);
+
+	return ret;
+}
+
+__UNUSED__ void xe_oa_init_reg_state(void) {}
+
+static ssize_t xe_oa_read(struct file *file, char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	struct xe_oa_stream *stream = file->private_data;
+	size_t offset = 0;
+	int ret;
+
+	/* To ensure it's handled consistently we simply treat all reads of a
+	 * disabled stream as an error. In particular it might otherwise lead
+	 * to a deadlock for blocking file descriptors...
+	 */
+	if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT))
+		return -EIO;
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		/* There's the small chance of false positives from
+		 * stream->ops->wait_unlocked.
+		 *
+		 * E.g. with single context filtering since we only wait until
+		 * oabuffer has >= 1 report we don't immediately know whether
+		 * any reports really belong to the current context
+		 */
+		do {
+			ret = xe_oa_wait_unlocked(stream);
+			if (ret)
+				return ret;
+
+			mutex_lock(&stream->lock);
+			ret = __xe_oa_read(stream, buf, count, &offset);
+			mutex_unlock(&stream->lock);
+		} while (!offset && !ret);
+	} else {
+		mutex_lock(&stream->lock);
+		ret = __xe_oa_read(stream, buf, count, &offset);
+		mutex_unlock(&stream->lock);
+	}
+
+	/* We allow the poll checking to sometimes report false positive EPOLLIN
+	 * events where we might actually report EAGAIN on read() if there's
+	 * not really any data available. In this situation though we don't
+	 * want to enter a busy loop between poll() reporting an EPOLLIN event
+	 * and read() returning -EAGAIN. Clearing stream->pollin here
+	 * effectively ensures we back off until the next hrtimer callback
+	 * before reporting another EPOLLIN event.
+	 * The exception to this is if ops->read() returned -ENOSPC which means
+	 * that more OA data is available than could fit in the user provided
+	 * buffer. In this case we want the next poll() call to not block.
+	 */
+	if (ret != -ENOSPC)
+		stream->pollin = false;
+
+	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
+	return offset ?: (ret ?: -EAGAIN);
+}
+
+static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream,
+				  struct file *file, poll_table *wait)
+{
+	__poll_t events = 0;
+
+	xe_oa_poll_wait(stream, file, wait);
+
+	/* Note: we don't explicitly check whether there's something to read
+	 * here since this path may be very hot depending on what else
+	 * userspace is polling, or on the timeout in use. We rely solely on
+	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
+	 * samples to read.
+	 */
+	if (stream->pollin)
+		events |= EPOLLIN;
+
+	return events;
+}
+
+static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
+{
+	struct xe_oa_stream *stream = file->private_data;
+	__poll_t ret;
+
+	mutex_lock(&stream->lock);
+	ret = xe_oa_poll_locked(stream, file, wait);
+	mutex_unlock(&stream->lock);
+
+	return ret;
+}
+
+static void xe_engine_set_nopreempt(struct xe_engine *engine)
+{
+	// FIXME
+}
+
+static void xe_engine_clear_nopreempt(struct xe_engine *engine)
+{
+	// FIXME
+}
+
+static void xe_oa_enable_locked(struct xe_oa_stream *stream)
+{
+	if (stream->enabled)
+		return;
+
+	/* Allow stream->ops->enable() to refer to this */
+	stream->enabled = true;
+
+	xe_oa_stream_enable(stream);
+
+	if (stream->hold_preemption)
+		xe_engine_set_nopreempt(stream->engine);
+}
+
+static void xe_oa_disable_locked(struct xe_oa_stream *stream)
+{
+	if (!stream->enabled)
+		return;
+
+	/* Allow stream->ops->disable() to refer to this */
+	stream->enabled = false;
+
+	if (stream->hold_preemption)
+		xe_engine_clear_nopreempt(stream->engine);
+
+	xe_oa_stream_disable(stream);
+}
+
+static long xe_oa_config_locked(struct xe_oa_stream *stream,
+				unsigned long metrics_set)
+{
+	struct xe_oa_config *config;
+	long ret = stream->oa_config->id;
+
+	config = xe_oa_get_oa_config(stream->oa, metrics_set);
+	if (!config)
+		return -EINVAL;
+
+	if (config != stream->oa_config) {
+		int err = 0; /* FIXME: emit_oa_config() result once wired up */
+
+		/*
+		 * If OA is bound to a specific context, emit the
+		 * reconfiguration inline from that context. The update
+		 * will then be ordered with respect to submission on that
+		 * context.
+		 *
+		 * When set globally, we use a low priority kernel context,
+		 * so it will effectively take effect when idle.
+		 */
+		// err = emit_oa_config(stream, config, oa_context(stream), NULL); // FIXME
+		if (!err)
+			config = xchg(&stream->oa_config, config);
+		else
+			ret = err;
+	}
+
+	xe_oa_config_put(config);
+
+	return ret;
+}
+
+static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
+			       unsigned int cmd,
+			       unsigned long arg)
+{
+	switch (cmd) {
+	case XE_OA_IOCTL_ENABLE:
+		xe_oa_enable_locked(stream);
+		return 0;
+	case XE_OA_IOCTL_DISABLE:
+		xe_oa_disable_locked(stream);
+		return 0;
+	case XE_OA_IOCTL_CONFIG:
+		return xe_oa_config_locked(stream, arg);
+	}
+
+	return -EINVAL;
+}
+
+static long xe_oa_ioctl(struct file *file,
+			    unsigned int cmd,
+			    unsigned long arg)
+{
+	struct xe_oa_stream *stream = file->private_data;
+	long ret;
+
+	mutex_lock(&stream->lock);
+	ret = xe_oa_ioctl_locked(stream, cmd, arg);
+	mutex_unlock(&stream->lock);
+
+	return ret;
+}
+
+static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
+{
+	if (stream->enabled)
+		xe_oa_disable_locked(stream);
+
+	xe_oa_stream_destroy(stream);
+
+	if (stream->engine)
+		xe_engine_put(stream->engine); // FIXME: check
+
+	kfree(stream);
+}
+
+static int xe_oa_release(struct inode *inode, struct file *file)
+{
+	struct xe_oa_stream *stream = file->private_data;
+	struct xe_gt *gt = stream->gt;
+
+	/*
+	 * Within this call, we know that the fd is being closed and we have no
+	 * other user of stream->lock. Use the perf lock to destroy the stream
+	 * here.
+	 */
+	mutex_lock(&gt->oa.lock);
+	xe_oa_destroy_locked(stream);
+	mutex_unlock(&gt->oa.lock);
+
+	/* Release the reference the perf stream kept on the driver. */
+	drm_dev_put(&gt->tile->xe->drm);
+
+	return 0;
+}
+
+static const struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.release	= xe_oa_release,
+	.poll		= xe_oa_poll,
+	.read		= xe_oa_read,
+	.unlocked_ioctl	= xe_oa_ioctl,
+	/* Our ioctls have no arguments, so it's safe to use the same function
+	 * to handle 32-bit compatibility.
+	 */
+	.compat_ioctl   = xe_oa_ioctl,
+};
+
+static int
+xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
+			       struct drm_xe_oa_open_param *param,
+			       struct perf_open_properties *props,
+			       struct drm_file *file)
+{
+	struct xe_file *xef = to_xe_file(file);
+	struct xe_engine *engine = NULL;
+	struct xe_oa_stream *stream = NULL;
+	unsigned long f_flags = 0;
+	bool privileged_op = true;
+	int stream_fd;
+	int ret;
+
+	if (props->single_context) {
+		u32 engine_id = props->ctx_handle;
+
+		engine = xe_engine_lookup(xef, engine_id);
+		if (XE_IOCTL_ERR(oa->xe, !engine)) {
+			ret = -ENOENT;
+			goto err;
+		}
+	}
+
+	/*
+	 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
+	 * per-context basis. So we can relax requirements there if the user
+	 * doesn't request global stream access (i.e. query based sampling
+	 * using MI_RECORD_PERF_COUNT).
+	 */
+	if (GRAPHICS_VER(oa->xe) >= 12 && engine &&
+		 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
+		privileged_op = false;
+
+	if (props->hold_preemption) {
+		if (!props->single_context) {
+			drm_dbg(&oa->xe->drm,
+				"preemption disable with no context\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		privileged_op = true;
+	}
+
+	// get_default_sseu_config(&props->sseu, props->engine); // FIXME
+
+	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
+	 * we check a dev.xe.perf_stream_paranoid sysctl option
+	 * to determine if it's ok to access system wide OA counters
+	 * without CAP_PERFMON or CAP_SYS_ADMIN privileges.
+	 */
+	if (privileged_op &&
+	    xe_oa_stream_paranoid && !perfmon_capable()) {
+		drm_dbg(&oa->xe->drm,
+			"Insufficient privileges to open xe perf stream\n");
+		ret = -EACCES;
+		goto err_engine;
+	}
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream) {
+		ret = -ENOMEM;
+		goto err_engine;
+	}
+
+	stream->xef = xef;
+	stream->oa = oa;
+	stream->engine = engine;
+	stream->poll_oa_period = props->poll_oa_period;
+
+	ret = xe_oa_stream_init(stream, param, props);
+	if (ret)
+		goto err_alloc;
+
+	/* we avoid simply assigning stream->sample_flags = props->sample_flags
+	 * to have _stream_init check the combination of sample flags more
+	 * thoroughly, but still this is the expected result at this point.
+	 */
+	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
+		ret = -ENODEV;
+		goto err_flags;
+	}
+
+	if (param->flags & XE_OA_FLAG_FD_CLOEXEC)
+		f_flags |= O_CLOEXEC;
+	if (param->flags & XE_OA_FLAG_FD_NONBLOCK)
+		f_flags |= O_NONBLOCK;
+
+	stream_fd = anon_inode_getfd("[xe_oa]", &fops, stream, f_flags);
+	if (stream_fd < 0) {
+		ret = stream_fd;
+		goto err_flags;
+	}
+
+	if (!(param->flags & XE_OA_FLAG_DISABLED))
+		xe_oa_enable_locked(stream);
+
+	/* Take a reference on the driver that will be kept with stream_fd
+	 * until its release.
+	 */
+	drm_dev_get(&oa->xe->drm);
+
+	return stream_fd;
+err_flags:
+	xe_oa_stream_destroy(stream);
+err_alloc:
+	kfree(stream);
+err_engine:
+	if (engine)
+		xe_engine_put(engine);
+err:
+	return ret;
+}
+
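+/*
+ * The OA unit samples every 2^(exponent + 1) timestamp ticks: convert that
+ * period to ns, rounding up, using the OA timestamp frequency as tick rate.
+ */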
+static u64 oa_exponent_to_ns(struct xe_oa *oa, int exponent)
+{
+	u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
+	u32 den = xe_oa_timestamp_frequency(oa->xe);
+
+	return div_u64_roundup(nom, den);
+}
+
+static bool oa_format_valid(struct xe_oa *oa, enum drm_xe_oa_format format)
+{
+	return test_bit(format, oa->format_mask);
+}
+
+static void oa_format_add(struct xe_oa *oa, enum drm_xe_oa_format format)
+{
+	__set_bit(format, oa->format_mask);
+}
+
+static int read_properties_unlocked(struct xe_oa *oa,
+				    u64 __user *uprops,
+				    u32 n_props,
+				    struct perf_open_properties *props)
+{
+	const struct xe_oa_format *f;
+	u64 __user *uprop = uprops;
+	bool config_instance = false;
+	bool config_class = false;
+	u8 class, instance;
+	struct xe_gt *gt;
+	u32 i;
+	int ret;
+
+	memset(props, 0, sizeof(*props));
+	props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
+
+	/* Considering that ID = 0 is reserved and assuming that we don't
+	 * (currently) expect any configurations to ever specify duplicate
+	 * values for a particular property ID then the last _PROP_MAX value is
+	 * one greater than the maximum number of properties we expect to get
+	 * from userspace.
+	 */
+	if (!n_props || n_props >= DRM_XE_OA_PROP_MAX) {
+		drm_dbg(&oa->xe->drm,
+			"Invalid number of xe perf properties given\n");
+		return -EINVAL;
+	}
+
+	/* Defaults when class:instance is not passed */
+	class = XE_ENGINE_CLASS_RENDER;
+	instance = 0;
+
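+	/* Properties are passed in as an array of (u64 id, u64 value) pairs */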
+	for (i = 0; i < n_props; i++) {
+		u64 oa_period, oa_freq_hz;
+		u64 id, value;
+
+		ret = get_user(id, uprop);
+		if (ret)
+			return ret;
+
+		ret = get_user(value, uprop + 1);
+		if (ret)
+			return ret;
+
+		if (id == 0 || id >= DRM_XE_OA_PROP_MAX) {
+			drm_dbg(&oa->xe->drm,
+				"Unknown xe perf property ID\n");
+			return -EINVAL;
+		}
+
+		switch ((enum drm_xe_oa_property_id)id) {
+		case DRM_XE_OA_PROP_CTX_HANDLE:
+			props->single_context = 1;
+			props->ctx_handle = value;
+			break;
+		case DRM_XE_OA_PROP_SAMPLE_OA:
+			if (value)
+				props->sample_flags |= SAMPLE_OA_REPORT;
+			break;
+		case DRM_XE_OA_PROP_OA_METRICS_SET:
+			if (value == 0) {
+				drm_dbg(&oa->xe->drm,
+					"Unknown OA metric set ID\n");
+				return -EINVAL;
+			}
+			props->metrics_set = value;
+			break;
+		case DRM_XE_OA_PROP_OA_FORMAT:
+			if (value == 0 || value >= XE_OA_FORMAT_MAX) {
+				drm_dbg(&oa->xe->drm,
+					"Out-of-range OA report format %llu\n",
+					  value);
+				return -EINVAL;
+			}
+			if (!oa_format_valid(oa, value)) {
+				drm_dbg(&oa->xe->drm,
+					"Unsupported OA report format %llu\n",
+					  value);
+				return -EINVAL;
+			}
+			props->oa_format = value;
+			break;
+		case DRM_XE_OA_PROP_OA_EXPONENT:
+			if (value > OA_EXPONENT_MAX) {
+				drm_dbg(&oa->xe->drm,
+					"OA timer exponent too high (> %u)\n",
+					 OA_EXPONENT_MAX);
+				return -EINVAL;
+			}
+
+			/* Theoretically we can program the OA unit to sample
+			 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
+			 * for BXT. We don't allow such high sampling
+			 * frequencies by default unless root.
+			 */
+
+			BUILD_BUG_ON(sizeof(oa_period) != 8);
+			oa_period = oa_exponent_to_ns(oa, value);
+
+			/* This check is primarily to ensure that oa_period <=
+			 * UINT32_MAX (before passing to do_div which only
+			 * accepts a u32 denominator), but we can also skip
+			 * checking anything < 1Hz which implicitly can't be
+			 * limited via an integer oa_max_sample_rate.
+			 */
+			if (oa_period <= NSEC_PER_SEC) {
+				u64 tmp = NSEC_PER_SEC;
+
+				do_div(tmp, oa_period);
+				oa_freq_hz = tmp;
+			} else {
+				oa_freq_hz = 0;
+			}
+
+			if (oa_freq_hz > xe_oa_max_sample_rate && !perfmon_capable()) {
+				drm_dbg(&oa->xe->drm,
+					"OA exponent would exceed the max sampling frequency (sysctl dev.xe.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
+					  xe_oa_max_sample_rate);
+				return -EACCES;
+			}
+
+			props->oa_periodic = true;
+			props->oa_period_exponent = value;
+			break;
+		case DRM_XE_OA_PROP_HOLD_PREEMPTION:
+			props->hold_preemption = !!value;
+			break;
+		case DRM_XE_OA_PROP_GLOBAL_SSEU:
+			/*
+			 * FIXME: Confirm this, on i915 supported only for < 12.5
+			 * perf_open_properties.has_sseu is removed (always false)
+			 */
+			drm_dbg(&oa->xe->drm, "SSEU config not supported\n");
+			return -ENODEV;
+		case DRM_XE_OA_PROP_POLL_OA_PERIOD:
+			if (value < 100000 /* 100us */) {
+				drm_dbg(&oa->xe->drm,
+					"OA availability timer too small (%lluns < 100us)\n",
+					  value);
+				return -EINVAL;
+			}
+			props->poll_oa_period = value;
+			break;
+		case DRM_XE_OA_PROP_OA_ENGINE_CLASS:
+			class = (u8)value;
+			config_class = true;
+			break;
+		case DRM_XE_OA_PROP_OA_ENGINE_INSTANCE:
+			instance = (u8)value;
+			config_instance = true;
+			break;
+		default:
+			// MISSING_CASE(id);
+			return -EINVAL;
+		}
+
+		uprop += 2;
+	}
+
+	if ((config_class && !config_instance) ||
+	    (config_instance && !config_class)) {
+		drm_dbg(&oa->xe->drm,
+			"OA engine-class and engine-instance parameters must be passed together\n");
+		return -EINVAL;
+	}
+
+	for_each_gt(gt, oa->xe, i) {
+		props->hwe = xe_gt_hw_engine(gt, class, instance, false);
+		if (props->hwe)
+			break;
+	}
+	if (!props->hwe) {
+		drm_dbg(&oa->xe->drm,
+			"OA engine class and instance invalid %d:%d\n",
+			class, instance);
+		return -EINVAL;
+	}
+
+	if (!engine_supports_oa(props->hwe)) {
+		drm_dbg(&oa->xe->drm,
+			"Engine not supported by OA %d:%d\n",
+			class, instance);
+		return -EINVAL;
+	}
+
+#if 0 // FIXME: Do this later
+	/*
+	 * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+	 * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM
+	 * does not work as expected.
+	 */
+	if (IS_MTL_MEDIA_STEP(props->engine->xe, STEP_A0, STEP_C0) &&
+	    props->engine->oa_group->type == TYPE_OAM &&
+	    intel_check_bios_c6_setup(&props->engine->gt->rc6)) {
+		drm_dbg(&oa->xe->drm,
+			"OAM requires media C6 to be disabled in BIOS\n");
+		return -EINVAL;
+	}
+#endif
+	i = array_index_nospec(props->oa_format, XE_OA_FORMAT_MAX);
+	f = &oa->oa_formats[i];
+
+	if (!props->oa_format || !engine_supports_oa_format(props->hwe, f->type)) {
+		drm_dbg(&oa->xe->drm,
+			"Invalid OA format %d for class %d\n",
+			f->type, props->hwe->class);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int xe_oa_stream_open_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file)
+{
+	struct xe_oa *oa = &to_xe_device(dev)->oa;
+	struct drm_xe_oa_open_param *param = data;
+	struct perf_open_properties props;
+	u32 known_open_flags;
+	struct xe_gt *gt;
+	int ret;
+
+	if (!oa->xe) {
+		/* Can't use oa->xe for logging here, it is NULL */
+		drm_dbg(&to_xe_device(dev)->drm,
+			"xe perf interface not available for this system\n");
+		return -ENOTSUPP;
+	}
+
+	known_open_flags = XE_OA_FLAG_FD_CLOEXEC |
+			   XE_OA_FLAG_FD_NONBLOCK |
+			   XE_OA_FLAG_DISABLED;
+	if (param->flags & ~known_open_flags) {
+		drm_dbg(&oa->xe->drm,
+			"Unknown drm_xe_oa_open_param flag\n");
+		return -EINVAL;
+	}
+
+	ret = read_properties_unlocked(oa,
+				       u64_to_user_ptr(param->properties_ptr),
+				       param->num_properties,
+				       &props);
+	if (ret)
+		return ret;
+
+	gt = props.hwe->gt;
+
+	mutex_lock(&gt->oa.lock);
+	ret = xe_oa_stream_open_ioctl_locked(oa, param, &props, file);
+	mutex_unlock(&gt->oa.lock);
+
+	return ret;
+}
+
+void xe_oa_register(struct xe_device *xe)
+{
+	struct xe_oa *oa = &xe->oa;
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	/* FIXME: needed? */
+	if (!oa->xe)
+		return;
+
+	/* To be sure we're synchronized with an attempted
+	 * xe_oa_stream_open_ioctl(); considering that we register after
+	 * being exposed to userspace.
+	 */
+	mutex_lock(&gt->oa.lock);
+
+	oa->metrics_kobj =
+		kobject_create_and_add("metrics",
+				       &xe->drm.primary->kdev->kobj);
+
+	mutex_unlock(&gt->oa.lock);
+}
+
+void xe_oa_unregister(struct xe_device *xe)
+{
+	struct xe_oa *oa = &xe->oa;
+
+	if (!oa->metrics_kobj)
+		return;
+
+	kobject_put(oa->metrics_kobj);
+	oa->metrics_kobj = NULL;
+}
+
+static bool gen8_is_valid_flex_addr(struct xe_oa *oa, u32 addr)
+{
+	static const struct xe_reg flex_eu_regs[] = {
+		EU_PERF_CNTL0,
+		EU_PERF_CNTL1,
+		EU_PERF_CNTL2,
+		EU_PERF_CNTL3,
+		EU_PERF_CNTL4,
+		EU_PERF_CNTL5,
+		EU_PERF_CNTL6,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
+		if (flex_eu_regs[i].addr == addr)
+			return true;
+	}
+	return false;
+}
+
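+/* Range tables must be terminated with an all-zero sentinel entry */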
+static bool reg_in_range_table(u32 addr, const struct xe_oa_range *table)
+{
+	while (table->start || table->end) {
+		if (addr >= table->start && addr <= table->end)
+			return true;
+
+		table++;
+	}
+
+	return false;
+}
+
+static const struct xe_oa_range xehp_oa_b_counters[] = {
+	{ .start = 0xdc48, .end = 0xdc48 },	/* OAA_ENABLE_REG */
+	{ .start = 0xdd00, .end = 0xdd48 },	/* OAG_LCE0_0 - OAA_LENABLE_REG */
+	{}
+};
+
+static const struct xe_oa_range gen12_oa_b_counters[] = {
+	{ .start = 0x2b2c, .end = 0x2b2c },	/* GEN12_OAG_OA_PESS */
+	{ .start = 0xd900, .end = 0xd91c },	/* GEN12_OAG_OASTARTTRIG[1-8] */
+	{ .start = 0xd920, .end = 0xd93c },	/* GEN12_OAG_OAREPORTTRIG1[1-8] */
+	{ .start = 0xd940, .end = 0xd97c },	/* GEN12_OAG_CEC[0-7][0-1] */
+	{ .start = 0xdc00, .end = 0xdc3c },	/* GEN12_OAG_SCEC[0-7][0-1] */
+	{ .start = 0xdc40, .end = 0xdc40 },	/* GEN12_OAG_SPCTR_CNF */
+	{ .start = 0xdc44, .end = 0xdc44 },	/* GEN12_OAA_DBG_REG */
+	{}
+};
+
+static const struct xe_oa_range mtl_oam_b_counters[] = {
+	{ .start = 0x393000, .end = 0x39301c },	/* GEN12_OAM_STARTTRIG1[1-8] */
+	{ .start = 0x393020, .end = 0x39303c },	/* GEN12_OAM_REPORTTRIG1[1-8] */
+	{ .start = 0x393040, .end = 0x39307c },	/* GEN12_OAM_CEC[0-7][0-1] */
+	{ .start = 0x393200, .end = 0x39323c },	/* MPES[0-7] */
+	{}
+};
+
+/* FIXME: Checks below have been simplified/loosened for now compared with i915 */
+static bool xehp_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr)
+{
+	return reg_in_range_table(addr, xehp_oa_b_counters) ||
+		reg_in_range_table(addr, gen12_oa_b_counters) ||
+		reg_in_range_table(addr, mtl_oam_b_counters);
+}
+
+/*
+ * Ref: 14010536224:
+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
+ */
+static const struct xe_oa_range mtl_oa_mux_regs[] = {
+	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
+	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
+	{ .start = 0x38d100, .end = 0x38d114},	/* VISACTL */
+	{}
+};
+
+static const struct xe_oa_range gen12_oa_mux_regs[] = {
+	{ .start = 0x0d00, .end = 0x0d04 },     /* RPM_CONFIG[0-1] */
+	{ .start = 0x0d0c, .end = 0x0d2c },     /* NOA_CONFIG[0-8] */
+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
+	{ .start = 0x20cc, .end = 0x20cc },	/* WAIT_FOR_RC6_EXIT */
+	{}
+};
+
+static bool gen12_is_valid_mux_addr(struct xe_oa *oa, u32 addr)
+{
+	if (oa->xe->info.platform == XE_METEORLAKE)
+		return reg_in_range_table(addr, mtl_oa_mux_regs);
+	else
+		return reg_in_range_table(addr, gen12_oa_mux_regs);
+}
+
+static u32 mask_reg_value(u32 reg, u32 val)
+{
+	/* HALF_SLICE_CHICKEN2 is programmed with the
+	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
+	 * programmed by userspace doesn't change this.
+	 */
+	if (REG_EQUAL_MCR(reg, HALF_SLICE_CHICKEN2))
+		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
+
+	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
+	 * indicated by its name and a bunch of selection fields used by OA
+	 * configs.
+	 */
+	if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
+		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
+
+	return val;
+}
+
+static struct xe_oa_reg *alloc_oa_regs(struct xe_oa *oa,
+				       bool (*is_valid)(struct xe_oa *oa, u32 addr),
+				       u32 __user *regs,
+				       u32 n_regs)
+{
+	struct xe_oa_reg *oa_regs;
+	int err;
+	u32 i;
+
+	if (!n_regs)
+		return NULL;
+
+	/* No is_valid function means we're not allowing any register to be programmed. */
+	if (XE_WARN_ON(!is_valid))
+		return ERR_PTR(-EINVAL);
+
+	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
+	if (!oa_regs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < n_regs; i++) {
+		u32 addr, value;
+
+		err = get_user(addr, regs);
+		if (err)
+			goto addr_err;
+
+		if (!is_valid(oa, addr)) {
+			drm_dbg(&oa->xe->drm,
+				"Invalid oa_reg address: %X\n", addr);
+			err = -EINVAL;
+			goto addr_err;
+		}
+
+		err = get_user(value, regs + 1);
+		if (err)
+			goto addr_err;
+
+		oa_regs[i].addr = XE_REG(addr);
+		oa_regs[i].value = mask_reg_value(addr, value);
+
+		regs += 2;
+	}
+
+	return oa_regs;
+
+addr_err:
+	kfree(oa_regs);
+	return ERR_PTR(err);
+}
+
+static ssize_t show_dynamic_id(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       char *buf)
+{
+	struct xe_oa_config *oa_config =
+		container_of(attr, typeof(*oa_config), sysfs_metric_id);
+
+	return sprintf(buf, "%d\n", oa_config->id);
+}
+
+static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa,
+					 struct xe_oa_config *oa_config)
+{
+	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
+	oa_config->sysfs_metric_id.attr.name = "id";
+	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
+	oa_config->sysfs_metric_id.show = show_dynamic_id;
+	oa_config->sysfs_metric_id.store = NULL;
+
+	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
+	oa_config->attrs[1] = NULL;
+
+	oa_config->sysfs_metric.name = oa_config->uuid;
+	oa_config->sysfs_metric.attrs = oa_config->attrs;
+
+	return sysfs_create_group(oa->metrics_kobj,
+				  &oa_config->sysfs_metric);
+}
+
+int xe_oa_add_config_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file)
+{
+	struct xe_oa *oa = &to_xe_device(dev)->oa;
+	struct drm_xe_oa_config *args = data;
+	struct xe_oa_config *oa_config, *tmp;
+	struct xe_oa_reg *regs;
+	int err, id;
+
+	if (!oa->xe) {
+		/* Can't use oa->xe for logging here, it is NULL */
+		drm_dbg(&to_xe_device(dev)->drm,
+			"xe oa interface not available for this system\n");
+		return -ENOTSUPP;
+	}
+
+	if (!oa->metrics_kobj) {
+		drm_dbg(&oa->xe->drm,
+			"OA metrics weren't advertised via sysfs\n");
+		return -EINVAL;
+	}
+
+	if (xe_oa_stream_paranoid && !perfmon_capable()) {
+		drm_dbg(&oa->xe->drm,
+			"Insufficient privileges to add xe OA config\n");
+		return -EACCES;
+	}
+
+	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
+	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
+	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
+		drm_dbg(&oa->xe->drm,
+			"No OA registers given\n");
+		return -EINVAL;
+	}
+
+	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
+	if (!oa_config) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to allocate memory for the OA config\n");
+		return -ENOMEM;
+	}
+
+	oa_config->oa = oa;
+	kref_init(&oa_config->ref);
+
+	if (!uuid_is_valid(args->uuid)) {
+		drm_dbg(&oa->xe->drm,
+			"Invalid uuid format for OA config\n");
+		err = -EINVAL;
+		goto reg_err;
+	}
+
+	/* Last character in oa_config->uuid will be 0 because oa_config was
+	 * allocated with kzalloc.
+	 */
+	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
+
+	oa_config->mux_regs_len = args->n_mux_regs;
+	regs = alloc_oa_regs(oa,
+			     gen12_is_valid_mux_addr,
+			     u64_to_user_ptr(args->mux_regs_ptr),
+			     args->n_mux_regs);
+
+	if (IS_ERR(regs)) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to create OA config for mux_regs\n");
+		err = PTR_ERR(regs);
+		goto reg_err;
+	}
+	oa_config->mux_regs = regs;
+
+	oa_config->b_counter_regs_len = args->n_boolean_regs;
+	regs = alloc_oa_regs(oa,
+			     xehp_is_valid_b_counter_addr,
+			     u64_to_user_ptr(args->boolean_regs_ptr),
+			     args->n_boolean_regs);
+
+	if (IS_ERR(regs)) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to create OA config for b_counter_regs\n");
+		err = PTR_ERR(regs);
+		goto reg_err;
+	}
+	oa_config->b_counter_regs = regs;
+
+	oa_config->flex_regs_len = args->n_flex_regs;
+	regs = alloc_oa_regs(oa,
+			     gen8_is_valid_flex_addr,
+			     u64_to_user_ptr(args->flex_regs_ptr),
+			     args->n_flex_regs);
+
+	if (IS_ERR(regs)) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to create OA config for flex_regs\n");
+		err = PTR_ERR(regs);
+		goto reg_err;
+	}
+	oa_config->flex_regs = regs;
+
+	err = mutex_lock_interruptible(&oa->metrics_lock);
+	if (err)
+		goto reg_err;
+
+	/* We shouldn't have too many configs, so this iteration shouldn't be
+	 * too costly.
+	 */
+	idr_for_each_entry(&oa->metrics_idr, tmp, id) {
+		if (!strcmp(tmp->uuid, oa_config->uuid)) {
+			drm_dbg(&oa->xe->drm,
+				"OA config already exists with this uuid\n");
+			err = -EADDRINUSE;
+			goto sysfs_err;
+		}
+	}
+
+	err = create_dynamic_oa_sysfs_entry(oa, oa_config);
+	if (err) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to create sysfs entry for OA config\n");
+		goto sysfs_err;
+	}
+
+	/* Config id 0 is invalid, id 1 is reserved for the kernel-stored test config. */
+	oa_config->id = idr_alloc(&oa->metrics_idr,
+				  oa_config, 2,
+				  0, GFP_KERNEL);
+	if (oa_config->id < 0) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to allocate an id for the OA config\n");
+		err = oa_config->id;
+		goto sysfs_err;
+	}
+
+	mutex_unlock(&oa->metrics_lock);
+
+	drm_dbg(&oa->xe->drm,
+		"Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+
+	return oa_config->id;
+
+sysfs_err:
+	mutex_unlock(&oa->metrics_lock);
+reg_err:
+	xe_oa_config_put(oa_config);
+	drm_dbg(&oa->xe->drm,
+		"Failed to add new OA config\n");
+	return err;
+}
+
+int xe_oa_remove_config_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file)
+{
+	struct xe_oa *oa = &to_xe_device(dev)->oa;
+	struct xe_oa_config *oa_config;
+	u64 *arg = data;
+	int ret;
+
+	if (!oa->xe) {
+		/* Can't use oa->xe for logging here, it is NULL */
+		drm_dbg(&to_xe_device(dev)->drm,
+			"xe oa interface not available for this system\n");
+		return -ENOTSUPP;
+	}
+
+	if (xe_oa_stream_paranoid && !perfmon_capable()) {
+		drm_dbg(&oa->xe->drm,
+			"Insufficient privileges to remove xe OA config\n");
+		return -EACCES;
+	}
+
+	ret = mutex_lock_interruptible(&oa->metrics_lock);
+	if (ret)
+		return ret;
+
+	oa_config = idr_find(&oa->metrics_idr, *arg);
+	if (!oa_config) {
+		drm_dbg(&oa->xe->drm,
+			"Failed to remove unknown OA config\n");
+		ret = -ENOENT;
+		goto err_unlock;
+	}
+
+	BUG_ON(*arg != oa_config->id);
+
+	sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric);
+
+	idr_remove(&oa->metrics_idr, *arg);
+
+	mutex_unlock(&oa->metrics_lock);
+
+	drm_dbg(&oa->xe->drm,
+		"Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
+
+	xe_oa_config_put(oa_config);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&oa->metrics_lock);
+	return ret;
+}
+
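+/* These knobs appear under /proc/sys/dev/xe/, see xe_oa_sysctl_register() */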
+static struct ctl_table oa_table[] = {
+	{
+	 .procname = "perf_stream_paranoid",
+	 .data = &xe_oa_stream_paranoid,
+	 .maxlen = sizeof(xe_oa_stream_paranoid),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = SYSCTL_ZERO,
+	 .extra2 = SYSCTL_ONE,
+	 },
+	{
+	 .procname = "oa_max_sample_rate",
+	 .data = &xe_oa_max_sample_rate,
+	 .maxlen = sizeof(xe_oa_max_sample_rate),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = SYSCTL_ZERO,
+	 .extra2 = &oa_sample_rate_hard_limit,
+	 },
+	{}
+};
+
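+/* Only a single OA unit (OAG, or OAM on media gts) per gt for now */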
+static u32 num_perf_groups_per_gt(struct xe_gt *gt)
+{
+	return 1;
+}
+
+static u32 __oam_engine_group(struct xe_hw_engine *hwe)
+{
+	if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
+		/*
+		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+		 * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+		 */
+		drm_WARN_ON(&hwe->gt->tile->xe->drm,
+			    hwe->gt->info.type != XE_GT_TYPE_MEDIA);
+
+		return OA_GROUP_OAM_SAMEDIA_0;
+	}
+
+	return OA_GROUP_INVALID;
+}
+
+static u32 __oa_engine_group(struct xe_hw_engine *hwe)
+{
+	switch (hwe->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return OA_GROUP_OAG;
+
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return __oam_engine_group(hwe);
+
+	default:
+		return OA_GROUP_INVALID;
+	}
+}
+
+static struct xe_oa_regs __oam_regs(u32 base)
+{
+	return (struct xe_oa_regs) {
+		base,
+		GEN12_OAM_HEAD_POINTER(base),
+		GEN12_OAM_TAIL_POINTER(base),
+		GEN12_OAM_BUFFER(base),
+		GEN12_OAM_CONTEXT_CONTROL(base),
+		GEN12_OAM_CONTROL(base),
+		GEN12_OAM_DEBUG(base),
+		GEN12_OAM_STATUS(base),
+		GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+static struct xe_oa_regs __oag_regs(void)
+{
+	return (struct xe_oa_regs) {
+		0,
+		GEN12_OAG_OAHEADPTR,
+		GEN12_OAG_OATAILPTR,
+		GEN12_OAG_OABUFFER,
+		GEN12_OAG_OAGLBCTXCTRL,
+		GEN12_OAG_OACONTROL,
+		GEN12_OAG_OA_DEBUG,
+		GEN12_OAG_OASTATUS,
+		GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+static void oa_init_groups(struct xe_gt *gt)
+{
+	int i, num_groups = gt->oa.num_perf_groups;
+
+	for (i = 0; i < num_groups; i++) {
+		struct xe_oa_group *g = &gt->oa.group[i];
+
+		/* Fused off engines can result in a group with num_engines == 0 */
+		if (g->num_engines == 0)
+			continue;
+
+		if (i == OA_GROUP_OAG && gt->info.type != XE_GT_TYPE_MEDIA) {
+			g->regs = __oag_regs();
+			g->type = TYPE_OAG;
+		} else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+			g->regs = __oam_regs(mtl_oa_base[i]);
+			g->type = TYPE_OAM;
+		}
+
+		/* Set oa_unit_ids now to ensure ids remain contiguous. */
+		g->oa_unit_id = gt->tile->xe->oa.oa_unit_ids++;
+	}
+}
+
+static int oa_init_gt(struct xe_gt *gt)
+{
+	u32 num_groups = num_perf_groups_per_gt(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_oa_group *g;
+
+	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+	if (!g)
+		return -ENOMEM;
+
+	for_each_hw_engine(hwe, gt, id) {
+		u32 index = __oa_engine_group(hwe);
+
+		hwe->oa_group = NULL;
+		if (index < num_groups) {
+			g[index].num_engines++;
+			hwe->oa_group = &g[index];
+		}
+	}
+
+	gt->oa.num_perf_groups = num_groups;
+	gt->oa.group = g;
+
+	oa_init_groups(gt);
+
+	return 0;
+}
+
+static int oa_init_engine_groups(struct xe_oa *oa)
+{
+	struct xe_gt *gt;
+	int i, ret;
+
+	for_each_gt(gt, oa->xe, i) {
+		ret = oa_init_gt(gt);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void oa_init_supported_formats(struct xe_oa *oa)
+{
+	switch (oa->xe->info.platform) {
+	case XE_ALDERLAKE_S:
+	case XE_ALDERLAKE_P:
+		oa_format_add(oa, XE_OA_FORMAT_A12);
+		oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8);
+		oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8);
+		oa_format_add(oa, XE_OA_FORMAT_C4_B8);
+		break;
+
+	case XE_DG2:
+		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
+		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
+		break;
+
+	case XE_METEORLAKE:
+		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
+		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
+		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
+		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
+		break;
+
+	default:
+		drm_err(&oa->xe->drm, "Unknown platform\n");
+	}
+}
+
+static void xe_oa_init_info(struct xe_device *xe)
+{
+}
+
+int xe_oa_init(struct xe_device *xe)
+{
+	struct xe_oa *oa = &xe->oa;
+	struct xe_gt *gt;
+	int i, ret;
+
+	oa->xe = xe;
+	oa->oa_formats = oa_formats;
+	xe_oa_init_info(xe);
+
+	for_each_gt(gt, xe, i)
+		mutex_init(&gt->oa.lock);
+
+	/* Choose a representative limit */
+	oa_sample_rate_hard_limit = xe_root_mmio_gt(xe)->info.clock_freq / 2;
+
+	mutex_init(&oa->metrics_lock);
+	idr_init_base(&oa->metrics_idr, 1);
+
+	/* We set up some ratelimit state to potentially throttle any
+	 * _NOTES about spurious, invalid OA reports which we don't
+	 * forward to userspace.
+	 *
+	 * We print a _NOTE about any throttling when closing the
+	 * stream instead of waiting until driver _fini which no one
+	 * would ever see.
+	 *
+	 * Using the same limiting factors as printk_ratelimit()
+	 */
+	ratelimit_state_init(&oa->spurious_report_rs, 5 * HZ, 10);
+	/* Since we use a DRM_NOTE for spurious reports it would be
+	 * inconsistent to let __ratelimit() automatically print a
+	 * warning for throttling.
+	 */
+	ratelimit_set_flags(&oa->spurious_report_rs, RATELIMIT_MSG_ON_RELEASE);
+	ratelimit_state_init(&oa->tail_pointer_race, 5 * HZ, 10);
+	ratelimit_set_flags(&oa->tail_pointer_race, RATELIMIT_MSG_ON_RELEASE);
+	atomic64_set(&oa->noa_programming_delay, 500 * 1000 /* 500us */);
+
+	ret = oa_init_engine_groups(oa);
+	if (ret) {
+		drm_err(&xe->drm, "OA initialization failed %d\n", ret);
+		return ret;
+	}
+
+	oa_init_supported_formats(oa);
+
+	return 0;
+}
+
+static int destroy_config(int id, void *p, void *data)
+{
+	xe_oa_config_put(p);
+	return 0;
+}
+
+int xe_oa_sysctl_register(void)
+{
+	sysctl_header = register_sysctl("dev/xe", oa_table);
+	return 0;
+}
+
+void xe_oa_sysctl_unregister(void)
+{
+	unregister_sysctl_table(sysctl_header);
+}
+
+void xe_oa_fini(struct xe_device *xe)
+{
+	struct xe_oa *oa = &xe->oa;
+	struct xe_gt *gt;
+	int i;
+
+	if (!oa->xe)
+		return;
+
+	for_each_gt(gt, xe, i)
+		kfree(gt->oa.group);
+
+	idr_for_each(&oa->metrics_idr, destroy_config, oa);
+	idr_destroy(&oa->metrics_idr);
+
+	oa->xe = NULL;
+}
+
+int xe_oa_ioctl_version(struct xe_device *xe)
+{
+	return 7; // FIXME
+}
diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h
new file mode 100644
index 0000000000000..d13b75fa0256c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_oa.h
@@ -0,0 +1,402 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_OA_H_
+#define _XE_OA_H_
+
+#include <linux/poll.h>
+#include <drm/xe_drm.h>
+#include "regs/xe_reg_defs.h"
+
+struct drm_device;
+struct drm_file;
+
+enum {
+	OA_GROUP_OAG = 0,
+	OA_GROUP_OAM_SAMEDIA_0 = 0,
+
+	OA_GROUP_MAX,
+	OA_GROUP_INVALID = U32_MAX,
+};
+
+enum oa_type {
+	TYPE_OAG,
+	TYPE_OAM,
+};
+
+enum report_header {
+	HDR_32_BIT = 0,
+	HDR_64_BIT,
+};
+
+struct xe_oa_format {
+	u32 format;
+	int size;
+	int type;
+	enum report_header header;
+};
+
+struct xe_oa_reg {
+	struct xe_reg addr;
+	u32 value;
+};
+
+struct xe_oa_range {
+	u32 start;
+	u32 end;
+};
+
+struct xe_oa_config {
+	struct xe_oa *oa;
+
+	char uuid[UUID_STRING_LEN + 1];
+	int id;
+
+	const struct xe_oa_reg *mux_regs;
+	u32 mux_regs_len;
+	const struct xe_oa_reg *b_counter_regs;
+	u32 b_counter_regs_len;
+	const struct xe_oa_reg *flex_regs;
+	u32 flex_regs_len;
+
+	struct attribute_group sysfs_metric;
+	struct attribute *attrs[2];
+	struct kobj_attribute sysfs_metric_id;
+
+	struct kref ref;
+	struct rcu_head rcu;
+};
+
+struct xe_oa_regs {
+	u32 base;
+	struct xe_reg oa_head_ptr;
+	struct xe_reg oa_tail_ptr;
+	struct xe_reg oa_buffer;
+	struct xe_reg oa_ctx_ctrl;
+	struct xe_reg oa_ctrl;
+	struct xe_reg oa_debug;
+	struct xe_reg oa_status;
+	u32 oa_ctrl_counter_format_shift;
+};
+
+struct xe_oa_group {
+	/*
+	 * @oa_unit_id: Identifier for the OA unit.
+	 */
+	u32 oa_unit_id;
+
+	/*
+	 * @exclusive_stream: The stream currently using the OA unit. This is
+	 * sometimes accessed outside a syscall associated with its file
+	 * descriptor.
+	 */
+	struct xe_oa_stream *exclusive_stream;
+
+	/*
+	 * @num_engines: The number of engines using this OA unit.
+	 */
+	u32 num_engines;
+
+	/*
+	 * @regs: OA buffer register group for programming the OA unit.
+	 */
+	struct xe_oa_regs regs;
+
+	/*
+	 * @type: Type of OA unit - OAM, OAG etc.
+	 */
+	enum oa_type type;
+};
+
+struct xe_oa_gt {
+	/*
+	 * Lock associated with anything below within this structure.
+	 */
+	struct mutex lock;
+
+	/** FIXME
+	 * @sseu: sseu configuration selected to run while perf is active,
+	 * applies to all contexts.
+	 */
+	// struct intel_sseu sseu;
+
+	/**
+	 * @num_perf_groups: number of perf groups per gt.
+	 */
+	u32 num_perf_groups;
+
+	/*
+	 * @group: list of OA groups - one for each OA buffer.
+	 */
+	struct xe_oa_group *group;
+};
+
+struct xe_oa {
+	struct xe_device *xe;
+
+	struct kobject *metrics_kobj;
+
+	/*
+	 * Lock associated with adding/modifying/removing OA configs
+	 * in perf->metrics_idr.
+	 */
+	struct mutex metrics_lock;
+
+	/*
+	 * List of dynamic configurations (struct xe_oa_config); you
+	 * need to hold oa->metrics_lock to access it.
+	 */
+	struct idr metrics_idr;
+
+	/**
+	 * For rate limiting any notifications of spurious
+	 * invalid OA reports
+	 */
+	struct ratelimit_state spurious_report_rs;
+
+	/**
+	 * For rate limiting any notifications of tail pointer
+	 * race.
+	 */
+	struct ratelimit_state tail_pointer_race;
+
+	// u32 gen7_latched_oastatus1; // FIXME
+	u32 ctx_oactxctrl_offset;
+	u32 ctx_flexeu0_offset;
+
+	u32 gen8_valid_ctx_bit; // FIXME: deleted
+
+	// struct i915_oa_ops ops; // FIXME: these are deleted
+
+	const struct xe_oa_format *oa_formats;
+
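+	/* Bitmap of OA formats supported by the platform, see oa_format_add() */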
+#define FORMAT_MASK_SIZE DIV_ROUND_UP(XE_OA_FORMAT_MAX - 1, BITS_PER_LONG)
+	unsigned long format_mask[FORMAT_MASK_SIZE];
+
+	atomic64_t noa_programming_delay;
+
+	/* oa unit ids */
+	u32 oa_unit_ids;
+};
+
+/**
+ * struct xe_oa_stream - state for a single open stream FD
+ */
+struct xe_oa_stream {
+	/**
+	 * @xef: xe_file associated with oa stream
+	 * FIXME: is it ok to do this? Otherwise modify functions to pass this in where needed.
+	 */
+	struct xe_file *xef;
+
+	/**
+	 * @oa: xe_oa backpointer
+	 */
+	struct xe_oa *oa;
+
+	/**
+	 * @gt: GT this stream's hardware engine belongs to
+	 */
+	struct xe_gt *gt;
+
+	/**
+	 * FIXME: struct xe_hw_engine instead of intel_engine_cs
+	 * @hwe: hardware engine associated with this performance stream.
+	 */
+	struct xe_hw_engine *hwe;
+
+	/**
+	 * @lock: Lock associated with operations on stream
+	 */
+	struct mutex lock;
+
+	/**
+	 * @sample_flags: Flags representing the `DRM_XE_OA_PROP_SAMPLE_*`
+	 * properties given when opening a stream, representing the contents
+	 * of a single sample as read() by userspace.
+	 */
+	u32 sample_flags;
+
+	/**
+	 * @sample_size: Considering the configured contents of a sample
+	 * combined with the required header size, this is the total size
+	 * of a single sample record.
+	 */
+	int sample_size;
+
+	/**
+	 * FIXME: struct xe_engine instead of i915_gem_context
+	 * @engine: %NULL if measuring system-wide across all contexts or a
+	 * specific context that is being monitored.
+	 */
+	struct xe_engine *engine;
+
+	/**
+	 * @enabled: Whether the stream is currently enabled, considering
+	 * whether the stream was opened in a disabled state and based
+	 * on `XE_OA_IOCTL_ENABLE` and `XE_OA_IOCTL_DISABLE` calls.
+	 */
+	bool enabled;
+
+	/**
+	 * @hold_preemption: Whether preemption is put on hold for command
+	 * submissions done on the @engine. This is useful for some drivers
+	 * that cannot easily post-process the OA buffer contents to subtract
+	 * the delta of performance counters not associated with @engine.
+	 */
+	bool hold_preemption;
+
+	/*
+	 * @ops: The callbacks providing the implementation of this specific
+	 * type of configured stream.
+	 */
+	// const struct xe_perf_stream_ops *ops; // FIXME: these are deleted
+
+	/**
+	 * @oa_config: The OA configuration used by the stream.
+	 */
+	struct xe_oa_config *oa_config;
+
+	/**
+	 * @oa_config_bos: A list of struct xe_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct llist_head oa_config_bos;
+
+	/*
+	 * @pinned_ctx: The OA context specific information.
+	 * FIXME: not needed for xe, should be 'struct xe_lrc *' if needed
+	 */
+	// struct intel_context *pinned_ctx;
+
+	/**
+	 * @specific_ctx_id: The id of the specific context.
+	 */
+	u32 specific_ctx_id;
+
+	/**
+	 * @specific_ctx_id_mask: The mask used to mask specific_ctx_id bits.
+	 */
+	u32 specific_ctx_id_mask;
+
+	/**
+	 * @poll_check_timer: High resolution timer that will periodically
+	 * check for data in the circular OA buffer for notifying userspace
+	 * (e.g. during a read() or poll()).
+	 */
+	struct hrtimer poll_check_timer;
+
+	/**
+	 * @poll_wq: The wait queue that hrtimer callback wakes when it
+	 * sees data ready to read in the circular OA buffer.
+	 */
+	wait_queue_head_t poll_wq;
+
+	/**
+	 * @pollin: Whether there is data available to read.
+	 */
+	bool pollin;
+
+	/**
+	 * @periodic: Whether periodic sampling is currently enabled.
+	 */
+	bool periodic;
+
+	/**
+	 * @period_exponent: The OA unit sampling frequency is derived from this.
+	 */
+	int period_exponent;
+
+	/**
+	 * @oa_buffer: State of the OA buffer.
+	 */
+	struct {
+		const struct xe_oa_format *format;
+		struct xe_bo *bo;
+		// struct xe_vma *vma;
+		u8 *vaddr;
+		u32 last_ctx_id;
+		int size_exponent;
+
+		/**
+		 * @ptr_lock: Locks reads and writes to all head/tail state
+		 *
+		 * Consider: the head and tail pointer state needs to be read
+		 * consistently from an hrtimer callback (atomic context) and
+		 * the read() fop (user context); tail pointer updates happen
+		 * in atomic context and head updates in user context, and the
+		 * (unlikely) possibility of read() errors requires resetting
+		 * all head/tail state.
+		 *
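+		 * A sketch of the two paths under this lock (assuming the
+		 * irqsave variants, since the tail update runs in hrtimer,
+		 * i.e. atomic, context):
+		 *
+		 *	// hrtimer callback: update tail
+		 *	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+		 *	stream->oa_buffer.tail = ...;
+		 *	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+		 *
+		 *	// read() fop: update head after copying out data
+		 *	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+		 *	stream->oa_buffer.head = ...;
+		 *	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+		 *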
+		 * Note: Contention/performance aren't currently a significant
+		 * concern here considering the relatively low frequency of
+		 * hrtimer callbacks (5ms period) and that reads typically only
+		 * happen in response to a hrtimer event and likely complete
+		 * before the next callback.
+		 *
+		 * Note: This lock is not held *while* reading and copying data
+		 * to userspace so the value of head observed in hrtimer
+		 * callbacks won't represent any partial consumption of data.
+		 */
+		spinlock_t ptr_lock;
+
+		/**
+		 * @head: Although we can always read back the head pointer register,
+		 * we prefer to avoid trusting the HW state, just to avoid any
+		 * risk that some hardware condition could somehow bump the
+		 * head pointer unpredictably and cause us to forward the wrong
+		 * OA buffer data to userspace.
+		 */
+		u32 head;
+
+		/**
+		 * @tail: The last verified tail that can be read by userspace.
+		 */
+		u32 tail;
+	} oa_buffer;
+
+	/**
+	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA
+	 * logic to be reprogrammed.
+	 */
+	struct xe_bo *noa_wait;
+
+	/**
+	 * @poll_oa_period: The period in nanoseconds at which the OA
+	 * buffer should be checked for available data.
+	 */
+	u64 poll_oa_period;
+
+	/**
+	 * @override_gucrc: GuC RC has been overridden for the perf stream,
+	 * and we need to restore the default configuration on release.
+	 */
+	bool override_gucrc;
+};
+
+/* Below __UNUSED__ refers to exported oa functions not called from other parts of xe */
+int xe_oa_init(struct xe_device *xe);
+void xe_oa_fini(struct xe_device *xe);
+void xe_oa_register(struct xe_device *xe);
+void xe_oa_unregister(struct xe_device *xe);
+int xe_oa_ioctl_version(struct xe_device *xe);
+int xe_oa_sysctl_register(void);
+void xe_oa_sysctl_unregister(void);
+
+int xe_oa_stream_open_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int xe_oa_add_config_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int xe_oa_remove_config_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+void xe_oa_init_reg_state(void); // __UNUSED__
+
+struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set);  // __UNUSED__
+struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config); // __UNUSED__
+void xe_oa_config_put(struct xe_oa_config *oa_config); // __UNUSED__
+
+u32 xe_oa_timestamp_frequency(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 9acbb27dfcab9..895663dbf761b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -77,7 +77,10 @@ static int query_engines(struct xe_device *xe,
 				xe_to_user_engine_class[hwe->class];
 			hw_engine_info[i].engine_instance =
 				hwe->logical_instance;
-			hw_engine_info[i++].gt_id = gt->info.id;
+			hw_engine_info[i].gt_id = gt->info.id;
+			hw_engine_info[i++].oa_unit_id =
+				hwe->oa_group && hwe->oa_group->num_engines ?
+				hwe->oa_group->oa_unit_id : U16_MAX;
 		}
 
 	if (copy_to_user(query_ptr, hw_engine_info, size)) {
@@ -203,6 +206,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 		hweight_long(xe->info.mem_region_mask);
 	config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] =
 		xe_engine_device_get_max_priority(xe);
+	config->info[XE_QUERY_OA_IOCTL_VERSION] = xe_oa_ioctl_version(xe);
 
 	if (copy_to_user(query_ptr, config, size)) {
 		kfree(config);
@@ -244,6 +248,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
 		gts->gts[id].instance = id;
 		gts->gts[id].clock_freq = gt->info.clock_freq;
+		gts->gts[id].oa_timestamp_freq = xe_oa_timestamp_frequency(xe);
 		if (!IS_DGFX(xe))
 			gts->gts[id].native_mem_regions = 0x1;
 		else
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index e890b131af918..44219696cfff2 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -101,6 +101,9 @@ struct xe_user_extension {
 #define DRM_XE_WAIT_USER_FENCE		0x0b
 #define DRM_XE_VM_MADVISE		0x0c
 #define DRM_XE_ENGINE_GET_PROPERTY	0x0d
+#define DRM_XE_OA_OPEN			0x36
+#define DRM_XE_OA_ADD_CONFIG		0x37
+#define DRM_XE_OA_REMOVE_CONFIG		0x38
 
 /* Must be kept compact -- no holes */
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -117,6 +120,9 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
+#define DRM_IOCTL_XE_OA_OPEN			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_OPEN, struct drm_xe_oa_open_param)
+#define DRM_IOCTL_XE_OA_ADD_CONFIG		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_ADD_CONFIG, struct drm_xe_oa_config)
+#define DRM_IOCTL_XE_OA_REMOVE_CONFIG		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_REMOVE_CONFIG, __u64)
 
 /**
  * enum drm_xe_memory_class - Supported memory classes.
@@ -223,7 +229,8 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
 #define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY	6
-#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1)
+#define XE_QUERY_OA_IOCTL_VERSION		7
+#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_OA_IOCTL_VERSION + 1)
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
@@ -260,6 +267,7 @@ struct drm_xe_query_gts {
 		__u64 native_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
 		__u64 slow_mem_regions;		/* bit mask of instances from drm_xe_query_mem_usage */
 		__u64 inaccessible_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 oa_timestamp_freq;
 		__u64 reserved[8];
 	} gts[];
 };
@@ -699,6 +707,7 @@ struct drm_xe_engine_class_instance {
 
 	__u16 engine_instance;
 	__u16 gt_id;
+	__u16 oa_unit_id;
 };
 
 struct drm_xe_engine_create {
@@ -1002,6 +1011,280 @@ struct drm_xe_vm_madvise {
 	__u64 reserved[2];
 };
 
+enum drm_xe_oa_format {
+	XE_OA_FORMAT_C4_B8 = 7,
+
+	/* Gen8+ */
+	XE_OA_FORMAT_A12,
+	XE_OA_FORMAT_A12_B8_C8,
+	XE_OA_FORMAT_A32u40_A4u32_B8_C8,
+
+	/* DG2 */
+	XE_OAR_FORMAT_A32u40_A4u32_B8_C8,
+	XE_OA_FORMAT_A24u40_A14u32_B8_C8,
+
+	/* MTL OAM */
+	XE_OAM_FORMAT_MPEC8u64_B8_C8,
+	XE_OAM_FORMAT_MPEC8u32_B8_C8,
+
+	XE_OA_FORMAT_MAX	    /* non-ABI */
+};
+
+enum drm_xe_oa_property_id {
+	/**
+	 * Open the stream for a specific context handle (as used with
+	 * execbuffer2). A stream opened for a specific context this way
+	 * won't typically require root privileges.
+	 *
+	 * This property is available in perf revision 1.
+	 */
+	DRM_XE_OA_PROP_CTX_HANDLE = 1,
+
+	/**
+	 * A value of 1 requests the inclusion of raw OA unit reports as
+	 * part of stream samples.
+	 *
+	 * This property is available in perf revision 1.
+	 */
+	DRM_XE_OA_PROP_SAMPLE_OA,
+
+	/**
+	 * The value specifies which set of OA unit metrics should be
+	 * configured, defining the contents of any OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
+	 */
+	DRM_XE_OA_PROP_OA_METRICS_SET,
+
+	/**
+	 * The value specifies the size and layout of OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
+	 */
+	DRM_XE_OA_PROP_OA_FORMAT,
+
+	/**
+	 * Specifying this property implicitly requests periodic OA unit
+	 * sampling and (at least on Haswell) the sampling frequency is derived
+	 * from this exponent as follows:
+	 *
+	 *   80ns * 2^(period_exponent + 1)
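+	 *
+	 * For example, an exponent of 5 yields a sampling period of
+	 * 80ns * 2^(5 + 1) = 5.12 us between periodic OA reports.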
+	 *
+	 * This property is available in perf revision 1.
+	 */
+	DRM_XE_OA_PROP_OA_EXPONENT,
+
+	/**
+	 * Specifying this property is only valid when specifying a context to
+	 * filter with DRM_XE_OA_PROP_CTX_HANDLE. Specifying this property
+	 * will hold preemption of the particular context we want to gather
+	 * performance data about. The execbuf2 submissions must include a
+	 * drm_xe_gem_execbuffer_ext_perf parameter for this to apply.
+	 *
+	 * This property is available in perf revision 3.
+	 */
+	DRM_XE_OA_PROP_HOLD_PREEMPTION,
+
+	/**
+	 * Specifying this pins all contexts to the specified SSEU power
+	 * configuration for the duration of the recording.
+	 *
+	 * This parameter's value is a pointer to a struct
+	 * drm_xe_gem_context_param_sseu.
+	 *
+	 * This property is available in perf revision 4.
+	 */
+	DRM_XE_OA_PROP_GLOBAL_SSEU,
+
+	/**
+	 * This optional parameter specifies the timer interval in nanoseconds
+	 * at which the xe driver will check the OA buffer for available data.
+	 * Minimum allowed value is 100 microseconds. A default value is used by
+	 * the driver if this parameter is not specified. Note that larger timer
+	 * values will reduce CPU consumption during OA perf captures. However,
+	 * excessively large values would potentially result in OA buffer
+	 * overwrites as captures reach the end of the OA buffer.
+	 *
+	 * This property is available in perf revision 5.
+	 */
+	DRM_XE_OA_PROP_POLL_OA_PERIOD,
+
+	/**
+	 * Multiple engines may be mapped to the same OA unit. The OA unit is
+	 * identified by class:instance of any engine mapped to it.
+	 *
+	 * This parameter specifies the engine class and must be passed along
+	 * with DRM_XE_OA_PROP_OA_ENGINE_INSTANCE.
+	 *
+	 * This property is available in perf revision 6.
+	 */
+	DRM_XE_OA_PROP_OA_ENGINE_CLASS,
+
+	/**
+	 * This parameter specifies the engine instance and must be passed along
+	 * with DRM_XE_OA_PROP_OA_ENGINE_CLASS.
+	 *
+	 * This property is available in perf revision 6.
+	 */
+	DRM_XE_OA_PROP_OA_ENGINE_INSTANCE,
+
+	DRM_XE_OA_PROP_MAX /* non-ABI */
+};
+
+struct drm_xe_oa_open_param {
+	__u32 flags;
+#define XE_OA_FLAG_FD_CLOEXEC	(1 << 0)
+#define XE_OA_FLAG_FD_NONBLOCK	(1 << 1)
+#define XE_OA_FLAG_DISABLED	(1 << 2)
+
+	/** @num_properties: The number of u64 (id, value) pairs */
+	__u32 num_properties;
+
+	/**
+	 * @properties_ptr: Pointer to an array of u64 (id, value) pairs
+	 * configuring the stream to open.
+	 */
+	__u64 properties_ptr;
+};
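+
+/*
+ * A minimal sketch of opening an OA stream from userspace; the property
+ * values are purely illustrative, drm_fd/metrics_set_id are assumed to be
+ * obtained elsewhere, and (mirroring the i915 perf interface this derives
+ * from) the ioctl is expected to return the new stream fd on success:
+ *
+ *	uint64_t properties[] = {
+ *		DRM_XE_OA_PROP_SAMPLE_OA, 1,
+ *		DRM_XE_OA_PROP_OA_METRICS_SET, metrics_set_id,
+ *		DRM_XE_OA_PROP_OA_FORMAT, XE_OA_FORMAT_A32u40_A4u32_B8_C8,
+ *		DRM_XE_OA_PROP_OA_EXPONENT, 5,
+ *	};
+ *	struct drm_xe_oa_open_param param = {
+ *		.flags = XE_OA_FLAG_FD_CLOEXEC,
+ *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
+ *		.properties_ptr = (uintptr_t)properties,
+ *	};
+ *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_XE_OA_OPEN, &param);
+ */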
+
+/*
+ * Enable data capture for a stream that was either opened in a disabled state
+ * via XE_OA_FLAG_DISABLED or was later disabled via
+ * XE_OA_IOCTL_DISABLE.
+ *
+ * It is intended to be cheaper to disable and enable a stream than it may be
+ * to close and re-open a stream with the same configuration.
+ *
+ * It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
+ */
+#define XE_OA_IOCTL_ENABLE	_IO('i', 0x0)
+
+/*
+ * Disable data capture for a stream.
+ *
+ * It is an error to try to read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
+ */
+#define XE_OA_IOCTL_DISABLE	_IO('i', 0x1)
+
+/*
+ * Change metrics_set captured by a stream.
+ *
+ * If the stream is bound to a specific context, the configuration change
+ * will be performed inline with that context such that it takes effect before
+ * the next execbuf submission.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define XE_OA_IOCTL_CONFIG	_IO('i', 0x2)
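+
+/*
+ * Sketch of driving an open stream: the fd returned by DRM_IOCTL_XE_OA_OPEN
+ * is enabled/disabled with the stream ioctls above and read() to retrieve
+ * records, e.g.:
+ *
+ *	ioctl(stream_fd, XE_OA_IOCTL_ENABLE, 0);
+ *	ssize_t n = read(stream_fd, buf, sizeof(buf));
+ *	ioctl(stream_fd, XE_OA_IOCTL_DISABLE, 0);
+ */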
+
+struct drm_xe_oa_record_header {
+	__u32 type;
+	__u16 pad;
+	__u16 size;
+};
+
+enum drm_xe_oa_record_type {
+
+	/**
+	 * Samples are the workhorse record type whose contents are extensible
+	 * and defined when opening an OA stream based on the given
+	 * properties.
+	 *
+	 * Boolean properties following the naming convention
+	 * DRM_XE_OA_PROP_SAMPLE_xyz request the inclusion of 'xyz' data in
+	 * every sample.
+	 *
+	 * The order of these sample properties given by userspace has no
+	 * effect on the ordering of data within a sample. The order is
+	 * documented here.
+	 *
+	 * struct {
+	 *     struct drm_xe_oa_record_header header;
+	 *
+	 *     { u32 oa_report[]; } && DRM_XE_OA_PROP_SAMPLE_OA
+	 * };
+	 */
+	DRM_XE_OA_RECORD_SAMPLE = 1,
+
+	/*
+	 * Indicates that one or more OA reports were not written by the
+	 * hardware. This can happen for example if an MI_REPORT_PERF_COUNT
+	 * command collides with periodic sampling - which would be more likely
+	 * at higher sampling frequencies.
+	 */
+	DRM_XE_OA_RECORD_OA_REPORT_LOST = 2,
+
+	/**
+	 * An error occurred that resulted in all pending OA reports being lost.
+	 */
+	DRM_XE_OA_RECORD_OA_BUFFER_LOST = 3,
+
+	DRM_XE_OA_RECORD_MAX /* non-ABI */
+};
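+
+/*
+ * A minimal sketch of walking the records returned by a read() of the stream
+ * fd, assuming (as in the i915 perf interface this derives from) that
+ * header size is the full record size including the header; buf/count are
+ * the read() buffer and its return value, and process_oa_report() is a
+ * hypothetical consumer:
+ *
+ *	const uint8_t *p = buf;
+ *	while (p + sizeof(struct drm_xe_oa_record_header) <= buf + count) {
+ *		const struct drm_xe_oa_record_header *hdr = (const void *)p;
+ *
+ *		if (hdr->type == DRM_XE_OA_RECORD_SAMPLE)
+ *			process_oa_report(hdr + 1);
+ *		p += hdr->size;
+ *	}
+ */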
+
+struct drm_xe_oa_config {
+	/**
+	 * @uuid:
+	 *
+	 * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x"
+	 */
+	char uuid[36];
+
+	/**
+	 * @n_mux_regs:
+	 *
+	 * Number of mux regs in &mux_regs_ptr.
+	 */
+	__u32 n_mux_regs;
+
+	/**
+	 * @n_boolean_regs:
+	 *
+	 * Number of boolean regs in &boolean_regs_ptr.
+	 */
+	__u32 n_boolean_regs;
+
+	/**
+	 * @n_flex_regs:
+	 *
+	 * Number of flex regs in &flex_regs_ptr.
+	 */
+	__u32 n_flex_regs;
+
+	/**
+	 * @mux_regs_ptr:
+	 *
+	 * Pointer to tuples of u32 values (register address, value) for mux
+	 * registers.  Expected length of buffer is (2 * sizeof(u32) *
+	 * &n_mux_regs).
+	 */
+	__u64 mux_regs_ptr;
+
+	/**
+	 * @boolean_regs_ptr:
+	 *
+	 * Pointer to tuples of u32 values (register address, value) for boolean
+	 * registers.  Expected length of buffer is (2 * sizeof(u32) *
+	 * &n_boolean_regs).
+	 */
+	__u64 boolean_regs_ptr;
+
+	/**
+	 * @flex_regs_ptr:
+	 *
+	 * Pointer to tuples of u32 values (register address, value) for flex
+	 * registers.  Expected length of buffer is (2 * sizeof(u32) *
+	 * &n_flex_regs).
+	 */
+	__u64 flex_regs_ptr;
+};
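+
+/*
+ * A minimal sketch of registering a dynamic OA configuration; the uuid and
+ * the mux register (address, value) pair below are purely illustrative, and
+ * (mirroring the i915 perf interface this derives from) the ioctl is
+ * expected to return the new metrics set id on success:
+ *
+ *	uint32_t mux_regs[] = { 0x9888, 0x14150000 };
+ *	struct drm_xe_oa_config config = {
+ *		.uuid = "01234567-0123-0123-0123-0123456789ab",
+ *		.n_mux_regs = 1,
+ *		.mux_regs_ptr = (uintptr_t)mux_regs,
+ *	};
+ *	int metrics_set_id = ioctl(drm_fd, DRM_IOCTL_XE_OA_ADD_CONFIG, &config);
+ */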
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.38.0


