[PATCH 13/17] drm/xe/oa: Add OAC support

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Wed Dec 20 04:59:29 UTC 2023


On Thu, Dec 07, 2023 at 10:43:25PM -0800, Ashutosh Dixit wrote:
>Similar to OAR, allow userspace to execute MI_REPORT_PERF_COUNT on compute
>engines of a specified exec queue.
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
>---
> drivers/gpu/drm/xe/regs/xe_engine_regs.h |  1 +
> drivers/gpu/drm/xe/regs/xe_oa_regs.h     |  3 +
> drivers/gpu/drm/xe/xe_oa.c               | 81 +++++++++++++++++++++++-
> 3 files changed, 82 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>index 76c0938df05f3..045f9773f01f4 100644
>--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>@@ -73,6 +73,7 @@
>
> #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
> #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
>+#define	  CTX_CTRL_RUN_ALONE			REG_BIT(7)
> #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
> #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
>index 7e2e875ccf80a..b66cd95b795e7 100644
>--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
>@@ -74,6 +74,9 @@
> #define  OAG_OASTATUS_BUFFER_OVERFLOW	REG_BIT(1)
> #define  OAG_OASTATUS_REPORT_LOST	REG_BIT(0)
>
>+/* OAC unit */
>+#define OAC_OACONTROL			XE_REG(0x15114)
>+
> /* OAM unit */
> #define OAM_HEAD_POINTER_OFFSET			(0x1a0)
> #define OAM_TAIL_POINTER_OFFSET			(0x1a4)
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 9d653d7722d1a..42f32d4359f2c 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -449,6 +449,19 @@ u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> 		REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size);
> }
>
>+static u32 __oa_ccs_select(struct xe_oa_stream *stream)
>+{
>+	u32 val;
>+
>+	if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE)
>+		return 0;
>+
>+	val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance);
>+	xe_assert(stream->oa->xe,
>+		  REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance);

Why is there a need to do REG_FIELD_GET? I thought the REG_FIELD_PREP is 
just a bitwise operation. Are you expecting coherency issues?

>+}
>+
> static void xe_oa_enable(struct xe_oa_stream *stream)
> {
> 	const struct xe_oa_format *format = stream->oa_buffer.format;
>@@ -463,7 +476,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
>
> 	regs = __oa_regs(stream);
> 	val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
>-		OAG_OACONTROL_OA_COUNTER_ENABLE;
>+		__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;
>
> 	xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
> }
>@@ -762,6 +775,64 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
> 	return xe_oa_modify_self(stream, regs_lri, ARRAY_SIZE(regs_lri));
> }
>
>+static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
>+{
>+	const struct xe_oa_format *format = stream->oa_buffer.format;
>+	struct xe_lrc *lrc = &stream->exec_q->lrc[0];
>+	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
>+	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
>+		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
>+	struct flex regs_context[] = {
>+		{
>+			OACTXCONTROL(stream->hwe->mmio_base),
>+			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
>+			enable ? OA_COUNTER_RESUME : 0,
>+		},
>+		{
>+			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
>+			regs_offset + CTX_CONTEXT_CONTROL,
>+			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
>+				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
>+			_MASKED_FIELD(CTX_CTRL_RUN_ALONE,
>+				      enable ? CTX_CTRL_RUN_ALONE : 0),
>+		},
>+	};
>+	/* Offsets in regs_lri are not used since this configuration is applied using LRI */
>+	struct flex regs_lri[] = {
>+		{
>+			OAC_OACONTROL,
>+			OAR_OAC_OACONTROL_OFFSET + 1,
>+			oacontrol,
>+		},
>+	};
>+	int err;
>+
>+	/* Set ccs select to enable programming of OAC_OACONTROL */
>+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
>+
>+	/* Modify stream hwe context image with regs_context */
>+	err = xe_oa_modify_context(stream, &stream->exec_q->lrc[0],
>+				   regs_context, ARRAY_SIZE(regs_context));
>+	if (err)
>+		return err;
>+
>+	/* Apply regs_lri using LRI */
>+	return xe_oa_modify_self(stream, regs_lri, ARRAY_SIZE(regs_lri));

I think in i915, for execlist scheduling, there was a kernel context 
that was scheduled and when it ran, it would indicate that all other 
contexts are done executing - kinda GPU idle. The modify self was (IMO) 
only needed to update the kernel context in this scenario.

GuC does not have a concept of kernel context (at least not in i915, not 
sure if things changed in XE). If so, all the modify self can be dropped 
(both from OAR and OAC).

Otherwise, this is good too, so

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>

The other query I have (unrelated to this patch) is if we need the 
PWR_CLK_STATE state configured in all contexts. It's a bit hazy if that 
was only needed for older gens or if it is applicable to newer 
platforms. (gen12_configure_all_contexts() in i915).

Thanks,
Umesh


>+}
>+
>+static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
>+{
>+	switch (stream->hwe->class) {
>+	case XE_ENGINE_CLASS_RENDER:
>+		return xe_oa_configure_oar_context(stream, enable);
>+	case XE_ENGINE_CLASS_COMPUTE:
>+		return xe_oa_configure_oac_context(stream, enable);
>+	default:
>+		/* Video engines do not support MI_REPORT_PERF_COUNT */
>+		return 0;
>+	}
>+}
>+
> #define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)
>
> static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
>@@ -781,7 +852,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
>
> 	/* disable the context save/restore or OAR counters */
> 	if (stream->exec_q)
>-		xe_oa_configure_oar_context(stream, false);
>+		xe_oa_configure_oa_context(stream, false);
>
> 	/* Make sure we disable noa to save power. */
> 	xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
>@@ -978,8 +1049,9 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
>
> 	xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
>
>+	/* Configure OAR/OAC */
> 	if (stream->exec_q) {
>-		ret = xe_oa_configure_oar_context(stream, true);
>+		ret = xe_oa_configure_oa_context(stream, true);
> 		if (ret)
> 			return ret;
> 	}
>@@ -1636,6 +1708,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, void *data, struct drm_file
> 		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
> 		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
> 			return -ENOENT;
>+
>+		if (param.exec_q->width > 1)
>+			drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
> 	}
>
> 	/*
>-- 
>2.41.0
>


More information about the Intel-xe mailing list