[Intel-xe] [PATCH 04/10] drm/xe/oa: Module init/exit and probe/remove

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Wed Aug 23 19:41:56 UTC 2023


On Tue, Aug 22, 2023 at 08:52:56AM -0700, Umesh Nerlige Ramappa wrote:
>On Mon, Aug 07, 2023 at 06:31:53PM -0700, Ashutosh Dixit wrote:
>>Perform OA initialization at module init and probe time:
>>
>>* Setup perf_stream_paranoid and oa_max_sample_rate files in /proc
>>* Setup metrics sysfs directories to expose which metrics configurations
>> are available
>>* Setup OA groups which associate hw engines with OA units
>>* Initialize OA units
>>
>>Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
>>---
>>drivers/gpu/drm/xe/Makefile             |   1 +
>>drivers/gpu/drm/xe/xe_device.c          |  11 +
>>drivers/gpu/drm/xe/xe_device_types.h    |   4 +
>>drivers/gpu/drm/xe/xe_gt_types.h        |   4 +
>>drivers/gpu/drm/xe/xe_hw_engine_types.h |   2 +
>>drivers/gpu/drm/xe/xe_module.c          |   5 +
>>drivers/gpu/drm/xe/xe_oa.c              | 310 ++++++++++++++++++++++++
>>drivers/gpu/drm/xe/xe_oa.h              |  18 ++
>>8 files changed, 355 insertions(+)
>>create mode 100644 drivers/gpu/drm/xe/xe_oa.c
>>create mode 100644 drivers/gpu/drm/xe/xe_oa.h
>>
>>diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>>index 1b59702cd9f98..01280233ff271 100644
>>--- a/drivers/gpu/drm/xe/Makefile
>>+++ b/drivers/gpu/drm/xe/Makefile
>>@@ -84,6 +84,7 @@ xe-y += xe_bb.o \
>>	xe_mmio.o \
>>	xe_mocs.o \
>>	xe_module.o \
>>+	xe_oa.o \
>>	xe_pat.o \
>>	xe_pci.o \
>>	xe_pcode.o \
>>diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>>index 766df07de979c..1c54cac0a117f 100644
>>--- a/drivers/gpu/drm/xe/xe_device.c
>>+++ b/drivers/gpu/drm/xe/xe_device.c
>>@@ -25,6 +25,7 @@
>>#include "xe_irq.h"
>>#include "xe_mmio.h"
>>#include "xe_module.h"
>>+#include "xe_oa.h"
>>#include "xe_pcode.h"
>>#include "xe_pm.h"
>>#include "xe_query.h"
>>@@ -323,6 +324,10 @@ int xe_device_probe(struct xe_device *xe)
>>			goto err_irq_shutdown;
>>	}
>>
>>+	err = xe_oa_init(xe);
>>+	if (err)
>>+		goto err_irq_shutdown;
>>+
>>	err = xe_display_init(xe);
>>	if (err)
>>		goto err_fini_display;
>>@@ -333,6 +338,8 @@ int xe_device_probe(struct xe_device *xe)
>>
>>	xe_display_register(xe);
>>
>>+	xe_oa_register(xe);
>>+
>>	xe_debugfs_register(xe);
>>
>>	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
>>@@ -361,10 +368,14 @@ static void xe_device_remove_display(struct xe_device *xe)
>>
>>void xe_device_remove(struct xe_device *xe)
>>{
>>+	xe_oa_unregister(xe);
>>+
>>	xe_device_remove_display(xe);
>>
>>	xe_display_unlink(xe);
>>
>>+	xe_oa_fini(xe);
>>+
>>	xe_irq_shutdown(xe);
>>}
>>
>>diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>>index f84ecb976f5d4..3b487905306b7 100644
>>--- a/drivers/gpu/drm/xe/xe_device_types.h
>>+++ b/drivers/gpu/drm/xe/xe_device_types.h
>>@@ -16,6 +16,7 @@
>>#include "xe_gt_types.h"
>>#include "xe_platform_types.h"
>>#include "xe_step_types.h"
>>+#include "xe_oa.h"
>>
>>#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>>#include "ext/intel_device_info.h"
>>@@ -376,6 +377,9 @@ struct xe_device {
>>	 */
>>	struct task_struct *pm_callback_task;
>>
>>+	/** @oa: oa perf counter subsystem */
>>+	struct xe_oa oa;
>>+
>>	/* private: */
>>
>>#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>>diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>>index 35b8c19fa8bf5..d6053f85dbb60 100644
>>--- a/drivers/gpu/drm/xe/xe_gt_types.h
>>+++ b/drivers/gpu/drm/xe/xe_gt_types.h
>>@@ -13,6 +13,7 @@
>>#include "xe_reg_sr_types.h"
>>#include "xe_sa_types.h"
>>#include "xe_uc_types.h"
>>+#include "xe_oa.h"
>>
>>struct xe_exec_queue_ops;
>>struct xe_migrate;
>>@@ -346,6 +347,9 @@ struct xe_gt {
>>		/** @oob: bitmap with active OOB workaroudns */
>>		unsigned long *oob;
>>	} wa_active;
>>+
>>+	/** @oa: oa perf counter subsystem per gt info */
>>+	struct xe_oa_gt oa;
>
>I don't see a reference to this, so thought we could drop it. OR can 
>you point me to where this is used?

Yikes, not this one. I meant to comment on the xe_oa member in the xe_device 
structure. That xe_oa member is not used... I think.

Umesh

>
>Umesh
>
>>};
>>
>>#endif
>>diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
>>index 97d9ba31b5fc7..92bb30433353c 100644
>>--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
>>+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
>>@@ -144,6 +144,8 @@ struct xe_hw_engine {
>>	enum xe_hw_engine_id engine_id;
>>	/** @eclass: pointer to per hw engine class interface */
>>	struct xe_hw_engine_class_intf *eclass;
>>+	/** @oa_group: oa unit for this hw engine */
>>+	struct xe_oa_group *oa_group;
>>};
>>
>>/**
>>diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
>>index de85494e2280b..460e8161c6f21 100644
>>--- a/drivers/gpu/drm/xe/xe_module.c
>>+++ b/drivers/gpu/drm/xe/xe_module.c
>>@@ -11,6 +11,7 @@
>>#include "xe_drv.h"
>>#include "xe_hw_fence.h"
>>#include "xe_module.h"
>>+#include "xe_oa.h"
>>#include "xe_pci.h"
>>#include "xe_sched_job.h"
>>
>>@@ -53,6 +54,10 @@ static const struct init_funcs init_funcs[] = {
>>		.init = xe_register_pci_driver,
>>		.exit = xe_unregister_pci_driver,
>>	},
>>+	{
>>+		.init = xe_oa_sysctl_register,
>>+		.exit = xe_oa_sysctl_unregister,
>>+	},
>>};
>>
>>static int __init xe_init(void)
>>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>>new file mode 100644
>>index 0000000000000..d44ef611c76eb
>>--- /dev/null
>>+++ b/drivers/gpu/drm/xe/xe_oa.c
>>@@ -0,0 +1,310 @@
>>+// SPDX-License-Identifier: MIT
>>+/*
>>+ * Copyright © 2023 Intel Corporation
>>+ */
>>+
>>+#include <linux/anon_inodes.h>
>>+#include <linux/nospec.h>
>>+#include <linux/sizes.h>
>>+#include <linux/uuid.h>
>>+
>>+#include <drm/xe_drm.h>
>>+#include <drm/drm_drv.h>
>>+
>>+#include "regs/xe_oa_regs.h"
>>+#include "xe_gt.h"
>>+#include "xe_device.h"
>>+#include "xe_oa.h"
>>+
>>+static u32 xe_oa_stream_paranoid = true;
>>+static int xe_oa_sample_rate_hard_limit;
>>+static u32 xe_oa_max_sample_rate = 100000;
>>+
>>+static const struct xe_oa_format oa_formats[] = {
>>+	[XE_OA_FORMAT_C4_B8]			= { 7, 64 },
>>+	[XE_OA_FORMAT_A12]			= { 0, 64 },
>>+	[XE_OA_FORMAT_A12_B8_C8]		= { 2, 128 },
>>+	[XE_OA_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256 },
>>+	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8]	= { 5, 256 },
>>+	[XE_OA_FORMAT_A24u40_A14u32_B8_C8]	= { 5, 256 },
>>+	[XE_OAM_FORMAT_MPEC8u64_B8_C8]		= { 1, 192, TYPE_OAM, HDR_64_BIT },
>>+	[XE_OAM_FORMAT_MPEC8u32_B8_C8]		= { 2, 128, TYPE_OAM, HDR_64_BIT },
>>+};
>>+
>>+static struct ctl_table_header *sysctl_header;
>>+
>>+void xe_oa_register(struct xe_device *xe)
>>+{
>>+	struct xe_oa *oa = &xe->oa;
>>+
>>+	if (!oa->xe)
>>+		return;
>>+
>>+	oa->metrics_kobj = kobject_create_and_add("metrics",
>>+						  &xe->drm.primary->kdev->kobj);
>>+}
>>+
>>+void xe_oa_unregister(struct xe_device *xe)
>>+{
>>+	struct xe_oa *oa = &xe->oa;
>>+
>>+	if (!oa->metrics_kobj)
>>+		return;
>>+
>>+	kobject_put(oa->metrics_kobj);
>>+	oa->metrics_kobj = NULL;
>>+}
>>+
>>+static u32 num_oa_groups_per_gt(struct xe_gt *gt)
>>+{
>>+	return 1;
>>+}
>>+
>>+static u32 __oam_engine_group(struct xe_hw_engine *hwe)
>>+{
>>+	if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
>>+		/*
>>+		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
>>+		 * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
>>+		 */
>>+		drm_WARN_ON(&hwe->gt->tile->xe->drm,
>>+			    hwe->gt->info.type != XE_GT_TYPE_MEDIA);
>>+
>>+		return OA_GROUP_OAM_SAMEDIA_0;
>>+	}
>>+
>>+	return OA_GROUP_INVALID;
>>+}
>>+
>>+static u32 __oa_engine_group(struct xe_hw_engine *hwe)
>>+{
>>+	switch (hwe->class) {
>>+	case XE_ENGINE_CLASS_RENDER:
>>+		return OA_GROUP_OAG;
>>+
>>+	case XE_ENGINE_CLASS_VIDEO_DECODE:
>>+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
>>+		return __oam_engine_group(hwe);
>>+
>>+	default:
>>+		return OA_GROUP_INVALID;
>>+	}
>>+}
>>+
>>+static struct xe_oa_regs __oam_regs(u32 base)
>>+{
>>+	return (struct xe_oa_regs) {
>>+		base,
>>+		GEN12_OAM_HEAD_POINTER(base),
>>+		GEN12_OAM_TAIL_POINTER(base),
>>+		GEN12_OAM_BUFFER(base),
>>+		GEN12_OAM_CONTEXT_CONTROL(base),
>>+		GEN12_OAM_CONTROL(base),
>>+		GEN12_OAM_DEBUG(base),
>>+		GEN12_OAM_STATUS(base),
>>+		GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
>>+	};
>>+}
>>+
>>+static struct xe_oa_regs __oag_regs(void)
>>+{
>>+	return (struct xe_oa_regs) {
>>+		0,
>>+		GEN12_OAG_OAHEADPTR,
>>+		GEN12_OAG_OATAILPTR,
>>+		GEN12_OAG_OABUFFER,
>>+		GEN12_OAG_OAGLBCTXCTRL,
>>+		GEN12_OAG_OACONTROL,
>>+		GEN12_OAG_OA_DEBUG,
>>+		GEN12_OAG_OASTATUS,
>>+		GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
>>+	};
>>+}
>>+
>>+static void xe_oa_init_groups(struct xe_gt *gt)
>>+{
>>+	const u32 mtl_oa_base[] = {
>>+		[OA_GROUP_OAM_SAMEDIA_0] = 0x393000,
>>+	};
>>+	int i, num_groups = gt->oa.num_oa_groups;
>>+
>>+	for (i = 0; i < num_groups; i++) {
>>+		struct xe_oa_group *g = &gt->oa.group[i];
>>+
>>+		/* Fused off engines can result in a group with num_engines == 0 */
>>+		if (g->num_engines == 0)
>>+			continue;
>>+
>>+		if (i == OA_GROUP_OAG && gt->info.type != XE_GT_TYPE_MEDIA) {
>>+			g->regs = __oag_regs();
>>+			g->type = TYPE_OAG;
>>+		} else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
>>+			g->regs = __oam_regs(mtl_oa_base[i]);
>>+			g->type = TYPE_OAM;
>>+		}
>>+
>>+		/* Set oa_unit_ids now to ensure ids remain contiguous. */
>>+		g->oa_unit_id = gt->tile->xe->oa.oa_unit_ids++;
>>+	}
>>+}
>>+
>>+static int xe_oa_init_gt(struct xe_gt *gt)
>>+{
>>+	u32 num_groups = num_oa_groups_per_gt(gt);
>>+	struct xe_hw_engine *hwe;
>>+	enum xe_hw_engine_id id;
>>+	struct xe_oa_group *g;
>>+
>>+	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>>+	if (!g)
>>+		return -ENOMEM;
>>+
>>+	for_each_hw_engine(hwe, gt, id) {
>>+		u32 index = __oa_engine_group(hwe);
>>+
>>+		hwe->oa_group = NULL;
>>+		if (index < num_groups) {
>>+			g[index].num_engines++;
>>+			hwe->oa_group = &g[index];
>>+		}
>>+	}
>>+
>>+	gt->oa.num_oa_groups = num_groups;
>>+	gt->oa.group = g;
>>+
>>+	xe_oa_init_groups(gt);
>>+
>>+	return 0;
>>+}
>>+
>>+static int xe_oa_init_engine_groups(struct xe_oa *oa)
>>+{
>>+	struct xe_gt *gt;
>>+	int i, ret;
>>+
>>+	for_each_gt(gt, oa->xe, i) {
>>+		ret = xe_oa_init_gt(gt);
>>+		if (ret)
>>+			return ret;
>>+	}
>>+
>>+	return 0;
>>+}
>>+
>>+static void oa_format_add(struct xe_oa *oa, enum drm_xe_oa_format format)
>>+{
>>+	__set_bit(format, oa->format_mask);
>>+}
>>+
>>+static void xe_oa_init_supported_formats(struct xe_oa *oa)
>>+{
>>+	switch (oa->xe->info.platform) {
>>+	case XE_ALDERLAKE_S:
>>+	case XE_ALDERLAKE_P:
>>+		oa_format_add(oa, XE_OA_FORMAT_A12);
>>+		oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8);
>>+		oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8);
>>+		oa_format_add(oa, XE_OA_FORMAT_C4_B8);
>>+		break;
>>+
>>+	case XE_DG2:
>>+		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
>>+		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
>>+		break;
>>+
>>+	case XE_METEORLAKE:
>>+		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
>>+		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
>>+		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
>>+		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
>>+		break;
>>+
>>+	default:
>>+		drm_err(&oa->xe->drm, "Unknown platform\n");
>>+	}
>>+}
>>+
>>+int xe_oa_init(struct xe_device *xe)
>>+{
>>+	struct xe_oa *oa = &xe->oa;
>>+	struct xe_gt *gt;
>>+	int i, ret;
>>+
>>+	/* Support OA only with GuC submission and Gen12+ */
>>+	if (XE_WARN_ON(!xe_device_guc_submission_enabled(xe)) ||
>>+	    XE_WARN_ON(GRAPHICS_VER(xe) < 12))
>>+		return 0;
>>+
>>+	oa->xe = xe;
>>+	oa->oa_formats = oa_formats;
>>+
>>+	for_each_gt(gt, xe, i)
>>+		mutex_init(&gt->oa.lock);
>>+
>>+	/* Choose a representative limit */
>>+	xe_oa_sample_rate_hard_limit = xe_root_mmio_gt(xe)->info.clock_freq / 2;
>>+
>>+	mutex_init(&oa->metrics_lock);
>>+	idr_init_base(&oa->metrics_idr, 1);
>>+
>>+	ret = xe_oa_init_engine_groups(oa);
>>+	if (ret) {
>>+		drm_err(&xe->drm, "OA initialization failed %d\n", ret);
>>+		return ret;
>>+	}
>>+
>>+	xe_oa_init_supported_formats(oa);
>>+
>>+	oa->xe = xe;
>>+	return 0;
>>+}
>>+
>>+void xe_oa_fini(struct xe_device *xe)
>>+{
>>+	struct xe_oa *oa = &xe->oa;
>>+	struct xe_gt *gt;
>>+	int i;
>>+
>>+	if (!oa->xe)
>>+		return;
>>+
>>+	for_each_gt(gt, xe, i)
>>+		kfree(gt->oa.group);
>>+
>>+	idr_destroy(&oa->metrics_idr);
>>+
>>+	oa->xe = NULL;
>>+}
>>+
>>+static struct ctl_table oa_ctl_table[] = {
>>+	{
>>+	 .procname = "perf_stream_paranoid",
>>+	 .data = &xe_oa_stream_paranoid,
>>+	 .maxlen = sizeof(xe_oa_stream_paranoid),
>>+	 .mode = 0644,
>>+	 .proc_handler = proc_dointvec_minmax,
>>+	 .extra1 = SYSCTL_ZERO,
>>+	 .extra2 = SYSCTL_ONE,
>>+	 },
>>+	{
>>+	 .procname = "oa_max_sample_rate",
>>+	 .data = &xe_oa_max_sample_rate,
>>+	 .maxlen = sizeof(xe_oa_max_sample_rate),
>>+	 .mode = 0644,
>>+	 .proc_handler = proc_dointvec_minmax,
>>+	 .extra1 = SYSCTL_ZERO,
>>+	 .extra2 = &xe_oa_sample_rate_hard_limit,
>>+	 },
>>+	{}
>>+};
>>+
>>+int xe_oa_sysctl_register(void)
>>+{
>>+	sysctl_header = register_sysctl("dev/xe", oa_ctl_table);
>>+	return 0;
>>+}
>>+
>>+void xe_oa_sysctl_unregister(void)
>>+{
>>+	unregister_sysctl_table(sysctl_header);
>>+}
>>diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h
>>new file mode 100644
>>index 0000000000000..ba4ba80fd34cb
>>--- /dev/null
>>+++ b/drivers/gpu/drm/xe/xe_oa.h
>>@@ -0,0 +1,18 @@
>>+/* SPDX-License-Identifier: MIT */
>>+/*
>>+ * Copyright © 2023 Intel Corporation
>>+ */
>>+
>>+#ifndef _XE_OA_H_
>>+#define _XE_OA_H_
>>+
>>+#include "xe_oa_types.h"
>>+
>>+int xe_oa_init(struct xe_device *xe);
>>+void xe_oa_fini(struct xe_device *xe);
>>+void xe_oa_register(struct xe_device *xe);
>>+void xe_oa_unregister(struct xe_device *xe);
>>+int xe_oa_sysctl_register(void);
>>+void xe_oa_sysctl_unregister(void);
>>+
>>+#endif
>>-- 
>>2.41.0
>>


More information about the Intel-xe mailing list