[PATCH v3 09/10] drm/xe/pmu: Add PMU support for per-engine-class activity

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Tue Jan 14 00:57:27 UTC 2025


On Mon, Jan 06, 2025 at 01:25:58PM +0530, Riana Tauro wrote:
>PMU provides two counters (engine-active-ticks, total-ticks)
>to calculate engine activity. When querying engine busyness,
>user must group these 2 counters using the perf_event
>group mechanism to ensure both counters are sampled together.
>
>To list the events
>
>	./perf list
>	  xe_0000_03_00.0/engine-active-ticks/		[Kernel PMU event]
>	  xe_0000_03_00.0/total-ticks/			[Kernel PMU event]
>
>The formats to be used with the above are
>
>	engine_class	- config:20-27
>	engine_instance	- config:12-19
>	gt_id		- config:60-63
>
>The events can then be read using perf tool
>
>./perf stat -e xe_0000_03_00.0/engine-active-ticks,gt_id=0,
>			       engine_class=0,engine_instance=0/,
>	       xe_0000_03_00.0/total-ticks,gt_id=0,
>			       engine_class=0,engine_instance=0/ -I 1000
>
>Engine activity can then be calculated as below
>engine activity % = (engine active ticks/total ticks) * 100
>
>Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>---
> drivers/gpu/drm/xe/xe_guc.c       |   5 ++
> drivers/gpu/drm/xe/xe_pmu.c       | 139 +++++++++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_pmu_types.h |   7 ++
> drivers/gpu/drm/xe/xe_uc.c        |   3 +
> 4 files changed, 131 insertions(+), 23 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>index 408365dfe4ee..f229745b78b9 100644
>--- a/drivers/gpu/drm/xe/xe_guc.c
>+++ b/drivers/gpu/drm/xe/xe_guc.c
>@@ -26,6 +26,7 @@
> #include "xe_guc_capture.h"
> #include "xe_guc_ct.h"
> #include "xe_guc_db_mgr.h"
>+#include "xe_guc_engine_activity.h"
> #include "xe_guc_hwconfig.h"
> #include "xe_guc_log.h"
> #include "xe_guc_pc.h"
>@@ -743,6 +744,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
> 	if (ret)
> 		return ret;
>
>+	ret = xe_guc_engine_activity_init(guc);
>+	if (ret)
>+		return ret;
>+
> 	return xe_guc_ads_init_post_hwconfig(&guc->ads);
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>index bae8eb38fddd..5bd312b6b8f6 100644
>--- a/drivers/gpu/drm/xe/xe_pmu.c
>+++ b/drivers/gpu/drm/xe/xe_pmu.c
>@@ -12,7 +12,9 @@
> #include "xe_force_wake.h"
> #include "xe_gt_clock.h"
> #include "xe_gt_idle.h"
>+#include "xe_guc_engine_activity.h"
> #include "xe_guc_pc.h"
>+#include "xe_hw_engine.h"
> #include "xe_mmio.h"
> #include "xe_macros.h"
> #include "xe_module.h"
>@@ -90,6 +92,17 @@ static unsigned int xe_pmu_target_cpu = -1;
>  *    1950
>  *    1950
>  *    1950
>+ *
>+ * Engine Activity: PMU provides two counters (engine-active-ticks, total-ticks) to calculate
>+ * engine activity. While querying the engine activity the user should group these two counters
>+ * using the perf_event group mechanism to ensure both counters are sampled together.
>+ *
>+ * To read an engine-specific event for the engine of class 1 and instance 0 on a GT
>+ *
>+ * perf stat -e xe_0000_03_00.0/engine-active-ticks,gt_id=0,engine_class=1,engine_instance=0/,
>+ *		xe_0000_03_00.0/total-ticks,gt_id=0,engine_class=1,engine_instance=0/ -I 1000
>+ *
>+ * engine active % = (engine active ticks/total ticks) * 100
>  */
>
> static struct xe_pmu *event_to_pmu(struct perf_event *event)
>@@ -107,6 +120,33 @@ static u64 config_counter(const u64 config)
> 	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
> }
>
>+static u64 engine_event_sample(const u64 config)
>+{
>+	return config_counter(config) & 0xfff;
>+}
>+
>+static u8 engine_event_class(const u64 config)
>+{
>+	return (config_counter(config) >> XE_PMU_CLASS_SHIFT) & 0xff;
>+}
>+
>+static u8 engine_event_instance(const u64 config)
>+{
>+	return (config_counter(config) >> XE_PMU_INSTANCE_SHIFT) & 0xff;
>+}
>+
>+static bool is_engine_event(struct xe_device *xe, const u64 config)
>+{
>+	const u64 gt_id = config >> __XE_PMU_GT_SHIFT;

You should just use the config_gt_id() helper defined above, both here and in
the other places where this pattern occurs.

>+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);

When I pass gt_id=1 in the perf stat command I get a hang/reboot. gt is
NULL above for gt_id = 1 on single-tile systems without a media GT, and it is
then used unchecked (gt->uc.guc) in the check below.

>+	u64 sample = engine_event_sample(config);
>+
>+	if (!xe_guc_engine_activity_supported(&gt->uc.guc))
>+		return false;
>+
>+	return ((sample == XE_PMU_ENGINE_ACTIVITY_TICKS) || (sample == XE_PMU_TOTAL_TICKS));
>+}
>+
> static unsigned int pm_bit(const u64 config)
> {
> 	unsigned int val;
>@@ -192,6 +232,23 @@ config_status(struct xe_device *xe, u64 config)
> 	return 0;
> }
>
>+static int engine_event_init(struct xe_device *xe, u64 config)
>+{
>+	const unsigned int gt_id = config_gt_id(config);
>+	struct drm_xe_engine_class_instance eci;
>+	struct xe_hw_engine *hwe;
>+
>+	eci.engine_class = engine_event_class(config);
>+	eci.engine_instance = engine_event_instance(config);
>+	eci.gt_id = gt_id;
>+
>+	hwe = xe_hw_engine_lookup(xe, eci);
>+	if (!hwe || xe_hw_engine_is_reserved(hwe))
>+		return -ENOENT;
>+
>+	return 0;
>+}
>+
> static int xe_pmu_event_init(struct perf_event *event)
> {
> 	struct xe_device *xe =
>@@ -221,7 +278,12 @@ static int xe_pmu_event_init(struct perf_event *event)
> 		return -EINVAL;
>
> 	event_config = event->attr.config;

Maybe check gt_id here, either by calling xe_device_get_gt() or by
using logic similar to xe_device_get_gt() to validate the gt_id.

This is just a quick run of the code; I will send out more comments in
another response.

Thanks,
Umesh

>-	ret = config_status(xe, event_config);
>+
>+	if (is_engine_event(xe, event_config))
>+		ret = engine_event_init(xe, event_config);
>+	else
>+		ret = config_status(xe, event_config);
>+
> 	if (ret)
> 		return ret;
>
>@@ -300,24 +362,49 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
> 	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
> 	u64 val = 0;
>
>-	switch (config_counter(config)) {
>-	case XE_PMU_C6_RESIDENCY:
>-		val = get_c6(gt);
>-		break;
>-	case XE_PMU_ACTUAL_FREQUENCY:
>-		val =
>-		   div_u64(read_sample(pmu, gt_id,
>-				       __XE_SAMPLE_FREQ_ACT),
>-			   USEC_PER_SEC /* to MHz */);
>-		break;
>-	case XE_PMU_REQUESTED_FREQUENCY:
>-		val =
>-		   div_u64(read_sample(pmu, gt_id,
>-				       __XE_SAMPLE_FREQ_REQ),
>-			   USEC_PER_SEC /* to MHz */);
>-		break;
>-	default:
>-		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
>+	if (is_engine_event(xe, config)) {
>+		struct drm_xe_engine_class_instance eci;
>+		struct xe_hw_engine *hwe;
>+		u64 sample = engine_event_sample(config);
>+
>+		eci.engine_class = engine_event_class(config);
>+		eci.engine_instance = engine_event_instance(config);
>+		eci.gt_id = gt_id;
>+
>+		hwe = xe_hw_engine_lookup(xe, eci);
>+		if (!hwe)
>+			drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
>+
>+		if (xe_pm_runtime_suspended(xe))
>+			return 0;
>+
>+		if (sample == XE_PMU_ENGINE_ACTIVITY_TICKS)
>+			val = xe_guc_engine_activity_active_ticks(hwe);
>+		else if (sample == XE_PMU_TOTAL_TICKS)
>+			val = xe_guc_engine_activity_total_ticks(hwe);
>+		else
>+			drm_warn(&xe->drm, "unknown pmu engine event\n");
>+
>+	} else {
>+		switch (config_counter(config)) {
>+		case XE_PMU_C6_RESIDENCY:
>+			val = get_c6(gt);
>+			break;
>+		case XE_PMU_ACTUAL_FREQUENCY:
>+			val =
>+			   div_u64(read_sample(pmu, gt_id,
>+					       __XE_SAMPLE_FREQ_ACT),
>+				   USEC_PER_SEC /* to MHz */);
>+			break;
>+		case XE_PMU_REQUESTED_FREQUENCY:
>+			val =
>+			   div_u64(read_sample(pmu, gt_id,
>+					       __XE_SAMPLE_FREQ_REQ),
>+				   USEC_PER_SEC /* to MHz */);
>+			break;
>+		default:
>+			drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
>+		}
> 	}
>
> 	return val;
>@@ -614,11 +701,15 @@ struct xe_str_attribute {
> 	const char *str;
> };
>
>-PMU_FORMAT_ATTR(event,          "config:0-11");
>-PMU_FORMAT_ATTR(gt_id,          "config:60-63");
>+PMU_FORMAT_ATTR(event,           "config:0-11");
>+PMU_FORMAT_ATTR(engine_instance, "config:12-19");
>+PMU_FORMAT_ATTR(engine_class,	 "config:20-27");
>+PMU_FORMAT_ATTR(gt_id,           "config:60-63");
>
> static struct attribute *xe_pmu_format_attrs[] = {
> 	&format_attr_event.attr,
>+	&format_attr_engine_class.attr,
>+	&format_attr_engine_instance.attr,
> 	&format_attr_gt_id.attr,
> 	NULL,
> };
>@@ -703,6 +794,8 @@ create_event_attributes(struct xe_pmu *pmu)
> 		__event(0, "c6-residency", "ms"),
> 		__event(1, "actual-frequency", "M"),
> 		__event(2, "requested-frequency", "M"),
>+		__event(XE_PMU_ENGINE_ACTIVITY_TICKS, "engine-active-ticks", NULL),
>+		__event(XE_PMU_TOTAL_TICKS, "total-ticks", NULL),
> 	};
>
> 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
>@@ -715,7 +808,7 @@ create_event_attributes(struct xe_pmu *pmu)
> 	for (i = 0; i < ARRAY_SIZE(events); i++) {
> 		u64 config = __XE_PMU_EVENT(events[i].counter);
>
>-		if (!config_status(xe, config))
>+		if (!config_status(xe, config) || is_engine_event(xe, config))
> 			count++;
> 	}
>
>@@ -741,7 +834,7 @@ create_event_attributes(struct xe_pmu *pmu)
> 		u64 config = __XE_PMU_EVENT(events[i].counter);
> 		char *str;
>
>-		if (config_status(xe, config))
>+		if (config_status(xe, config) && !is_engine_event(xe, config))
> 			continue;
>
> 		str = kasprintf(GFP_KERNEL, "%s",
>diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
>index 0109bb11937b..bb27d0950b72 100644
>--- a/drivers/gpu/drm/xe/xe_pmu_types.h
>+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
>@@ -19,6 +19,11 @@ enum {
>
> #define XE_PMU_MAX_GT 2
>
>+/* First 12 bits of config are reserved for other counters */
>+#define XE_PMU_INSTANCE_SHIFT                  (12)
>+#define XE_PMU_INSTANCE_BITS                   (8)
>+#define XE_PMU_CLASS_SHIFT \
>+	(XE_PMU_INSTANCE_SHIFT + XE_PMU_INSTANCE_BITS)
> /*
>  * Top bits of every counter are GT id.
>  */
>@@ -32,6 +37,8 @@ enum {
> #define XE_PMU_C6_RESIDENCY                    __XE_PMU_EVENT(0)
> #define XE_PMU_ACTUAL_FREQUENCY			__XE_PMU_EVENT(1)
> #define XE_PMU_REQUESTED_FREQUENCY		__XE_PMU_EVENT(2)
>+#define XE_PMU_ENGINE_ACTIVITY_TICKS		__XE_PMU_EVENT(3)
>+#define XE_PMU_TOTAL_TICKS			__XE_PMU_EVENT(4)
> #define __XE_PMU_C6_RESIDENCY(gt)              ___XE_PMU_EVENT(gt, 0)
> #define __XE_PMU_ACTUAL_FREQUENCY(gt)		___XE_PMU_EVENT(gt, 1)
> #define __XE_PMU_REQUESTED_FREQUENCY(gt)	___XE_PMU_EVENT(gt, 2)
>diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
>index 0d073a9987c2..d129bdeaed57 100644
>--- a/drivers/gpu/drm/xe/xe_uc.c
>+++ b/drivers/gpu/drm/xe/xe_uc.c
>@@ -13,6 +13,7 @@
> #include "xe_gt_printk.h"
> #include "xe_gt_sriov_vf.h"
> #include "xe_guc.h"
>+#include "xe_guc_engine_activity.h"
> #include "xe_guc_pc.h"
> #include "xe_huc.h"
> #include "xe_sriov.h"
>@@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc)
> 	if (ret)
> 		return ret;
>
>+	xe_guc_engine_activity_enable_stats(&uc->guc);
>+
> 	/* We don't fail the driver load if HuC fails to auth, but let's warn */
> 	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
> 	xe_gt_assert(uc_to_gt(uc), !ret);
>-- 
>2.47.1
>


More information about the Intel-xe mailing list