[PATCH 7/7] drm/xe/pmu: Add PMU support for engine busyness

Riana Tauro riana.tauro at intel.com
Wed Nov 13 04:55:49 UTC 2024


The PMU provides two counters per engine (<engine>-busy-ticks-gt<n> and
<engine>-total-ticks-gt<n>) from which engine busyness is calculated. When
querying engine busyness, the user must group these two counters using the
perf_event group mechanism to ensure both counters are sampled together.

To list the engine busyness counters, use the following command:

./perf list
  xe_0000_03_00.0/bcs0-busy-ticks-gt0/               [Kernel PMU event]
  xe_0000_03_00.0/bcs0-total-ticks-gt0/              [Kernel PMU event]
  xe_0000_03_00.0/ccs0-busy-ticks-gt0/               [Kernel PMU event]
  xe_0000_03_00.0/ccs0-total-ticks-gt0/              [Kernel PMU event]

Engine busyness can then be calculated as follows:
busyness % = (busy ticks / total ticks) * 100

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c       |   5 +
 drivers/gpu/drm/xe/xe_pmu.c       | 187 ++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_pmu_types.h |  17 +++
 drivers/gpu/drm/xe/xe_uc.c        |   3 +
 4 files changed, 192 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index df1ba94cf4ca..7f0425b41f06 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -17,6 +17,7 @@
 #include "regs/xe_irq_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_engine_activity.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_gt_printk.h"
@@ -418,6 +419,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
 	if (ret)
 		return ret;
 
+	ret = xe_engine_activity_init(guc);
+	if (ret)
+		return ret;
+
 	return xe_guc_ads_init_post_hwconfig(&guc->ads);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 633552fbf78d..9a657c6bb93e 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -9,10 +9,13 @@
 
 #include "regs/xe_gt_regs.h"
 #include "xe_device.h"
+#include "xe_engine_activity.h"
 #include "xe_force_wake.h"
+#include "xe_gt.h"
 #include "xe_gt_clock.h"
 #include "xe_gt_idle.h"
 #include "xe_guc_pc.h"
+#include "xe_hw_engine.h"
 #include "xe_mmio.h"
 #include "xe_macros.h"
 #include "xe_module.h"
@@ -30,7 +33,7 @@ static unsigned int xe_pmu_target_cpu = -1;
 /**
  * DOC: Xe PMU (Performance Monitoring Unit)
  *
- * Expose events/counters like C6 residency and GT frequency to user land.
+ * Expose events/counters like C6 residency, GT frequency and engine busyness to user land.
  * Perf tool can be used to list these counters from the command line.
  *
  * Example commands to list/record supported perf events-
@@ -88,6 +91,13 @@ static unsigned int xe_pmu_target_cpu = -1;
  *    1950
  *    1950
  *    1950
+ *
+ * Engine busyness: the PMU provides two counters per engine (<engine>-busy-ticks-gt<n> and
+ * <engine>-total-ticks-gt<n>) to calculate engine busyness. When querying engine busyness, the
+ * user must group these two counters using the perf_event group mechanism to ensure both
+ * counters are sampled together. Engine busyness can then be calculated using
+ *
+ *  busyness % = (busy ticks / total ticks) * 100
  */
 
 static struct xe_pmu *event_to_pmu(struct perf_event *event)
@@ -105,6 +115,64 @@ static u64 config_counter(const u64 config)
 	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
 }
 
+/* Extract the sample-type field (XE_PMU_SAMPLE_BITS wide) from the event config. */
+static u8 engine_event_sample(struct perf_event *event)
+{
+	return (config_counter(event->attr.config) >> XE_PMU_SAMPLE_SHIFT) &
+		((1U << XE_PMU_SAMPLE_BITS) - 1);
+}
+
+/* Extract the 8-bit engine class field from the event config. */
+static u8 engine_event_class(struct perf_event *event)
+{
+	return (config_counter(event->attr.config) >> XE_PMU_CLASS_SHIFT) &
+		0xff;
+}
+
+/* Extract the engine-instance field (XE_PMU_INSTANCE_BITS wide) from the event config. */
+static u8 engine_event_instance(struct perf_event *event)
+{
+	return (config_counter(event->attr.config) >> XE_PMU_INSTANCE_SHIFT) &
+		((1U << XE_PMU_INSTANCE_BITS) - 1);
+}
+
+static bool is_engine_event(struct perf_event *event)
+{
+	return config_counter(event->attr.config) > __XE_PMU_OTHER(0xff); /* beyond "other" ids */
+}
+
+/* Return 0 if @sample is a supported engine sample type, -ENOENT otherwise. */
+static int engine_event_status(u8 sample)
+{
+	switch (sample) {
+	case XE_PMU_SAMPLE_BUSY_TICKS:
+	case XE_PMU_SAMPLE_TOTAL_TICKS:
+		return 0;
+	default:
+		/* Every case returns, so no statement is needed after the switch. */
+		return -ENOENT;
+	}
+}
+
+/*
+ * Validate an engine event: the class/instance/GT encoded in the config must
+ * resolve to a hardware engine and the sample type must be supported.
+ * Returns 0 on success, -ENOENT otherwise.
+ */
+static int engine_event_init(struct perf_event *event)
+{
+	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+	/* Members not named here are zero-initialized, not left indeterminate. */
+	struct drm_xe_engine_class_instance eci = {
+		.engine_class = engine_event_class(event),
+		.engine_instance = engine_event_instance(event),
+		.gt_id = config_gt_id(event->attr.config),
+	};
+
+	if (!xe_hw_engine_lookup(xe, eci))
+		return -ENOENT;
+	return engine_event_status(engine_event_sample(event));
+}
+
 static unsigned int other_bit(const u64 config)
 {
 	unsigned int val;
@@ -217,7 +285,11 @@ static int xe_pmu_event_init(struct perf_event *event)
 	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
 		return -EINVAL;
 
-	ret = config_status(xe, event->attr.config);
+	if (is_engine_event(event))
+		ret = engine_event_init(event);
+	else
+		ret = config_status(xe, event->attr.config);
+
 	if (ret)
 		return ret;
 
@@ -300,26 +372,54 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
 	const unsigned int gt_id = config_gt_id(event->attr.config);
 	const u64 config = event->attr.config;
 	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+	bool device_awake;
 	u64 val = 0;
 
-	switch (config_counter(config)) {
-	case XE_PMU_RC6_RESIDENCY:
-		val = get_rc6(gt);
-		break;
-	case XE_PMU_ACTUAL_FREQUENCY:
-		val =
-		   div_u64(read_sample(pmu, gt_id,
-				       __XE_SAMPLE_FREQ_ACT),
-			   USEC_PER_SEC /* to MHz */);
-		break;
-	case XE_PMU_REQUESTED_FREQUENCY:
-		val =
-		   div_u64(read_sample(pmu, gt_id,
-				       __XE_SAMPLE_FREQ_REQ),
-			   USEC_PER_SEC /* to MHz */);
-		break;
-	default:
-		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct drm_xe_engine_class_instance eci;
+		struct xe_hw_engine *hwe;
+
+		eci.engine_class = engine_event_class(event);
+		eci.engine_instance = engine_event_instance(event);
+		eci.gt_id = gt_id;
+
+		hwe = xe_hw_engine_lookup(xe, eci);
+		/* Report 0 unless a runtime PM reference can be taken. */
+		device_awake = xe_pm_runtime_get_if_active(xe);
+		if (!device_awake)
+			return 0;
+		/* The message must not be passed as the WARN condition. */
+		if (!hwe)
+			drm_WARN_ONCE(&xe->drm, 1, "unknown engine\n");
+		else if (sample == XE_PMU_SAMPLE_BUSY_TICKS)
+			val = xe_engine_activity_get_active_ticks(hwe);
+		else if (sample == XE_PMU_SAMPLE_TOTAL_TICKS)
+			val = xe_engine_activity_get_total_ticks(hwe);
+		else
+			drm_warn(&xe->drm, "unknown pmu engine event\n");
+
+		xe_pm_runtime_put(xe);
+	} else {
+		switch (config_counter(config)) {
+		case XE_PMU_RC6_RESIDENCY:
+			val = get_rc6(gt);
+			break;
+		case XE_PMU_ACTUAL_FREQUENCY:
+			val =
+			    div_u64(read_sample(pmu, gt_id,
+						__XE_SAMPLE_FREQ_ACT),
+				    USEC_PER_SEC /* to MHz */);
+			break;
+		case XE_PMU_REQUESTED_FREQUENCY:
+			val =
+			   div_u64(read_sample(pmu, gt_id,
+					       __XE_SAMPLE_FREQ_REQ),
+				   USEC_PER_SEC /* to MHz */);
+			break;
+		default:
+			drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+		}
+	}
 
 	return val;
@@ -643,6 +743,12 @@ static const struct attribute_group xe_pmu_cpumask_attr_group = {
 	.unit = (__unit), \
 }
 
+/* Initializer for one engine_events[] entry: a sample type and its name. */
+#define __engine_event(__sample, __name) \
+{ \
+	.sample = (__sample), .name = (__name), \
+}
+
 static struct xe_ext_attribute *
 add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
 {
@@ -682,9 +788,19 @@ create_event_attributes(struct xe_pmu *pmu)
 		__event(2, "requested-frequency", "M"),
 	};
 
+	static const struct {
+		u8 sample;
+		char *name;
+	} engine_events[] = {
+		__engine_event(XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
+		__engine_event(XE_PMU_SAMPLE_TOTAL_TICKS, "total-ticks")
+	};
+
 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
 	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
 	struct attribute **attr = NULL, **attr_iter;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
 	unsigned int count = 0;
 	unsigned int i, j;
 	struct xe_gt *gt;
@@ -699,6 +815,13 @@ create_event_attributes(struct xe_pmu *pmu)
 		}
 	}
 
+	for_each_gt(gt, xe, j) {
+		for_each_hw_engine(hwe, gt, id)
+			for (i = 0; i < ARRAY_SIZE(engine_events); i++)
+				if (!engine_event_status(engine_events[i].sample))
+					count++;
+	}
+
 	/* Allocate attribute objects and table. */
 	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
 	if (!xe_attr)
@@ -744,6 +867,30 @@ create_event_attributes(struct xe_pmu *pmu)
 							events[i].unit);
 			}
 		}
+
+		for_each_hw_engine(hwe, gt, id) {
+			for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+				char *str;
+
+				if (engine_event_status(engine_events[i].sample))
+					continue;
+
+				str = kasprintf(GFP_KERNEL, "%s%d-%s-gt%u",
+						xe_hw_engine_class_to_str(hwe->class),
+						hwe->logical_instance,
+						engine_events[i].name, j);
+				if (!str)
+					goto err;
+
+				*attr_iter++ = &xe_iter->attr.attr;
+				xe_iter = add_xe_attr
+					(xe_iter, str,
+					 XE_PMU_ENGINE(j, xe_hw_engine_to_user_class(hwe->class),
+						       hwe->logical_instance,
+						       engine_events[i].sample));
+			}
+		}
+
 	}
 
 	pmu->xe_attr = xe_attr;
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
index c44c3d79b970..508cc8e43fbe 100644
--- a/drivers/gpu/drm/xe/xe_pmu_types.h
+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
@@ -36,6 +36,23 @@ enum {
 #define __XE_PMU_ACTUAL_FREQUENCY(gt)		___XE_PMU_OTHER(gt, 1)
 #define __XE_PMU_REQUESTED_FREQUENCY(gt)	___XE_PMU_OTHER(gt, 2)
 
+/* Sample types carried in the sample field of an engine event config */
+#define XE_PMU_SAMPLE_BUSY_TICKS	(1)
+#define XE_PMU_SAMPLE_TOTAL_TICKS	(2)
+
+/* First 8 bits of config are reserved for other counters */
+#define XE_PMU_SAMPLE_SHIFT			(8)
+#define XE_PMU_SAMPLE_BITS			(4)
+#define XE_PMU_INSTANCE_BITS			(8)
+#define XE_PMU_INSTANCE_SHIFT	(XE_PMU_SAMPLE_SHIFT + XE_PMU_SAMPLE_BITS)
+#define XE_PMU_CLASS_SHIFT	(XE_PMU_INSTANCE_SHIFT + XE_PMU_INSTANCE_BITS)
+
+/* Pack GT id, engine class, engine instance and sample type into a config */
+#define XE_PMU_ENGINE(gt, class, instance, sample) \
+	(((__u64)(class) << XE_PMU_CLASS_SHIFT) | \
+	 ((__u64)(instance) << XE_PMU_INSTANCE_SHIFT) | \
+	 ((__u64)(sample) << XE_PMU_SAMPLE_SHIFT) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
+
 /**
  * Non-engine events that we need to track enabled-disabled transition and
  * current state.
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 0d073a9987c2..e50d23d53921 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -7,6 +7,7 @@
 
 #include "xe_assert.h"
 #include "xe_device.h"
+#include "xe_engine_activity.h"
 #include "xe_gsc.h"
 #include "xe_gsc_proxy.h"
 #include "xe_gt.h"
@@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	if (ret)
 		return ret;
 
+	xe_engine_activity_enable_stats(&uc->guc);
+
 	/* We don't fail the driver load if HuC fails to auth, but let's warn */
 	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
 	xe_gt_assert(uc_to_gt(uc), !ret);
-- 
2.40.0



More information about the Intel-xe mailing list